# In [1]:
import pandas as pd
from pathlib import Path

# In [9]:

# Input features and the final ML dataset share one directory; it is created
# up front (including parents) so later writes cannot fail on a missing folder.
PROCESSED_PATH = Path("data") / "processed"
OUTPUT_PATH = Path("data") / "processed"
OUTPUT_PATH.mkdir(exist_ok=True, parents=True)

def build_features(
    input_path: "str | Path | None" = None,
    output_path: "str | Path | None" = None,
    *,
    threshold: float = 0.005,
) -> pd.DataFrame:
    """Build the ML dataset: market features plus a next-day regime label.

    Reads a CSV with at least ``date`` and ``Close`` columns, sorts it by
    date, and adds:

    * ``Return``     - day-over-day percentage change of ``Close``
    * ``MA20``       - 20-row rolling mean of ``Close``
    * ``MA50``       - 50-row rolling mean of ``Close``
    * ``Volatility`` - 20-row rolling standard deviation of ``Return``
    * ``Target``     - regime of the NEXT row's return:
      2 (bull) if above ``threshold``, 0 (bear) if below ``-threshold``,
      otherwise 1 (sideways)

    Rows containing any NaN are dropped. That removes the rolling-window
    warm-up rows and the final row, whose next-day return is unknown and
    therefore cannot be labelled.

    Args:
        input_path: CSV to read. Defaults to
            ``PROCESSED_PATH / "nasdaq100_features.csv"``.
        output_path: CSV to write. Defaults to
            ``OUTPUT_PATH / "nasdaq100_ml_dataset.csv"``. Its parent
            directory is created if needed.
        threshold: Absolute next-day return separating sideways from
            bull/bear. Must be non-negative.

    Returns:
        The final DataFrame that was written to ``output_path``.

    Raises:
        ValueError: If ``threshold`` is negative.
        KeyError: If the input CSV lacks a ``date`` or ``Close`` column.
    """
    if threshold < 0:
        raise ValueError(f"threshold must be non-negative, got {threshold!r}")

    # Defaults are resolved at call time so the module-level path constants
    # remain the single source of truth.
    if input_path is None:
        input_path = PROCESSED_PATH / "nasdaq100_features.csv"
    if output_path is None:
        output_path = OUTPUT_PATH / "nasdaq100_ml_dataset.csv"
    input_path = Path(input_path)
    output_path = Path(output_path)

    df = pd.read_csv(input_path)

    missing = [col for col in ("date", "Close") if col not in df.columns]
    if missing:
        raise KeyError(f"{input_path} is missing required column(s): {missing}")

    # Remove a stray index column left over from an earlier to_csv().
    if "index" in df.columns:
        df = df.drop(columns=["index"])

    # Rolling windows and shift() below are positional, so the rows must be
    # in chronological order first.
    df["date"] = pd.to_datetime(df["date"])
    df = df.sort_values("date").reset_index(drop=True)

    # Market features
    df["Return"] = df["Close"].pct_change()
    df["MA20"] = df["Close"].rolling(20).mean()
    df["MA50"] = df["Close"].rolling(50).mean()
    df["Volatility"] = df["Return"].rolling(20).std()

    # Target: market regime of the next day's return.
    next_return = df["Return"].shift(-1)
    target = pd.Series(1.0, index=df.index)  # 1 = sideways
    target[next_return > threshold] = 2.0  # 2 = bull
    target[next_return < -threshold] = 0.0  # 0 = bear
    # An unknown next-day return must stay unlabelled. Comparing NaN against
    # the thresholds is always False, which would otherwise silently label
    # the last row as "sideways" and let it survive the dropna() below.
    df["Target"] = target.where(next_return.notna())

    # Drop warm-up rows, the unlabelled last row, and any other incomplete row.
    df = df.dropna().reset_index(drop=True)
    # Labels were held as floats only to carry NaN; store them as integers.
    df["Target"] = df["Target"].astype(int)

    # Save final dataset
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)

    print("Feature engineering completed")
    print(f"Dataset saved to {output_path.as_posix()}")

    return df

# Run the feature pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    build_features()


# Feature engineering completed
# Dataset saved to data/processed/nasdaq100_ml_dataset.csv
