In [4]:
from pathlib import Path
import pandas as pd
import numpy as np

# The project root is taken to be the parent of the current working directory;
# the processed data lives under <root>/data/processed.
PROJECT_ROOT = Path().resolve().parent
DATA_DIR = PROJECT_ROOT.joinpath("data", "processed")

# Load the preprocessed MSFT table, parsing the "date" column as datetimes,
# and order the rows by that column.
df = pd.read_csv(
    DATA_DIR.joinpath("MSFT_preprocessed.csv"),
    parse_dates=["date"],
).sort_values(by="date")

In [None]:
def create_stock_features(df):
    """
    Add engineered price/volume features to a stock DataFrame.

    The input frame is modified in place (new columns are added to it) and
    the same object is returned. It must contain the columns ``open``,
    ``high``, ``low``, ``close`` and ``volume``; rows are assumed to already
    be in chronological order, since every feature is computed over
    consecutive rows.

    Columns added:
    1. ``daily_return``  - percentage change of ``close`` (first row -> 0)
    2. ``log_return``    - log of close / previous close (first row -> 0)
    3. ``volatility_5d`` - 5-row rolling std of ``daily_return`` (warm-up -> 0)
    4. ``ma_5``, ``ma_20`` - rolling means of ``close`` (warm-up -> ``close``)
    5. ``ma_spread``     - ``ma_5 - ma_20``
    6. ``vol_mean_5``    - 5-row rolling mean of ``volume`` (warm-up -> ``volume``)
    7. ``hl_spread``     - ``high - low``
    8. ``co_diff``       - ``close - open``
    9. ``rsi``           - 14-row RSI using simple rolling means of gains and
                           losses (undefined values -> 50)
    10. ``momentum_5``   - ``close`` minus ``close`` five rows earlier
                           (warm-up -> 0)

    Returns:
        The same DataFrame object that was passed in, with the columns above.
    """

    # 1. Daily return
    df['daily_return'] = df['close'].pct_change().fillna(0)

    # 2. Log return
    df['log_return'] = np.log(df['close'] / df['close'].shift(1)).fillna(0)

    # 3. Rolling volatility (5-day)
    df['volatility_5d'] = df['daily_return'].rolling(5).std().fillna(0)

    # 4. Moving averages (5, 20); during warm-up fall back to the raw close
    df['ma_5'] = df['close'].rolling(5).mean().fillna(df['close'])
    df['ma_20'] = df['close'].rolling(20).mean().fillna(df['close'])

    # 5. MA spread
    df['ma_spread'] = df['ma_5'] - df['ma_20']

    # 6. Volume rolling mean (5-day); warm-up falls back to the raw volume
    df['vol_mean_5'] = df['volume'].rolling(5).mean().fillna(df['volume'])

    # 7. High-Low spread
    df['hl_spread'] = df['high'] - df['low']

    # 8. Close-Open difference
    df['co_diff'] = df['close'] - df['open']

    # 9. RSI (Relative Strength Index, 14-day)
    window = 14
    delta = df['close'].diff()
    # Use Series.where (not np.where wrapped in a fresh pd.Series) so gains
    # and losses keep df's index. A fresh Series would carry a 0..n-1
    # RangeIndex and the final column assignment would align on labels,
    # scrambling or blanking RSI whenever df's index is not 0..n-1 in order
    # (e.g. after sort_values or filtering).
    # The NaN delta on the first row fails both comparisons and becomes 0.
    gain = delta.where(delta > 0, 0.0)
    loss = (-delta).where(delta < 0, 0.0)
    roll_up = gain.rolling(window).mean()
    roll_down = loss.rolling(window).mean()
    # Zero average loss gives rs = inf -> RSI 100; 0/0 gives NaN -> filled
    # with the neutral value 50 below, as are the warm-up rows.
    rs = roll_up / roll_down
    df['rsi'] = (100 - (100 / (1 + rs))).fillna(50)

    # 10. Momentum (5-day)
    df['momentum_5'] = (df['close'] - df['close'].shift(5)).fillna(0)

    return df

