# Feature Engineering V2

## Volatility
- ATR5
- std5
- norm_tr
- vol_ratio_5

## Momentum
- ROC2
- ROC3
- ROC5
- RSI7

## Trend Direction
- MA5
- MA10
- Close/MA5 ratio

## Candle Structure
- candle_body_pct

## Volume
- vol_ma5
- vol_spike_5

## Lagged Returns
- return_1d
- return_2d
- return_3d

## Target
- target (3-day direction)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:

# Show every column when a DataFrame is displayed instead of truncating wide frames.
pd.set_option("display.max_columns", None)

# Read the cleaned historical price file; "Date" is parsed as datetime and used as the index.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs on a machine with this exact directory layout.
# NOTE(review): in the displayed head, Close (~43.5) lies outside the [Low, High]
# range (~50.3-51.2), so Close appears to be on a different scale than
# Open/High/Low (possibly an adjusted close) — confirm, because ATR5, norm_tr and
# candle_body_pct all mix Close with Open/High/Low.
df = pd.read_csv(
    filepath_or_buffer="C:/Users/USER/Documents/QQQ Project/data/processed/new_QQQ_data.csv",
    index_col="Date",
    parse_dates=["Date"],
)

df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1999-03-10,51.125,51.15625,50.28125,43.478279,5232000
1999-03-11,51.4375,51.734375,50.3125,43.69117,9688600
1999-03-12,51.125,51.15625,49.65625,42.626823,8743600
1999-03-15,50.4375,51.5625,49.90625,43.850815,6369000
1999-03-16,51.71875,52.15625,51.15625,44.223331,4905800


In [3]:
# One-row simple return of Close; the first row has no predecessor and is NaN.
df["returns"] = df["Close"].pct_change(periods=1)


ATR 5 Day

In [4]:
def compute_atr(df, window=5):
    """Return the rolling mean of the true range over ``window`` rows.

    The true range of a row is the largest of:
      * High - Low
      * |High - previous row's Close|
      * |Low  - previous row's Close|

    Parameters
    ----------
    df : DataFrame with "High", "Low" and "Close" columns.
    window : number of rows averaged by the rolling mean.

    Returns
    -------
    Series aligned to ``df``'s index. The first ``window - 1`` entries are NaN
    because the rolling window is not yet full.
    """
    prev_close = df["Close"].shift()

    # Three candidate ranges, one column each. On the first row prev_close is
    # NaN, so the row-wise max falls back to High - Low there.
    candidates = pd.concat(
        [
            df["High"] - df["Low"],
            (df["High"] - prev_close).abs(),
            (df["Low"] - prev_close).abs(),
        ],
        axis=1,
    )
    true_range = candidates.max(axis=1)

    return true_range.rolling(window).mean()

# Store the 5-row average true range on the frame as "ATR5".
df["ATR5"] = compute_atr(df, window=5)


Rolling STD 5

In [5]:
# Rolling standard deviation of the daily returns over a 5-row window.
df["std5"] = df["returns"].rolling(window=5).std()


Normalized range ((High - Low) / Close) and volatility ratio (ATR5 / std5)

In [6]:
# Daily high-low range expressed as a fraction of Close.
# NOTE(review): despite the "tr" in the name this is the plain High - Low range;
# it has no previous-close terms, unlike the true range used for ATR5 — confirm
# that this is intended.
df["norm_tr"] = (df["High"] - df["Low"]) / df["Close"]

# ATR5 relative to the 5-row return standard deviation.
# A std5 of exactly 0 would make this +/-inf, and dropna() does not treat inf as
# missing, so such rows would survive into the exported features. Mask a zero
# denominator to NaN (the same guard candle_body_pct uses) so that a later
# dropna() discards those rows.
df["vol_ratio_5"] = df["ATR5"] / df["std5"].replace(0, np.nan)


Momentum: ROC 2, 3, 5 and RSI 7

In [7]:
# Rate of change of Close over 2, 3 and 5 rows (simple percentage change).
df["roc2"] = df["Close"].pct_change(periods=2)
df["roc3"] = df["Close"].pct_change(periods=3)
df["roc5"] = df["Close"].pct_change(periods=5)

# RSI7
def compute_rsi(series, window=7):
    """Return a relative strength index of ``series`` on a 0-100 scale.

    Average gains and average losses are plain rolling means over ``window``
    rows of the positive and negative parts of the row-to-row change. The
    result is ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Parameters
    ----------
    series : numeric Series (e.g. closing prices).
    window : number of rows in each rolling average.

    Returns
    -------
    Series aligned to ``series``. The first ``window`` entries are NaN: the
    first change is undefined and the rolling mean needs a full window. When
    the average loss is 0 and the average gain is positive, the ratio is inf
    and the result is 100.
    """

    def _rolling_positive_mean(moves):
        # Keep only the positive part of the moves, then average over the window.
        return moves.clip(lower=0).rolling(window).mean()

    change = series.diff()
    avg_gain = _rolling_positive_mean(change)
    avg_loss = _rolling_positive_mean(-change)

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

# 7-row RSI of the Close column.
df["rsi7"] = compute_rsi(df["Close"], window=7)


Trend: MA5, MA10, Close/MA5 ratio

In [8]:
# Simple moving averages of Close over 5 and 10 rows.
df["MA5"] = df["Close"].rolling(window=5).mean()
df["MA10"] = df["Close"].rolling(window=10).mean()

# Close relative to its 5-row average (above 1 means Close is above MA5).
df["close_ma5_ratio"] = df["Close"].div(df["MA5"])


Candle Structure Feature

In [9]:
# Candle body size |Close - Open| as a fraction of the High - Low range.
# A zero range is masked to NaN before dividing, and every NaN result is then
# reported as 0.
# NOTE(review): in the displayed df_clean.head() this ratio is well above 1
# (e.g. 4.91), which a body/range fraction should not be if all four prices
# share a scale — check whether Close is on a different scale than
# Open/High/Low.
df["candle_body_pct"] = (
    df["Close"]
    .sub(df["Open"])
    .abs()
    .div(df["High"].sub(df["Low"]).replace(0, np.nan))
    .fillna(0)
)


Volume features (5-day)

In [10]:
# 5-row average volume.
df["vol_ma5"] = df["Volume"].rolling(window=5).mean()

# Today's volume relative to that average (above 1 means heavier than usual).
df["vol_spike_5"] = df["Volume"].div(df["vol_ma5"])


Lagged Returns

In [11]:
# Trailing simple returns of Close over the last 1, 2 and 3 rows.
# NOTE(review): these use the same expressions as "returns", "roc2" and "roc3"
# computed in earlier cells, so the three columns are exact duplicates of
# those. If genuinely lagged daily returns were intended (e.g. shifted copies
# of "returns"), this needs changing — confirm the intent.
df["return_1d"] = df["Close"].pct_change(periods=1)
df["return_2d"] = df["Close"].pct_change(periods=2)
df["return_3d"] = df["Close"].pct_change(periods=3)


Target (3-Day Direction)

In [12]:
# Forward return: Close three rows ahead relative to the current Close.
# The last three rows have no Close three rows ahead, so the value is NaN there.
df["future_3d_return"] = df["Close"].shift(-3).div(df["Close"]).sub(1)

# Binary label: 1 when the forward return is strictly positive, otherwise 0.
# NaN > 0 evaluates to False, so the last three rows are labelled 0 at this
# point. They only leave the dataset because a later dropna() sees the NaN in
# future_3d_return; do not use "target" from df directly without that step.
df["target"] = df["future_3d_return"].gt(0).astype(int)


In [13]:
# Keep only rows where every column is populated. This removes the leading
# rows where the rolling windows are not yet full and the trailing rows that
# have no 3-row-ahead Close.
df_clean = df.dropna(axis=0, how="any").copy()
df_clean.head()


Unnamed: 0_level_0,Open,High,Low,Close,Volume,returns,ATR5,std5,norm_tr,vol_ratio_5,roc2,roc3,roc5,rsi7,MA5,MA10,close_ma5_ratio,candle_body_pct,vol_ma5,vol_spike_5,return_1d,return_2d,return_3d,future_3d_return,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
1999-03-23,50.3125,50.5,48.71875,41.562469,10962400,-0.035207,8.066575,0.020802,0.042857,387.772095,-0.04698,-0.071344,-0.060169,41.071369,43.382503,43.478293,0.958047,4.912298,6392200.0,1.714965,-0.035207,-0.04698,-0.071344,0.060179,1
1999-03-24,49.25,50.5,48.5,42.972725,8447000,0.033931,8.298747,0.029548,0.046541,280.859638,-0.002471,-0.014643,-0.021212,42.857116,43.196243,43.427738,0.994826,3.138638,7288600.0,1.158933,0.033931,-0.002471,-0.014643,0.062539,1
1999-03-25,51.25,52.3125,50.914062,44.542622,8133800,0.036532,8.428757,0.03364,0.031395,250.554871,0.071703,0.033971,-0.004757,52.17382,43.153664,43.512883,1.032186,4.796338,7945680.0,1.023676,0.036532,0.071703,0.033971,0.017921,1
1999-03-26,51.875,52.46875,51.53125,44.063675,8256200,-0.010753,8.315086,0.031463,0.021276,264.279574,0.025387,0.060179,0.010372,51.063811,43.244133,43.656568,1.018952,8.33208,8164840.0,1.011189,-0.010753,0.025387,0.060179,0.013889,1
1999-03-29,52.375,53.65625,52.0625,45.660183,5113800,0.036232,8.643367,0.033227,0.034905,260.131978,0.02509,0.062539,0.059913,55.48375,43.760335,43.837505,1.043415,4.213219,8182640.0,0.624957,0.036232,0.02509,0.062539,-0.001165,0


In [15]:
# Columns written to the modelling dataset, in output order.
# Note that the list ends with the label column "target", not only features.
final_features = [
    # volatility
    "ATR5",
    "std5",
    "norm_tr",
    "vol_ratio_5",
    # momentum
    "roc2",
    "roc3",
    "roc5",
    "rsi7",
    # trend
    "MA5",
    "MA10",
    "close_ma5_ratio",
    # candle structure
    "candle_body_pct",
    # volume
    "vol_ma5",
    "vol_spike_5",
    # trailing returns
    "return_1d",
    "return_2d",
    "return_3d",
    # label
    "target",
]

# Select those columns from the NaN-free frame and persist them as CSV
# (the Date index is written as the first column).
df_final = df_clean.loc[:, final_features].copy()
df_final.to_csv(
    path_or_buf="C:/Users/USER/Documents/QQQ Project/data/processed/qqq_features_classificationv2.csv"
)

df_final.head()


Unnamed: 0_level_0,ATR5,std5,norm_tr,vol_ratio_5,roc2,roc3,roc5,rsi7,MA5,MA10,close_ma5_ratio,candle_body_pct,vol_ma5,vol_spike_5,return_1d,return_2d,return_3d,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1999-03-23,8.066575,0.020802,0.042857,387.772095,-0.04698,-0.071344,-0.060169,41.071369,43.382503,43.478293,0.958047,4.912298,6392200.0,1.714965,-0.035207,-0.04698,-0.071344,1
1999-03-24,8.298747,0.029548,0.046541,280.859638,-0.002471,-0.014643,-0.021212,42.857116,43.196243,43.427738,0.994826,3.138638,7288600.0,1.158933,0.033931,-0.002471,-0.014643,1
1999-03-25,8.428757,0.03364,0.031395,250.554871,0.071703,0.033971,-0.004757,52.17382,43.153664,43.512883,1.032186,4.796338,7945680.0,1.023676,0.036532,0.071703,0.033971,1
1999-03-26,8.315086,0.031463,0.021276,264.279574,0.025387,0.060179,0.010372,51.063811,43.244133,43.656568,1.018952,8.33208,8164840.0,1.011189,-0.010753,0.025387,0.060179,1
1999-03-29,8.643367,0.033227,0.034905,260.131978,0.02509,0.062539,0.059913,55.48375,43.760335,43.837505,1.043415,4.213219,8182640.0,0.624957,0.036232,0.02509,0.062539,0
