Download the weekly price series of the Russell 3000 index (ticker `^RUA`) from Yahoo Finance.

In [1]:
import yfinance as yf

# Fetch weekly OHLCV bars for the Russell 3000 index.
# The full history is requested explicitly rather than relying on yfinance's
# default look-back window: the analysis below needs data from the 1990s
# onward, and the default may differ between yfinance releases
# (NOTE(review): recent versions appear to default to a short window —
# confirm against the installed version).
russell = yf.download("^RUA", period="max", interval="1wk")
russell

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1987-09-07,178.960007,181.470001,177.050003,181.339996,181.339996,0
1987-09-14,181.339996,182.250000,177.199997,177.690002,177.690002,0
1987-09-21,177.690002,181.009995,174.649994,180.229996,180.229996,0
1987-09-28,180.229996,184.949997,180.229996,184.699997,184.699997,0
1987-10-05,184.710007,184.940002,176.029999,176.070007,176.070007,0
...,...,...,...,...,...,...
2022-03-21,2597.310059,2639.239990,2573.159912,2637.370117,2637.370117,0
2022-03-28,2637.370117,2697.919922,2620.800049,2643.199951,2643.199951,0
2022-04-04,2643.750000,2667.419922,2575.300049,2597.179932,2597.179932,0
2022-04-11,2595.360107,2595.360107,2541.489990,2582.270020,2582.270020,0


Use the `ta` technical-analysis library for feature engineering.

In [2]:
import ta
from ta import add_all_ta_features
from ta.utils import dropna
import warnings
# NOTE: this filter is global — every warning raised for the rest of the
# session (not only those from the feature generation below) is silenced.
warnings.filterwarnings("ignore")

# Generate the library's full set of indicator columns. A copy of the
# download is passed in so `russell` itself is not handed to the library.
mom_data = add_all_ta_features(
    russell.copy(),
    open="Open",
    high="High",
    low="Low",
    close="Close",
    volume="Volume",
)
mom_data.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'volume_adi',
       'volume_obv', 'volume_cmf', 'volume_fi', 'volume_em', 'volume_sma_em',
       'volume_vpt', 'volume_vwap', 'volume_mfi', 'volume_nvi',
       'volatility_bbm', 'volatility_bbh', 'volatility_bbl', 'volatility_bbw',
       'volatility_bbp', 'volatility_bbhi', 'volatility_bbli',
       'volatility_kcc', 'volatility_kch', 'volatility_kcl', 'volatility_kcw',
       'volatility_kcp', 'volatility_kchi', 'volatility_kcli',
       'volatility_dcl', 'volatility_dch', 'volatility_dcm', 'volatility_dcw',
       'volatility_dcp', 'volatility_atr', 'volatility_ui', 'trend_macd',
       'trend_macd_signal', 'trend_macd_diff', 'trend_sma_fast',
       'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow',
       'trend_vortex_ind_pos', 'trend_vortex_ind_neg', 'trend_vortex_ind_diff',
       'trend_trix', 'trend_mass_index', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv',
       

In [3]:
# RSI and CCI computed with a 13-bar window (bars are weekly, so 13 weeks).
# Assigning to these keys replaces any same-named column already in mom_data.
mom_data['momentum_rsi'] = ta.momentum.RSIIndicator(
    close=mom_data['Close'],
    window=13,
).rsi()
mom_data['trend_cci'] = ta.trend.CCIIndicator(
    high=mom_data['High'],
    low=mom_data['Low'],
    close=mom_data['Close'],
    window=13,
).cci()

# Trailing percentage change of the close over 4 and 12 bars.
mom_data['4_week_return'] = mom_data['Close'].pct_change(periods=4)
mom_data['12_week_return'] = mom_data['Close'].pct_change(periods=12)

Select six technical indicators

In [4]:
# The six indicator columns that are used as features.
indicators = [
    "momentum_rsi",
    "trend_cci",
    "trend_mass_index",
    "4_week_return",
    "12_week_return",
    "volatility_dcw",
]

# Features first, then the columns carried along with them: `others_dr`
# (presumably the per-bar return produced by `ta` — confirm) and OHLC prices.
columns = [*indicators, "others_dr", "Open", "High", "Low", "Close"]

In [5]:
# Keep only the selected columns and discard every row with a missing value
# (e.g. the warm-up rows at the start of each rolling indicator).
data = mom_data[columns].dropna()

# Chronological split by calendar year of the index:
#   train -> 1998 through 2017 inclusive
#   test  -> 2018 onward
train = data.loc[(data.index.year >= 1998) & (data.index.year < 2018)]
test = data.loc[data.index.year >= 2018]
train

Unnamed: 0_level_0,momentum_rsi,trend_cci,trend_mass_index,4_week_return,12_week_return,volatility_dcw,others_dr,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1998-01-05,49.231019,-21.616250,26.659479,-0.025085,-0.025433,13.251013,-4.922125,544.479980,548.489990,514.869995,517.679993
1998-01-12,55.466632,-12.926268,26.743245,0.013700,0.011403,13.212305,3.484774,517.679993,537.429993,509.190002,535.719971
1998-01-19,54.704066,72.650718,26.746042,0.018623,0.038438,13.195110,-0.352788,535.719971,544.729980,530.760010,533.830017
1998-01-26,58.449950,-4.272169,26.927275,0.001451,0.047166,13.162906,2.143005,533.830017,551.080017,494.339996,545.270020
1998-02-02,63.570821,229.494143,26.950025,0.087969,0.084471,15.488280,3.291938,545.270020,563.950012,545.270020,563.219971
...,...,...,...,...,...,...,...,...,...,...,...
2017-11-27,81.055632,144.027797,24.530975,0.021935,0.076021,9.940203,1.433523,1543.500000,1575.079956,1541.109985,1565.880005
2017-12-04,81.573289,155.337173,24.709231,0.027411,0.060894,10.258453,0.222238,1568.689941,1580.369995,1553.640015,1569.359985
2017-12-11,83.372422,161.455997,24.826581,0.035169,0.066911,10.511165,0.810520,1569.719971,1584.780029,1567.260010,1582.079956
2017-12-18,84.183903,148.520043,24.792869,0.028826,0.062069,11.100188,0.389996,1584.109985,1594.329956,1583.310059,1588.250000


Standardize the training set

In [6]:
from sklearn.preprocessing import StandardScaler
import pandas as pd 

# Fit the scaler on the training indicators only and standardise them.
# `sc` keeps the fitted statistics so later cells can reuse the same scaler.
sc = StandardScaler()

# fit_transform returns a bare array; wrap it back into a DataFrame that
# carries the training dates and the indicator names.
transformed_data = pd.DataFrame(
    sc.fit_transform(train[indicators]),
    index=train.index,
    columns=indicators,
)
transformed_data

Unnamed: 0_level_0,momentum_rsi,trend_cci,trend_mass_index,4_week_return,12_week_return,volatility_dcw
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1998-01-05,-0.529574,-0.499413,1.315247,-0.648223,-0.527139,-0.325932
1998-01-12,-0.001574,-0.416617,1.380572,0.178634,-0.053749,-0.330483
1998-01-19,-0.066145,0.398749,1.382754,0.283606,0.293685,-0.332505
1998-01-26,0.251038,-0.334162,1.524089,-0.082499,0.405849,-0.336291
1998-02-02,0.684647,1.893130,1.541830,1.762011,0.885255,-0.062888
...,...,...,...,...,...,...
2017-11-27,2.165172,1.078819,-0.344668,0.354200,0.776668,-0.715197
2017-12-04,2.209005,1.186573,-0.205656,0.470949,0.582271,-0.677779
2017-12-11,2.361346,1.244872,-0.114140,0.636347,0.659593,-0.648066
2017-12-18,2.430059,1.121621,-0.140430,0.501115,0.597366,-0.578813
