In [5]:
%matplotlib inline
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import yfinance as yf

import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS
from sklearn.preprocessing import scale
from scipy.stats.mstats import winsorize
import talib

import warnings
warnings.filterwarnings('ignore')

In [6]:
# sns.set_style('whitegrid')
# Shortcut object for label-based MultiIndex slicing.
idx = pd.IndexSlice
# Break points 0.1, 0.2, ..., 0.9; rounding removes float noise from arange.
deciles = np.arange(.1, 1, .1).round(1)
# Function groups as reported by the installed TA-Lib.
talib_grps = talib.get_function_groups()
# Hand-maintained list of indicator group names.
function_groups = [
    'Overlap Studies',
    'Momentum Indicators',
    'Volume Indicators',
    'Volatility Indicators',
    'Price Transform',
    'Cycle Indicators',
    'Pattern Recognition',
    'Statistic Functions',
    'Math Transform',
    'Math Operators',
]

In [7]:
# Price/volume frames; the cells below use them with one column per ticker
# and an index that resets to a 'DlyCalDt' column.
(df_close,
 df_high,
 df_low,
 df_open,
 df_volume) = map(pd.read_pickle, [
    "other/close price.pkl.zip",
    "other/high price.pkl.zip",
    "other/low price.pkl.zip",
    "other/open price.pkl.zip",
    "other/volume.pkl.zip",
])

# stock return

In [45]:
# Long return panel that every factor table below is left-joined onto.
return_panel_path = "return/stock return(for integration).pkl.zip"
df_return_long = pd.read_pickle(return_panel_path)

In [46]:
# Preview the panel; factor frames are later matched on its 'timelag' and 'Ticker' columns.
df_return_long

Unnamed: 0,date,Ticker,timelag
0,2019-01-02,AAL,2018-12-31
1,2019-01-03,AAL,2019-01-02
2,2019-01-04,AAL,2019-01-03
3,2019-01-07,AAL,2019-01-04
4,2019-01-08,AAL,2019-01-07
...,...,...,...
628995,2023-12-22,ZS,2023-12-21
628996,2023-12-26,ZS,2023-12-22
628997,2023-12-27,ZS,2023-12-26
628998,2023-12-28,ZS,2023-12-27


# Moving Average

In [47]:
# Columns kept in the moving-average table; extended as each factor is merged in.
ls_MA = ['date','Ticker','timelag']
# Placeholder; the first SMA merge assigns the real frame.
df_MA = pd.DataFrame()

## SMA

In [48]:
# Simple moving averages of the close for 5/10/30-period windows, one column per ticker.
df_SMA_5, df_SMA_10, df_SMA_30 = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
for ticker, close in df_close.items():
    df_SMA_5[ticker] = talib.SMA(close, timeperiod=5)
    df_SMA_10[ticker] = talib.SMA(close, timeperiod=10)
    df_SMA_30[ticker] = talib.SMA(close, timeperiod=30)

In [49]:
# Wide (date x ticker) -> long (DlyCalDt, Ticker, value) for each SMA window.
df_SMA_5_long = df_SMA_5.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='SMA_5')
df_SMA_10_long = df_SMA_10.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='SMA_10')
df_SMA_30_long = df_SMA_30.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='SMA_30')

In [50]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_SMA_5_long, df_SMA_10_long, df_SMA_30_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [51]:
# Left-join each SMA frame onto the return panel: the panel's 'timelag' date
# is matched against the factor's 'DlyCalDt'.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
df_MA = df_return_long
for frame in (df_SMA_5_long, df_SMA_10_long, df_SMA_30_long):
    df_MA = df_MA.merge(frame, **join_spec)
ls_MA = [*ls_MA, 'SMA_5', 'SMA_10', 'SMA_30']
df_MA = df_MA[ls_MA]

## WMA

In [53]:
# Weighted moving averages of the close for 5/10/30-period windows, one column per ticker.
df_WMA_5, df_WMA_10, df_WMA_30 = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
for ticker, close in df_close.items():
    df_WMA_5[ticker] = talib.WMA(close, timeperiod=5)
    df_WMA_10[ticker] = talib.WMA(close, timeperiod=10)
    df_WMA_30[ticker] = talib.WMA(close, timeperiod=30)

In [54]:
# Wide -> long for each WMA window.
df_WMA_5_long = df_WMA_5.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='WMA_5')
df_WMA_10_long = df_WMA_10.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='WMA_10')
df_WMA_30_long = df_WMA_30.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='WMA_30')

In [55]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_WMA_5_long, df_WMA_10_long, df_WMA_30_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [56]:
# Append the three WMA columns to the moving-average table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_WMA_5_long, df_WMA_10_long, df_WMA_30_long):
    df_MA = df_MA.merge(frame, **join_spec)
ls_MA = [*ls_MA, 'WMA_5', 'WMA_10', 'WMA_30']
df_MA = df_MA[ls_MA]

## TEMA

In [57]:
# Triple exponential moving averages of the close for 5/10/30-period windows.
df_TEMA_5, df_TEMA_10, df_TEMA_30 = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
for ticker, close in df_close.items():
    df_TEMA_5[ticker] = talib.TEMA(close, timeperiod=5)
    df_TEMA_10[ticker] = talib.TEMA(close, timeperiod=10)
    df_TEMA_30[ticker] = talib.TEMA(close, timeperiod=30)

In [58]:
# Wide -> long for each TEMA window.
df_TEMA_5_long = df_TEMA_5.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='TEMA_5')
df_TEMA_10_long = df_TEMA_10.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='TEMA_10')
df_TEMA_30_long = df_TEMA_30.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='TEMA_30')

In [59]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_TEMA_5_long, df_TEMA_10_long, df_TEMA_30_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [60]:
# Append the three TEMA columns to the moving-average table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_TEMA_5_long, df_TEMA_10_long, df_TEMA_30_long):
    df_MA = df_MA.merge(frame, **join_spec)
ls_MA = [*ls_MA, 'TEMA_5', 'TEMA_10', 'TEMA_30']
df_MA = df_MA[ls_MA]

## MAMA

In [61]:
# MESA adaptive moving average (MAMA) and its following average (FAMA), one column per ticker.
df_MAMA = pd.DataFrame()
df_FAMA = pd.DataFrame()
for i in df_close.columns:
    # talib.MAMA returns the pair (mama, fama); call it once per ticker and
    # unpack, instead of recomputing the same indicator for each output.
    df_MAMA[i], df_FAMA[i] = talib.MAMA(df_close[i], fastlimit=0.5, slowlimit=0.05)

In [62]:
# Wide -> long for MAMA and FAMA.
df_MAMA_long = df_MAMA.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='MAMA')
df_FAMA_long = df_FAMA.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='FAMA')

In [63]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_MAMA_long, df_FAMA_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [64]:
# Append the MAMA and FAMA columns to the moving-average table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_MAMA_long, df_FAMA_long):
    df_MA = df_MA.merge(frame, **join_spec)
ls_MA = [*ls_MA, 'MAMA', 'FAMA']
df_MA = df_MA[ls_MA]

## factor reserve

In [65]:
# Missing-value count per column before saving.
df_MA.isna().sum()

date       0
Ticker     0
timelag    0
SMA_5      0
SMA_10     0
SMA_30     0
WMA_5      0
WMA_10     0
WMA_30     0
TEMA_5     0
TEMA_10    0
TEMA_30    0
MAMA       0
FAMA       0
dtype: int64

In [66]:
# Persist the table without the helper join column.
df_MA.drop(columns="timelag").to_pickle("factor/Moving Averages.pkl.zip", compression='zip')

# Overlap

In [68]:
# Columns kept in the overlap table; extended as each factor is merged in.
ls_overlap = ['date','Ticker','timelag']
# Placeholder; the first Bollinger merge assigns the real frame.
df_overlap = pd.DataFrame()

## Bollinger Bands

### factor generation

In [69]:
# 20-period Bollinger Bands (2 standard deviations each side, matype=1) per ticker.
df_BBANDS_upper = pd.DataFrame()
df_BBANDS_middle = pd.DataFrame()
df_BBANDS_lower = pd.DataFrame()
for i in df_close.columns:
    # talib.BBANDS returns (upper, middle, lower); call it once per ticker and
    # unpack, instead of recomputing the same bands three times.
    df_BBANDS_upper[i], df_BBANDS_middle[i], df_BBANDS_lower[i] = talib.BBANDS(
        df_close[i], timeperiod=20, nbdevup=2, nbdevdn=2, matype=1)

In [70]:
# Wide -> long for each Bollinger band.
df_BBANDS_upper_long = df_BBANDS_upper.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='BBANDS_upper')
df_BBANDS_middle_long = df_BBANDS_middle.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='BBANDS_middle')
df_BBANDS_lower_long = df_BBANDS_lower.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='BBANDS_lower')

In [71]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_BBANDS_upper_long, df_BBANDS_middle_long, df_BBANDS_lower_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [72]:
# Start the overlap table: left-join the three bands onto the return panel,
# matching the panel's 'timelag' date against the factor's 'DlyCalDt'.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
df_overlap = df_return_long
for frame in (df_BBANDS_upper_long, df_BBANDS_middle_long, df_BBANDS_lower_long):
    df_overlap = df_overlap.merge(frame, **join_spec)
ls_overlap.extend(['BBANDS_upper', 'BBANDS_middle', 'BBANDS_lower'])
df_overlap = df_overlap[ls_overlap]

### Normalized squeeze & mean reversion indicators

In [73]:
# Bands expressed relative to the close, so values are comparable across price levels.
df_bb_up = df_BBANDS_upper/df_close
df_bb_low = df_BBANDS_lower/df_close
# Band width relative to the close: (upper - lower) / close.
# df_bb_up and df_bb_low are already divided by the close, so dividing their
# difference by the close again would yield (upper - lower) / close**2.
df_bb_squeeze = df_bb_up - df_bb_low

In [74]:
# Wide -> long for the normalised band indicators.
df_bb_up_long = df_bb_up.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='BBANDS_up')
df_bb_low_long = df_bb_low.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='BBANDS_low')
df_bb_squeeze_long = df_bb_squeeze.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='BBANDS_squeeze')

In [75]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_bb_up_long, df_bb_low_long, df_bb_squeeze_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [76]:
# Append the normalised band columns to the overlap table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_bb_up_long, df_bb_low_long, df_bb_squeeze_long):
    df_overlap = df_overlap.merge(frame, **join_spec)
ls_overlap.extend(['BBANDS_up', 'BBANDS_low', 'BBANDS_squeeze'])
df_overlap = df_overlap[ls_overlap]

## Hilbert Transform - Instantaneous Trendline

### factor generation

In [77]:
# Hilbert Transform instantaneous trendline of the close, one column per ticker.
df_HTITREND = pd.DataFrame()
for ticker, close in df_close.items():
    df_HTITREND[ticker] = talib.HT_TRENDLINE(close)

In [78]:
# Wide -> long, with the date column parsed for the merge.
df_HTITREND_long = (
    df_HTITREND.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='HTITREND')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [79]:
# Append the trendline column to the overlap table.
df_overlap = df_overlap.merge(
    df_HTITREND_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_overlap.append('HTITREND')
df_overlap = df_overlap[ls_overlap]

### Hilbert-based normalized indicator

In [80]:
# Trendline relative to the close, centred on zero.
df_HT_norm = df_HTITREND.div(df_close).sub(1)

In [81]:
# Wide -> long, with the date column parsed for the merge.
df_HT_norm_long = (
    df_HT_norm.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='HT_norm')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [82]:
# Append the normalised trendline column to the overlap table.
df_overlap = df_overlap.merge(
    df_HT_norm_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_overlap.append('HT_norm')
df_overlap = df_overlap[ls_overlap]

## SAR

### factor generation

In [83]:
# Parabolic SAR from each ticker's high/low series.
df_SAR = pd.DataFrame()
for ticker in df_close.columns:
    high_low = (df_high[ticker], df_low[ticker])
    df_SAR[ticker] = talib.SAR(*high_low, acceleration=0.02, maximum=0.2)

In [84]:
# Wide -> long, with the date column parsed for the merge.
df_SAR_long = (
    df_SAR.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='SAR')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [85]:
# Append the SAR column to the overlap table.
df_overlap = df_overlap.merge(
    df_SAR_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_overlap.append('SAR')
df_overlap = df_overlap[ls_overlap]

### Normalized SAR indicator

In [86]:
# SAR relative to the close, centred on zero.
df_SAR_norm = df_SAR.div(df_close).sub(1)

In [87]:
# Wide -> long, with the date column parsed for the merge.
df_SAR_norm_long = (
    df_SAR_norm.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='SAR_norm')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [88]:
# Append the normalised SAR column to the overlap table.
df_overlap = df_overlap.merge(
    df_SAR_norm_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_overlap.append('SAR_norm')
df_overlap = df_overlap[ls_overlap]

## factor reserve

In [89]:
# Missing-value count per column before saving.
df_overlap.isna().sum()

date              0
Ticker            0
timelag           0
BBANDS_upper      0
BBANDS_middle     0
BBANDS_lower      0
BBANDS_up         0
BBANDS_low        0
BBANDS_squeeze    0
HTITREND          0
HT_norm           0
SAR               0
SAR_norm          0
dtype: int64

In [90]:
# Persist the table without the helper join column.
df_overlap.drop(columns="timelag").to_pickle("factor/Overlap.pkl.zip", compression='zip')

# Momentum

In [91]:
# Columns kept in the momentum table; extended as each factor is merged in.
ls_momentum = ['date','Ticker','timelag']
# Placeholder; the first DM merge assigns the real frame.
df_momentum = pd.DataFrame()

## DM

In [92]:
# Plus / minus directional movement (10-period) from each ticker's high/low.
df_pDM, df_mDM = pd.DataFrame(), pd.DataFrame()
for ticker in df_close.columns:
    high_low = (df_high[ticker], df_low[ticker])
    df_pDM[ticker] = talib.PLUS_DM(*high_low, timeperiod=10)
    df_mDM[ticker] = talib.MINUS_DM(*high_low, timeperiod=10)

In [93]:
# Wide -> long for both directional-movement series.
df_pDM_long = df_pDM.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='pDM')
df_mDM_long = df_mDM.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='mDM')

In [94]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_pDM_long, df_mDM_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [95]:
# Start the momentum table: left-join both DM frames onto the return panel,
# matching the panel's 'timelag' date against the factor's 'DlyCalDt'.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
df_momentum = df_return_long
for frame in (df_pDM_long, df_mDM_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['pDM', 'mDM'])
df_momentum = df_momentum[ls_momentum]

## DI

In [96]:
# Plus / minus directional indicators (14-period) from high, low and close.
df_pDI, df_mDI = pd.DataFrame(), pd.DataFrame()
for ticker in df_close.columns:
    hlc = (df_high[ticker], df_low[ticker], df_close[ticker])
    df_pDI[ticker] = talib.PLUS_DI(*hlc, timeperiod=14)
    df_mDI[ticker] = talib.MINUS_DI(*hlc, timeperiod=14)

In [97]:
# Wide -> long for both directional indicators.
df_pDI_long = df_pDI.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='pDI')
df_mDI_long = df_mDI.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='mDI')

In [98]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_pDI_long, df_mDI_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [99]:
# Append the DI columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_pDI_long, df_mDI_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['pDI', 'mDI'])
df_momentum = df_momentum[ls_momentum]

## ADX

In [100]:
# Average directional index and its rating (both 15-period) per ticker.
df_ADX, df_ADXR = pd.DataFrame(), pd.DataFrame()
for ticker in df_close.columns:
    hlc = (df_high[ticker], df_low[ticker], df_close[ticker])
    df_ADX[ticker] = talib.ADX(*hlc, timeperiod=15)
    df_ADXR[ticker] = talib.ADXR(*hlc, timeperiod=15)

In [101]:
# Wide -> long for ADX and ADXR.
df_ADX_long = df_ADX.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='ADX')
df_ADXR_long = df_ADXR.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='ADXR')

In [102]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_ADX_long, df_ADXR_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [103]:
# Append the ADX columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_ADX_long, df_ADXR_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['ADX', 'ADXR'])
df_momentum = df_momentum[ls_momentum]

## APO/PPO

In [104]:
# Absolute and percentage price oscillators (12/26, matype=0) of the close.
df_APO, df_PPO = pd.DataFrame(), pd.DataFrame()
for ticker, close in df_close.items():
    df_APO[ticker] = talib.APO(close, fastperiod=12, slowperiod=26, matype=0)
    df_PPO[ticker] = talib.PPO(close, fastperiod=12, slowperiod=26, matype=0)

In [105]:
# Wide -> long for APO and PPO.
df_APO_long = df_APO.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='APO')
df_PPO_long = df_PPO.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='PPO')

In [106]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_APO_long, df_PPO_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [107]:
# Append the APO/PPO columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_APO_long, df_PPO_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['APO', 'PPO'])
df_momentum = df_momentum[ls_momentum]

## AROON

In [108]:
# Aroon up/down lines and the Aroon oscillator (15-period) from high/low.
df_AROON_up = pd.DataFrame()
df_AROON_down = pd.DataFrame()
df_AROONOSC = pd.DataFrame()
for i in df_close.columns:
    # talib.AROON returns (aroondown, aroonup) -- DOWN first. Unpacking it as
    # (up, down) stored each line under the other's name.
    df_AROON_down[i], df_AROON_up[i] = talib.AROON(df_high[i], df_low[i], timeperiod=15)
    df_AROONOSC[i] = talib.AROONOSC(df_high[i], df_low[i], timeperiod=15)

In [109]:
# Wide -> long for the three Aroon series.
df_AROON_up_long = df_AROON_up.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='AROON_up')
df_AROON_down_long = df_AROON_down.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='AROON_down')
df_AROONOSC_long = df_AROONOSC.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='AROONOSC')

In [110]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_AROON_up_long, df_AROON_down_long, df_AROONOSC_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [111]:
# Append the Aroon columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_AROON_up_long, df_AROON_down_long, df_AROONOSC_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['AROON_up', 'AROON_down', 'AROONOSC'])
df_momentum = df_momentum[ls_momentum]

## BOP/CCI

In [112]:
# Balance of power (from OHLC) and 15-period commodity channel index per ticker.
df_BOP, df_CCI = pd.DataFrame(), pd.DataFrame()
for ticker in df_close.columns:
    hlc = (df_high[ticker], df_low[ticker], df_close[ticker])
    df_BOP[ticker] = talib.BOP(df_open[ticker], *hlc)
    df_CCI[ticker] = talib.CCI(*hlc, timeperiod=15)

In [113]:
# Wide -> long for BOP and CCI.
df_BOP_long = df_BOP.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='BOP')
df_CCI_long = df_CCI.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='CCI')

In [114]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_BOP_long, df_CCI_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [115]:
# Append the BOP/CCI columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_BOP_long, df_CCI_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['BOP', 'CCI'])
df_momentum = df_momentum[ls_momentum]

## MACD

In [116]:
# MACD line, signal line and histogram (12/26/9) of the close, one column per ticker.
df_MACD, df_MACD_signal, df_MACD_hist = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
for ticker, close in df_close.items():
    macd_line, signal_line, histogram = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    df_MACD[ticker] = macd_line
    df_MACD_signal[ticker] = signal_line
    df_MACD_hist[ticker] = histogram

In [117]:
# Wide -> long for the three MACD series.
df_MACD_long = df_MACD.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='MACD')
df_MACD_signal_long = df_MACD_signal.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='MACD_signal')
df_MACD_hist_long = df_MACD_hist.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='MACD_hist')

In [118]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_MACD_long, df_MACD_signal_long, df_MACD_hist_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [119]:
# Append the MACD columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_MACD_long, df_MACD_signal_long, df_MACD_hist_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['MACD', 'MACD_signal', 'MACD_hist'])
df_momentum = df_momentum[ls_momentum]

## CMO/MFI

In [120]:
# 14-period Chande momentum oscillator (close) and money flow index (HLC + volume).
df_CMO, df_MFI = pd.DataFrame(), pd.DataFrame()
for ticker in df_close.columns:
    hlcv = (df_high[ticker], df_low[ticker], df_close[ticker], df_volume[ticker])
    df_CMO[ticker] = talib.CMO(df_close[ticker], timeperiod=14)
    df_MFI[ticker] = talib.MFI(*hlcv, timeperiod=14)

In [121]:
# Wide -> long for CMO and MFI.
df_CMO_long = df_CMO.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='CMO')
df_MFI_long = df_MFI.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='MFI')

In [122]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_CMO_long, df_MFI_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [123]:
# Append the CMO/MFI columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_CMO_long, df_MFI_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['CMO', 'MFI'])
df_momentum = df_momentum[ls_momentum]

## RSI

In [124]:
# 14-period RSI plus the two outputs of stochastic RSI, one column per ticker.
df_RSI, df_RSI_stoch_fast, df_RSI_stoch_slow = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
for ticker, close in df_close.items():
    df_RSI[ticker] = talib.RSI(close, timeperiod=14)
    first_out, second_out = talib.STOCHRSI(close, timeperiod=14, fastk_period=5, fastd_period=3, fastd_matype=0)
    df_RSI_stoch_fast[ticker] = first_out
    df_RSI_stoch_slow[ticker] = second_out

In [125]:
# Wide -> long for the three RSI series.
df_RSI_long = df_RSI.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='RSI')
df_RSI_stoch_fast_long = df_RSI_stoch_fast.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='RSI_stoch_fast')
df_RSI_stoch_slow_long = df_RSI_stoch_slow.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='RSI_stoch_slow')

In [126]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_RSI_long, df_RSI_stoch_fast_long, df_RSI_stoch_slow_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [127]:
# Append the RSI columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_RSI_long, df_RSI_stoch_fast_long, df_RSI_stoch_slow_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['RSI', 'RSI_stoch_fast', 'RSI_stoch_slow'])
df_momentum = df_momentum[ls_momentum]

## STOCH/ULTOSC/WILLR

In [128]:
# Stochastic ratio (first STOCH output over the second), ultimate oscillator
# and Williams %R, one column per ticker.
df_STOCH, df_UTLOSC, df_WILLR = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
for ticker in df_close.columns:
    hlc = (df_high[ticker], df_low[ticker], df_close[ticker])
    stoch_first, stoch_second = talib.STOCH(
        *hlc,
        fastk_period=5,
        slowk_period=3,
        slowk_matype=0,
        slowd_period=3,
        slowd_matype=0,
    )
    # NOTE(review): a 0/0 here yields NaN -- presumably the source of the
    # missing STOCH values handled further down; confirm.
    df_STOCH[ticker] = stoch_first/stoch_second
    df_UTLOSC[ticker] = talib.ULTOSC(*hlc, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    df_WILLR[ticker] = talib.WILLR(*hlc, timeperiod=14)

In [129]:
# Wide -> long for STOCH, UTLOSC and WILLR.
df_STOCH_long = df_STOCH.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='STOCH')
df_UTLOSC_long = df_UTLOSC.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='UTLOSC')
df_WILLR_long = df_WILLR.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='WILLR')

In [130]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_STOCH_long, df_UTLOSC_long, df_WILLR_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [131]:
# Append the STOCH/UTLOSC/WILLR columns to the momentum table.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
for frame in (df_STOCH_long, df_UTLOSC_long, df_WILLR_long):
    df_momentum = df_momentum.merge(frame, **join_spec)
ls_momentum.extend(['STOCH', 'UTLOSC', 'WILLR'])
df_momentum = df_momentum[ls_momentum]

## factor reserve

In [132]:
# Missing-value count per column before any filling.
df_momentum.isna().sum()

date              0
Ticker            0
timelag           0
pDM               0
mDM               0
pDI               0
mDI               0
ADX               0
ADXR              0
APO               0
PPO               0
AROON_up          0
AROON_down        0
AROONOSC          0
BOP               0
CCI               0
MACD              0
MACD_signal       0
MACD_hist         0
CMO               0
MFI               0
RSI               0
RSI_stoch_fast    0
RSI_stoch_slow    0
STOCH             3
UTLOSC            0
WILLR             0
dtype: int64

In [134]:
# Show the rows whose STOCH value is missing.
df_momentum[df_momentum["STOCH"].isnull()]

Unnamed: 0,date,Ticker,timelag,pDM,mDM,pDI,mDI,ADX,ADXR,APO,...,MACD_signal,MACD_hist,CMO,MFI,RSI,RSI_stoch_fast,RSI_stoch_slow,STOCH,UTLOSC,WILLR
132826,2019-02-12,CTRM,2019-02-11,0.0,0.0,0.0,0.0,0.0,0.0,8.881784e-16,...,8.881784e-16,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-100.0
234388,2020-06-01,HYMC,2020-05-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-100.0
234389,2020-06-02,HYMC,2020-06-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,-100.0


In [135]:
# Forward-fill gaps within each ticker only. A plain ffill over the whole long
# frame would fill a ticker's first missing value from the previous ticker's
# last row. Gaps with no earlier row for the same ticker therefore stay NaN.
# (`fillna(method='ffill')` is also deprecated in recent pandas; `.ffill()` is
# the supported spelling.)
factor_cols = [col for col in df_momentum.columns if col not in ('date', 'Ticker', 'timelag')]
df_momentum = df_momentum.copy()
df_momentum[factor_cols] = df_momentum.groupby('Ticker')[factor_cols].ffill()

In [136]:
# Missing-value count per column after filling.
df_momentum.isna().sum()

date              0
Ticker            0
timelag           0
pDM               0
mDM               0
pDI               0
mDI               0
ADX               0
ADXR              0
APO               0
PPO               0
AROON_up          0
AROON_down        0
AROONOSC          0
BOP               0
CCI               0
MACD              0
MACD_signal       0
MACD_hist         0
CMO               0
MFI               0
RSI               0
RSI_stoch_fast    0
RSI_stoch_slow    0
STOCH             0
UTLOSC            0
WILLR             0
dtype: int64

In [138]:
# Persist the table without the helper join column.
df_momentum.drop(columns="timelag").to_pickle("factor/Momentum.pkl.zip", compression='zip')

# Volume

In [139]:
# Columns kept in the volume-factor table; extended as each factor is merged in.
ls_volume = ['date','Ticker','timelag']
# Placeholder; the first A/D merge assigns the real frame.
df_volume_factor = pd.DataFrame()

## Chaikin A/D

In [140]:
# Chaikin accumulation/distribution line and its 3/10 oscillator per ticker.
df_AD, df_ADOSC = pd.DataFrame(), pd.DataFrame()
for ticker in df_close.columns:
    hlcv = (df_high[ticker], df_low[ticker], df_close[ticker], df_volume[ticker])
    df_AD[ticker] = talib.AD(*hlcv)
    df_ADOSC[ticker] = talib.ADOSC(*hlcv, fastperiod=3, slowperiod=10)

In [141]:
# Wide -> long for AD and ADOSC.
df_AD_long = df_AD.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='AD')
df_ADOSC_long = df_ADOSC.reset_index().melt(id_vars='DlyCalDt', var_name='Ticker', value_name='ADOSC')

In [142]:
# Parse the date column of each long frame before it is used as a merge key.
for frame in (df_AD_long, df_ADOSC_long):
    frame['DlyCalDt'] = pd.to_datetime(frame['DlyCalDt'])

In [143]:
# Start the volume table: left-join AD and ADOSC onto the return panel,
# matching the panel's 'timelag' date against the factor's 'DlyCalDt'.
join_spec = dict(left_on=['timelag','Ticker'], right_on=['DlyCalDt','Ticker'], how='left')
df_volume_factor = df_return_long
for frame in (df_AD_long, df_ADOSC_long):
    df_volume_factor = df_volume_factor.merge(frame, **join_spec)
ls_volume.extend(['AD', 'ADOSC'])
df_volume_factor = df_volume_factor[ls_volume]

## OBV

In [144]:
# On-balance volume from close and volume, one column per ticker.
df_OBV = pd.DataFrame()
for ticker, close in df_close.items():
    df_OBV[ticker] = talib.OBV(close, df_volume[ticker])

In [145]:
# Wide -> long, with the date column parsed for the merge.
df_OBV_long = (
    df_OBV.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='OBV')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [146]:
# Append the OBV column to the volume table.
df_volume_factor = df_volume_factor.merge(
    df_OBV_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_volume.append('OBV')
df_volume_factor = df_volume_factor[ls_volume]

## VPT

In [147]:
def VPT(close, volume):
    """Volume Price Trend: running total of volume times one-period return.

    VPT_t = VPT_{t-1} + volume_t * (close_t - close_{t-1}) / close_{t-1}

    Parameters
    ----------
    close : pandas.Series
        Closing prices in chronological order.
    volume : pandas.Series
        Traded volume, aligned with ``close``.

    Returns
    -------
    pandas.Series
        Cumulative volume-weighted return. The first element is NaN because
        no prior close exists for it.
    """
    # Weight each period's return by that period's volume BEFORE accumulating.
    # `volume * close.pct_change(1).cumsum()` would instead multiply today's
    # volume by the cumulative return, which is not a running total.
    vpt = (volume * close.pct_change(1)).cumsum()
    return vpt
# cite : https://medium.com/@crisvelasquez/top-9-volume-indicators-in-python-e398791b98f9
# https://mp.weixin.qq.com/s/FEX1Ow2gnywieUn86tIWqQ

In [148]:
# Volume price trend per ticker via the VPT helper.
df_VPT = pd.DataFrame()
for ticker, close in df_close.items():
    df_VPT[ticker] = VPT(close, df_volume[ticker])

In [149]:
# Wide -> long, with the date column parsed for the merge.
df_VPT_long = (
    df_VPT.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='VPT')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [150]:
# Append the VPT column to the volume table.
df_volume_factor = df_volume_factor.merge(
    df_VPT_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_volume.append('VPT')
df_volume_factor = df_volume_factor[ls_volume]

## VWAP

In [151]:
def VWAP(high,low,close,volume):
    """Running volume-weighted average of the (high + low + close) / 3 price.

    Both sums accumulate from the first row of the inputs and are never
    reset, so each value covers the whole history up to that row.
    """
    weighted_price = volume * (high + low + close) / 3
    return weighted_price.cumsum() / volume.cumsum()
# cite : https://medium.com/@financial_python/building-a-vwap-indicator-in-python-81ff9157fb13

In [152]:
# Cumulative VWAP per ticker via the VWAP helper.
df_VWAP = pd.DataFrame()
for ticker in df_close.columns:
    hlcv = (df_high[ticker], df_low[ticker], df_close[ticker], df_volume[ticker])
    df_VWAP[ticker] = VWAP(*hlcv)

In [153]:
# Wide -> long, with the date column parsed for the merge.
df_VWAP_long = (
    df_VWAP.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='VWAP')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [154]:
# Append the VWAP column to the volume table.
df_volume_factor = df_volume_factor.merge(
    df_VWAP_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_volume.append('VWAP')
df_volume_factor = df_volume_factor[ls_volume]

## factor reserve

In [155]:
# Missing-value count per column before saving.
df_volume_factor.isna().sum()

date       0
Ticker     0
timelag    0
AD         0
ADOSC      0
OBV        0
VPT        0
VWAP       0
dtype: int64

In [157]:
# Persist the table without the helper join column.
df_volume_factor.drop(columns="timelag").to_pickle("factor/Volume.pkl.zip", compression='zip')

# Volatility

In [162]:
# Columns kept in the volatility table; extended as each factor is merged in.
ls_volatility = ['date','Ticker','timelag']
# NOTE(review): the cells below build and save `df_volatility`, not
# `df_volatility_factor`, so this placeholder appears unused -- confirm.
df_volatility_factor = pd.DataFrame()

## ATR

### factor generation

In [163]:
# 15-period average true range per ticker.
df_ATR = pd.DataFrame()
for ticker in df_close.columns:
    hlc = (df_high[ticker], df_low[ticker], df_close[ticker])
    df_ATR[ticker] = talib.ATR(*hlc, timeperiod=15)

In [164]:
# Wide -> long, with the date column parsed for the merge.
df_ATR_long = (
    df_ATR.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='ATR')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [165]:
# Start the volatility table: left-join ATR onto the return panel,
# matching the panel's 'timelag' date against the factor's 'DlyCalDt'.
df_volatility = df_return_long.merge(
    df_ATR_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_volatility.append('ATR')
df_volatility = df_volatility[ls_volatility]

### normalized version of ATR using rolling mean of price

In [166]:
# ATR divided by the 14-row rolling mean of the close.
# NOTE(review): ATR above uses timeperiod=15 while this mean uses 14 rows --
# confirm the mismatch is intended.
rolling_mean_close = df_close.rolling(14).mean()
df_ATR_normalized = df_ATR.div(rolling_mean_close)

In [167]:
# Wide -> long, with the date column parsed for the merge.
df_ATR_normalized_long = (
    df_ATR_normalized.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='ATR_normalized')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [168]:
# Append the normalised ATR column to the volatility table.
df_volatility = df_volatility.merge(
    df_ATR_normalized_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_volatility.append('ATR_normalized')
df_volatility = df_volatility[ls_volatility]

## NATR

### factor generation

In [169]:
# 15-period normalised average true range per ticker.
df_NATR = pd.DataFrame()
for ticker in df_close.columns:
    hlc = (df_high[ticker], df_low[ticker], df_close[ticker])
    df_NATR[ticker] = talib.NATR(*hlc, timeperiod=15)

In [170]:
# Wide -> long, with the date column parsed for the merge.
df_NATR_long = (
    df_NATR.reset_index()
    .melt(id_vars='DlyCalDt', var_name='Ticker', value_name='NATR')
    .assign(DlyCalDt=lambda frame: pd.to_datetime(frame['DlyCalDt']))
)

In [171]:
# Append the NATR column to the volatility table.
df_volatility = df_volatility.merge(
    df_NATR_long,
    left_on=['timelag','Ticker'],
    right_on=['DlyCalDt','Ticker'],
    how='left',
)
ls_volatility.append('NATR')
df_volatility = df_volatility[ls_volatility]

## factor reserve

In [172]:
# Missing-value count per column before saving.
df_volatility.isna().sum()

date              0
Ticker            0
timelag           0
ATR               0
ATR_normalized    0
NATR              0
dtype: int64

In [174]:
# Persist the table without the helper join column.
df_volatility.drop(columns="timelag").to_pickle("factor/Volatility.pkl.zip", compression='zip')