In [1]:
# Import libraries and dependencies
import talib as ta
from talib import abstract
from talib import MA_Type
import pandas as pd

In [2]:
# Load the ETF signal data: the first two CSV rows form the two-level column header and the
# first column becomes the index, with its values parsed as dates
df = pd.read_csv(
    '../Data/ETF_signal_data.csv',
    index_col=[0],
    header=[0, 1],
    parse_dates=True,
)

In [3]:
# Collect every distinct ticker (the column label left after dropping the last header level)
# exactly once, in order of first appearance, for later use
ticker_list = df.columns.droplevel(-1).unique().tolist()

In [17]:
# Start from an empty dataframe whose columns are an (empty) two-level MultiIndex, so that
# columns can later be added under (ticker, indicator) labels
indicator_metrics_df = pd.DataFrame(columns=pd.MultiIndex.from_arrays([[], []]))

In [18]:
# Iterate through all tickers and add each ticker's indicator metrics to 'indicator_metrics_df'
# under two-level (ticker, indicator) column labels
for x in ticker_list:

    # Slice the ticker's columns out of 'df' once and reuse them for every indicator
    prices = df[x]
    high, low, close = prices['high'], prices['low'], prices['close']

    # ATR indicator
    indicator_metrics_df[x, 'ATR'] = ta.ATR(high, low, close, timeperiod=14)

    # CCI indicator
    indicator_metrics_df[x, 'CCI'] = ta.CCI(high, low, close, timeperiod=14)

    # MACD indicator
    # Unlike the other indicators, which return a single series, this one returns a tuple of
    # three series, so each of them is paired with its column label and added separately.
    # NOTE: TA-Lib returns (macd, macdsignal, macdhist) in that order, so 'MACD_FAST_PERIOD'
    # holds the MACD line, 'MACD_SLOW_PERIOD' the signal line and 'MACD_SIGNAL_PERIOD' the
    # histogram. The labels are left as they are because renaming them would change the
    # column names written to the CSV file.
    macd_labels = ('MACD_FAST_PERIOD', 'MACD_SLOW_PERIOD', 'MACD_SIGNAL_PERIOD')
    macd_outputs = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    for label, values in zip(macd_labels, macd_outputs):
        indicator_metrics_df[x, label] = values

    # RSI indicator
    indicator_metrics_df[x, 'RSI'] = ta.RSI(close, timeperiod=14)

# Sort the column labels
indicator_metrics_df = indicator_metrics_df.sort_index(axis=1)

In [35]:
# Save the indicator-only dataframe as a CSV file
indicator_metrics_df.to_csv(path_or_buf='../Data/engineered_features.csv')

In [39]:
# Show five randomly chosen rows of the indicator-only dataframe, ordered by index
indicator_metrics_df.sample(n=5).sort_index()

Unnamed: 0_level_0,GDX,GDX,GDX,GDX,GDX,GDX,GDXJ,GDXJ,GDXJ,GDXJ,...,XLU,XLU,XLU,XLU,XLV,XLV,XLV,XLV,XLV,XLV
Unnamed: 0_level_1,ATR,CCI,MACD_FAST_PERIOD,MACD_SIGNAL_PERIOD,MACD_SLOW_PERIOD,RSI,ATR,CCI,MACD_FAST_PERIOD,MACD_SIGNAL_PERIOD,...,MACD_FAST_PERIOD,MACD_SIGNAL_PERIOD,MACD_SLOW_PERIOD,RSI,ATR,CCI,MACD_FAST_PERIOD,MACD_SIGNAL_PERIOD,MACD_SLOW_PERIOD,RSI
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-03-16,0.426741,-118.042658,-0.264404,0.028229,-0.292633,40.974965,0.692065,-142.857021,-0.254544,0.02123,...,0.057086,0.16897,-0.111884,58.491729,1.12503,50.3074,0.1836,0.172387,0.011213,52.775185
2018-04-23,0.403375,-14.222734,0.218363,0.033396,0.184967,50.874848,0.645038,-15.372012,0.350382,0.052929,...,0.183854,-0.006985,0.190839,52.164903,1.147029,46.736335,-0.144055,0.224449,-0.368505,50.098173
2020-06-10,1.364725,-6.573055,0.115276,-0.309302,0.424578,54.444975,1.978098,35.520642,1.085354,-0.304341,...,1.225958,0.363245,0.862713,59.35398,1.77408,63.913554,1.275546,-0.003917,1.279463,57.678432
2021-01-20,1.048979,-38.352036,-0.191756,-0.184737,-0.007019,49.333134,1.797336,-62.414318,-0.417295,-0.549986,...,0.142283,0.187879,-0.045596,56.348454,1.46103,88.070683,1.816458,0.215816,1.600641,72.80671
2021-09-23,0.729255,-119.170116,-0.633921,-0.160762,-0.47316,32.550287,1.157051,-118.854377,-0.835483,-0.207286,...,-0.49916,-0.411545,-0.087615,32.257248,1.496697,-28.563282,-0.38406,-0.351863,-0.032197,47.497502


In [20]:
# Iterate through all tickers and add each ticker's indicator metrics to 'df' itself, so the
# indicators sit next to the columns that were read from the CSV file
for x in ticker_list:

    # Slice the ticker's columns out of 'df' once and reuse them for every indicator
    prices = df[x]
    high, low, close = prices['high'], prices['low'], prices['close']

    # ATR indicator
    df[x, 'ATR'] = ta.ATR(high, low, close, timeperiod=14)

    # CCI indicator
    df[x, 'CCI'] = ta.CCI(high, low, close, timeperiod=14)

    # MACD indicator
    # Unlike the other indicators, which return a single series, this one returns a tuple of
    # three series, so each of them is paired with its column label and added to 'df'.
    # NOTE: TA-Lib returns (macd, macdsignal, macdhist) in that order, so 'MACD_FAST_PERIOD'
    # holds the MACD line, 'MACD_SLOW_PERIOD' the signal line and 'MACD_SIGNAL_PERIOD' the
    # histogram. The labels are left as they are because renaming them would change the
    # column names written to the CSV file.
    macd_labels = ('MACD_FAST_PERIOD', 'MACD_SLOW_PERIOD', 'MACD_SIGNAL_PERIOD')
    macd_outputs = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    for label, values in zip(macd_labels, macd_outputs):
        df[x, label] = values

    # RSI indicator
    df[x, 'RSI'] = ta.RSI(close, timeperiod=14)

# Sort the column labels
df = df.sort_index(axis=1)

In [41]:
# Save the full dataframe (original columns plus the added indicators) as a CSV file
df.to_csv(path_or_buf='../Data/ETF_data_including_engineered_features_and_signals.csv')

In [42]:
# Show five randomly chosen rows of the whole dataframe, ordered by index
df.sample(n=5).sort_index()

Unnamed: 0_level_0,GDX,GDX,GDX,GDX,GDX,GDX,GDX,GDX,GDX,GDX,...,XLV,XLV,XLV,XLV,XLV,XLV,XLV,XLV,XLV,XLV
Unnamed: 0_level_1,ATR,BB_LOWER,BB_MIDDLE,BB_UPPER,CCI,MACD_FAST_PERIOD,MACD_SIGNAL_PERIOD,MACD_SLOW_PERIOD,RSI,SIGNAL,...,MACD_SLOW_PERIOD,RSI,SIGNAL,close,dividends,high,low,open,stock splits,volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-09-27,0.462219,21.842538,23.207237,24.571936,-116.069247,-0.056009,-0.201718,0.145709,41.146356,0.0,...,0.577233,51.023186,0.0,74.822823,0.0,75.044026,74.500232,75.044026,0,5384700
2017-10-27,0.353229,21.595923,22.322536,23.049149,-147.838847,-0.225688,-0.096903,-0.128785,36.547956,0.0,...,0.404704,45.94117,0.0,75.707664,0.0,75.79062,75.421943,75.458811,0,6122000
2019-06-05,0.460765,18.889843,20.325698,21.761552,158.732549,0.253954,0.286002,-0.032048,71.510126,-1.0,...,-0.242612,55.536784,0.0,84.54232,0.0,84.740443,83.938515,84.523447,0,24291700
2019-11-27,0.546031,25.146136,26.162789,27.179441,-30.417368,-0.161931,-0.006747,-0.155183,46.35394,0.0,...,1.484778,80.190761,0.0,94.988571,0.0,95.131284,94.550924,94.636555,0,6333200
2021-05-07,0.881496,33.277304,35.000464,36.723624,166.219166,0.584473,0.071346,0.513127,65.043297,-1.0,...,1.630743,70.866676,0.0,121.786659,0.0,122.424698,121.187885,121.236968,0,7576700
