In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import re
from datetime import datetime, date, timedelta
import warnings
warnings.filterwarnings('ignore')

import talib
from candlestick import candlestick

In [2]:
# Load the S&P 500 constituent tables (ticker symbols and their GICS sectors)
df_ind = pd.read_excel('Index/SP500 tickets.xlsx')
df_ind2 = pd.read_csv('Index/Stocks in the SP 500 Index.csv')

# The spreadsheet headers contain non-breaking spaces (U+00A0); swap them for
# ordinary spaces so columns such as 'GICS Sector' can be addressed by name.
df_ind = df_ind.rename(columns=lambda name: name.replace(u'\xa0', u' '))

In [3]:
# Define normalisation function
def NormalizeData(data):
    """Min-max scale ``data`` so its smallest value maps to 0 and its largest to 1.

    ``data`` must support NumPy-style arithmetic (ndarray or pandas Series).
    When every value is identical the range is zero and the division yields
    NaN/inf rather than raising.
    """
    lowest = np.min(data)
    spread = np.max(data) - lowest
    return (data - lowest) / spread

# Define Multiples
def multiples(value, length):
    """Return the first ``length`` multiples of ``value``: [value, 2*value, ...].

    A ``length`` of zero (or less) gives an empty list.
    """
    result = []
    for factor in range(1, length + 1):
        result.append(value * factor)
    return result

# Define TA Name with Multiples
def PrefixAddMultiples(prefix, items):
    """Build column names of the form ``'<prefix>_<item>'``, one per element of ``items``.

    Each item is converted with ``str()`` first, so numeric periods are accepted;
    the order of ``items`` is preserved.
    """
    return [prefix + '_' + label for label in map(str, items)]

# Define Cross trading rules TA Name with Multiples
def PrefixAddCrossMultiples(prefix, items):
    """Build names ``'<prefix>_<short>_<long>'`` for every pair with short < long.

    Pairs are generated with the short period as the outer loop and the long
    period as the inner loop, both in the order given by ``items``.
    """
    return [
        prefix + '_' + str(short) + '_' + str(long_)
        for short in items
        for long_ in items
        if short < long_
    ]

In [None]:
# Load the daily price history of every ticker in the index table.
# To experiment on a small random subset instead, use e.g.:
# stocks = list(df_ind['Symbol'].sample(5, random_state = 7600))
stocks = df_ind['Symbol'].unique()

# Standardized column names applied to every per-ticker CSV
# (assumes each file has exactly these seven columns in this order -- TODO confirm)
cols = ['report_date', 'open', 'high', 'low', 'close', 'volume', 'adjusted_close']

# Collect one frame per ticker and concatenate once at the end.
# DataFrame.append inside a loop copies the accumulated frame on every
# iteration (quadratic cost) and no longer exists in pandas >= 2.0.
price_frames = []

for stock in stocks:

    # Read the csv
    temp = pd.read_csv(f'Stock Price/{stock}_2022-07-31.csv')
    temp.columns = cols
    temp['ticker'] = stock

    price_frames.append(temp)

# Long-format price table: one row per (ticker, report_date).
# Each ticker keeps its own 0..n-1 row index, exactly as with the former append.
df_price = pd.concat(price_frames) if price_frames else pd.DataFrame()

In [None]:
# List out all GICS sectors the selected stocks belong to
inds = list(df_ind[df_ind['Symbol'].isin(stocks)]['GICS Sector'].unique())

# Every index constituent in those sectors (not only the selected stocks)
inds_df = df_ind[df_ind['GICS Sector'].isin(inds)]

# Stock price for all tickers belonging to the sectors
cols = ['report_date', 'open', 'high', 'low', 'close', 'volume', 'adjusted_close']

# Collect one frame per ticker and concatenate once at the end
# (DataFrame.append in a loop is quadratic and was removed in pandas 2.0).
ind_price_frames = []

for _, row in inds_df.iterrows():

    ticker = row['Symbol']
    temp = pd.read_csv(f'Stock Price/{ticker}_2022-07-31.csv')
    temp.columns = cols
    temp['ticker'] = ticker
    temp['inds'] = row['GICS Sector']

    ind_price_frames.append(temp)

df_ind_price = pd.concat(ind_price_frames) if ind_price_frames else pd.DataFrame()

# Create the sector index: for each (date, sector) every price/volume column is
# the plain sum over the sector's tickers (an unweighted, un-normalised aggregate).
value_cols = ['open', 'high', 'low', 'close', 'volume', 'adjusted_close']
df_ind_idx = (
    df_ind_price
    .groupby(['report_date', 'inds'], as_index = False)
    .agg({col: 'sum' for col in value_cols})
)

In [None]:
# For stock
df_stock_ta = pd.DataFrame()

for stock in stocks:
    temp = df_price[df_price['ticker'] == stock]

    open_ = temp['open']
    close = temp['adjusted_close']
    high = temp['high']
    low = temp['low']
    volume = temp['volume']

    ## Overlap Studies - need to create indicators
    # Calculate BBANDS - Bollinger Bands - 14 days
    temp['upperband'], temp['middleband'], temp['lowerband'] = talib.BBANDS(close, timeperiod=14, nbdevup=2, nbdevdn=2, matype=0)
    
    # Calculate Double Exponential Moving Average
    temp['dema_30'] = talib.DEMA(close, timeperiod=30)
    # temp['dema_30'] = NormalizeData(temp['dema_30'])
    
    # Calculate Exponential Moving Average
    # temp['ema_30'] = talib.EMA(close, timeperiod=30)
    # temp['ema_30'] = NormalizeData(temp['ema_30'])
    
    periods = multiples(5, 10) # VT updated 20220918
    for period in periods:
        temp[f'ema_{period}'] = talib.EMA(close, timeperiod=period)
        # temp[f'ema_{period}'] = NormalizeData(temp[f'ema_{period}'])
    
    # Calculate HT_TRENDLINE - Hilbert Transform - Instantaneous Trendline
    temp['HT'] = talib.HT_TRENDLINE(close)
    # temp['HT'] = NormalizeData(temp['HT'])
    
    # Calculate KAMA - Kaufman Adaptive Moving Average
    temp['kama_30'] = talib.KAMA(close, timeperiod=30)
    # temp['kama_30'] = NormalizeData(temp['kama_30'])
    
    # Calculate MA
    periods = [10, 20, 50] # VT updated 20220828
    for period in periods:
        temp[f'MA_{period}'] = talib.MA(close, period, matype=0)
        # temp[f'MA_{period}'] = NormalizeData(temp[f'MA_{period}'])
        
    # Calculate MAMA - MESA Adaptive Moving Average
    temp['mama'], temp['fama'] = talib.MAMA(close, fastlimit=0.5, slowlimit=0.05)
    # temp['mama'] = NormalizeData(temp['mama'])
    # temp['fama'] = NormalizeData(temp['fama'])

    # Calculate MAVP - Moving average with variable period - skipped
    # temp['mavp'] = talib.MAVP(close, periods, minperiod=2, maxperiod=30, matype=0)
    
    # Calculate MIDPOINT - MidPoint over period
    temp['midpoint'] = talib.MIDPOINT(close, timeperiod=14)
    # temp['midpoint'] = NormalizeData(temp['midpoint'])
    
    # Calculate MIDPRICE - Midpoint Price over period
    temp['midprice'] = talib.MIDPRICE(high, low, timeperiod=14)
    # temp['midprice'] = NormalizeData(temp['midprice'])
    
    # Calculate SAR - Parabolic SAR - to check for 'AOS' stock
    #temp['SAR'] = talib.SAR(high, low, acceleration=0, maximum=0)
    #temp['SAR'] = NormalizeData(temp['SAR'])
    
    # Calculate SAREXT - Parabolic SAR - Extended - to check for 'AOS' stock
    #temp['SAREXT'] = talib.SAREXT(high, low, startvalue=0, offsetonreverse=0, accelerationinitlong=0, accelerationlong=0, accelerationmaxlong=0, accelerationinitshort=0, accelerationshort=0, accelerationmaxshort=0)
    #temp['SAREXT'] = NormalizeData(temp['SAREXT'])
    
    # Calculate SMA
    periods = [30, 50] # VT updated 20220828
    for period in periods:
        temp[f'SMA_{period}'] = talib.SMA(close, period)
        # temp[f'SMA_{period}'] = NormalizeData(temp[f'SMA_{period}'])
        
    # Calculate T3 - Triple Exponential Moving Average (T3)
    temp['T3ema'] = talib.T3(close, timeperiod=5, vfactor=0)
    # temp['T3ema'] = NormalizeData(temp['T3ema'])
    
    # Calculate TEMA - Triple Exponential Moving Average
    temp['tema_30'] = talib.TEMA(close, timeperiod = 30)
    # temp['tema_30'] = NormalizeData(temp['tema_30'])
    
    # Calculate TRIMA - Triangular Moving Average
    temp['trima'] = talib.TRIMA(close, timeperiod=30)
    # temp['trima'] = NormalizeData(temp['trima'])

    # Calculate WMA - Weighted Moving Average
    temp['wma'] = talib.WMA(close, timeperiod=30)
    # temp['wma'] = NormalizeData(temp['wma'])
    
    ## Momentum Indicator Functions
    # Calculate ADX - Average Directional Movement Index
    temp['adx'] = talib.ADX(high, low, close, timeperiod=14)
    # temp['adx'] = NormalizeData(temp['adx'])
    
    # Calculate ADXR - Average Directional Movement Index Rating
    temp['adxr'] = talib.ADXR(high, low, close, timeperiod=14)
    # temp['adxr'] = NormalizeData(temp['adxr'])
    
    # Calculate APO - Absolute Price Oscillator
    temp['apo'] = talib.APO(close, fastperiod=12, slowperiod=26, matype=0)
    # temp['apo'] = NormalizeData(temp['apo'])
    
    # Calculate AROON - Aroon
    temp['aroondown'], temp['aroonup'] = talib.AROON(high, low, timeperiod=25)
    #temp['aroondown'] = NormalizeData(temp['aroondown'])
    #temp['aroonup'] = NormalizeData(temp['aroonup'])
    
    # Calculate AROONOSC - Aroon Oscillator
    temp['aroonosc'] = talib.AROONOSC(high, low, timeperiod=14)
    #temp['aroonosc'] = NormalizeData(temp['aroonosc'])
    
    # Calculate BOP - Balance Of Power
    temp['bop'] = talib.BOP(open_, high, low, close)
    #temp['bop'] = NormalizeData(temp['bop'])
    
    # Calculate CCI - Commodity Channel Index
    temp['cci'] = talib.CCI(high, low, close, timeperiod=14)
    #temp['cci'] = NormalizeData(temp['cci'])
    
    # Calculate CMO - Chande Momentum Oscillator
    temp['cmo'] = talib.CMO(close, timeperiod=14)
    #temp['cmo'] = NormalizeData(temp['cmo'])
    
    # DX - Directional Movement Index
    temp['dxi'] = talib.DX(high, low, close, timeperiod=14)
    #temp['dxi'] = NormalizeData(temp['dxi'])
    
    # Calculate MACD - Moving Average Convergence/Divergence
    temp['macd'], temp['macdsignal'], temp['macdhist'] = talib.MACD(close)
    #temp['macd'] = NormalizeData(temp['macd'])
    #temp['macdsignal'] = NormalizeData(temp['macdsignal'])
    #temp['macdhist'] = NormalizeData(temp['macdhist'])
    
    # MACDEXT - MACD with controllable MA type
    #temp['macd_ext'], temp['macdsignal_ext'], temp['macdhist_ext'] = talib.MACDEXT(close, fastperiod=12, fastmatype=0, slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0)
    
    # MACDFIX - Moving Average Convergence/Divergence Fix 12/26
    #temp['macd_ext'], temp['macdsignal_ext'], temp['macdhist_ext'] = talib.MACDFIX(close, signalperiod=9)
    
    # MFI - Money Flow Index
    temp['mfi'] = talib.MFI(high, low, close, volume, timeperiod=14)
    #temp['mfi'] = NormalizeData(temp['mfi'])
    
    # MINUS_DI - Minus Directional Indicator
    temp['MINUS_DI'] = talib.MINUS_DI(high, low, close, timeperiod=14)
    #temp['MINUS_DI'] = NormalizeData(temp['MINUS_DI'])
    
    # MINUS_DM - Minus Directional Movement
    temp['MINUS_DM'] = talib.MINUS_DM(high, low, timeperiod=14)
    #temp['MINUS_DM'] = NormalizeData(temp['MINUS_DM'])
    
    # MOM - Momentum
    temp['mom'] = talib.MOM(close, timeperiod=10)
    #temp['mom'] = NormalizeData(temp['mom'])
    
    # PLUS_DI - Plus Directional Indicator
    temp['plus_di'] = talib.PLUS_DI(high, low, close, timeperiod=14)
    #temp['plus_di'] = NormalizeData(temp['plus_di'])
    
    # PLUS_DM - Plus Directional Movement
    temp['plus_dm'] = talib.PLUS_DM(high, low, timeperiod=14)
    #temp['plus_dm'] = NormalizeData(temp['plus_dm'])
    
    # PPO - Percentage Price Oscillator
    temp['ppo'] = talib.PPO(close, fastperiod=12, slowperiod=26, matype=0)
    #temp['ppo'] = NormalizeData(temp['ppo'])
    
    # ROC - Rate of change : ((price/prevPrice)-1)*100
    #temp['roc'] = talib.ROC(close, timeperiod=10)
    #temp['roc'] = NormalizeData(temp['roc'])
    
    # ROCP - Rate of change Percentage: (price-prevPrice)/prevPrice
    #temp['rocp'] = talib.ROCP(close, timeperiod=10)
    #temp['rocp'] = NormalizeData(temp['rocp'])
    
    # ROCR - Rate of change ratio: (price/prevPrice)
    #temp['rocr'] = talib.ROCR(close, timeperiod=10)
    #temp['rocr'] = NormalizeData(temp['rocr'])
    
    # ROCR100 - Rate of change ratio 100 scale: (price/prevPrice)*100
    # temp['rocr100'] = ROCR100(close, timeperiod=10)

    # Calculate RSI
    temp['rsi_14'] = talib.RSI(close, timeperiod = 14)
    #temp['rsi_14'] = NormalizeData(temp['rsi_14'])
    
    # STOCH - Stochastic
    temp['slowk'], temp['slowd'] = talib.STOCH(high, low, close, fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    #temp['slowk'] = NormalizeData(temp['slowk'])
    #temp['slowd'] = NormalizeData(temp['slowd'])
    
    # Calculate STOCHF - Stochastic Fast
    temp['stochf_k'], temp['stochf_d'] = talib.STOCHF(high, low, close)
    #temp['stochf_k'] = NormalizeData(temp['stochf_k'])
    #temp['stochf_d'] = NormalizeData(temp['stochf_d'])
    
    # STOCHRSI - Stochastic Relative Strength Index
    temp['fastk'], temp['fastd'] = talib.STOCHRSI(close, timeperiod=14, fastk_period=5, fastd_period=3, fastd_matype=0)
    #temp['fastk'] = NormalizeData(temp['fastk'])
    #temp['fastd'] = NormalizeData(temp['fastd'])
    
    # TRIX - 1-day Rate-Of-Change (ROC) of a Triple Smooth EMA
    temp['trix'] = talib.TRIX(close, timeperiod=30)
    #temp['trix'] = NormalizeData(temp['trix'])
    
    # ULTOSC - Ultimate Oscillator
    temp['ultosc'] = talib.ULTOSC(high, low, close, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    #temp['ultosc'] = NormalizeData(temp['ultosc'])

    # WILLR - Williams' %R
    # Values below 20 indicate an overbought condition and a sell signal is generated when it crosses the 20 line. Values over 80 indicate an oversold condition and a buy signal is generated when it crosses the 80 line.
    temp['willR'] = talib.WILLR(high, low, close, timeperiod=14)
    #temp['willR'] = NormalizeData(temp['willR'])
    
    ## Volume Indicator Functions
    # Calculate AD - Chaikin A/D Line
    temp['ad'] = talib.AD(high, low, close, volume)
    #temp['ad'] = NormalizeData(temp['ad'])
    
    # Calculate ADOSC - Chaikin A/D Oscillator
    temp['adosc'] = talib.ADOSC(high, low, close, volume, fastperiod=3, slowperiod=10)
    #temp['adosc'] = NormalizeData(temp['adosc'])
    
    # Calculate OBV - On Balance Volume
    temp['obv'] = talib.OBV(close, volume)
    #temp['obv'] = NormalizeData(temp['obv'])
    
    ## Volatility Indicators
    # Calculate ATR - Average True Range
    temp['atr'] = talib.ATR(high, low, close, timeperiod=14)
    #temp['atr'] = NormalizeData(temp['atr'])
    
    # Calculate NATR - Normalized Average True Range
    temp['natr'] = talib.NATR(high, low, close, timeperiod=14)
    #temp['natr'] = NormalizeData(temp['natr'])
    
    # TRANGE - True Range
    temp['Trange'] = talib.TRANGE(high, low, close)
    #temp['Trange'] = NormalizeData(temp['Trange'])
    
    ## Price Transform
    # AVGPRICE - Average Price
    temp['AVGprice'] = talib.AVGPRICE(open_, high, low, close)
    #temp['AVGprice'] = NormalizeData(temp['AVGprice'])
    
    # MEDPRICE - Median Price
    temp['MEDprice'] = talib.MEDPRICE(high, low)
    #temp['MEDprice'] = NormalizeData(temp['MEDprice'])
    
    # TYPPRICE - Typical Price
    temp['TYPprice'] = talib.TYPPRICE(high, low, close)
    #temp['TYPprice'] = NormalizeData(temp['TYPprice'])
    
    # WCLPRICE - Weighted Close Price
    temp['WCLprice'] = talib.WCLPRICE(high, low, close)
    #temp['WCLprice'] = NormalizeData(temp['WCLprice'])
    
    ## Cycle Indicators
    # HT_DCPERIOD - Hilbert Transform - Dominant Cycle Period
    temp['HT_DCperiod'] = talib.HT_DCPERIOD(close)
    #temp['HT_DCperiod'] = NormalizeData(temp['HT_DCperiod'])
    
    # HT_DCPHASE - Hilbert Transform - Dominant Cycle Phase
    temp['HT_DCphase'] = talib.HT_DCPHASE(close)
    #temp['HT_DCphase'] = NormalizeData(temp['HT_DCphase'])
    
    # HT_PHASOR - Hilbert Transform - Phasor Components - to explore
    temp['inphase'], temp['quadrature'] = talib.HT_PHASOR(close)
    #temp['inphase'] = NormalizeData(temp['inphase'])
    #temp['quadrature'] = NormalizeData(temp['quadrature'])
    
    # HT_SINE - Hilbert Transform - SineWave
    temp['sine'], temp['leadsine'] = talib.HT_SINE(close)
    #temp['sine'] = NormalizeData(temp['sine'])
    #temp['leadsine'] = NormalizeData(temp['leadsine'])
    
    # HT_TRENDMODE - Hilbert Transform - Trend vs Cycle Mode
    temp['HT_TRENDMODE'] = talib.HT_TRENDMODE(close)
    #temp['HT_TRENDMODE'] = NormalizeData(temp['HT_TRENDMODE'])
    
    # Trading Rules Creation
    # Trend Indication by EMA Cross
    # Short-term EMA above Long-term EMA = Bullish, else Berish
    periods = multiples(5, 10) # VT updated 20220918
    
    for shortperiod in periods:
        for longperiod in periods:
            if shortperiod < longperiod:
                conditions = [
                    (temp[f'ema_{shortperiod}'] > temp[f'ema_{longperiod}']),
                    (temp[f'ema_{shortperiod}'] < temp[f'ema_{longperiod}'])
                ]
                choices = [100,-100]
                temp[f'trend_ema_{shortperiod}_{longperiod}'] = np.select(conditions, choices, default=0)
                # temp[f'ema_{period}'] = NormalizeData(temp[f'ema_{period}'])
    
    
    # Trend Indication by ADX, plus_di and minus_di
    conditions = [
        (temp['adx'] > 20.0) & (temp['plus_di'] > temp['MINUS_DI']),
        (temp['adx'] > 20.0) & (temp['plus_di'] < temp['MINUS_DI'])
    ]
    
    choices = [100,-100]
    
    temp['trend_adx'] = np.select(conditions, choices, default=0)
    # temp['trend_adx'] = NormalizeData(temp['trend_adx'])
    
    # Trend Indication by BBANDS - Bollinger Bands - 14 days
    temp['trend_bbands'] = (close - temp['middleband']) / (temp['upperband'] - temp['lowerband'])
    
    cols=['upperband', 'middleband', 'lowerband'] 
    temp=temp.drop(columns=cols)
    
    # Trend Indication by aroonup, aroondown
    temp['trend_aroon'] = temp['aroonup'] - temp['aroondown']
    #temp['trend_aroon'] = NormalizeData(temp['trend_aroon'])
    
    # Trend Indication by macd, macdsignal
    conditions = [
        (temp['macd'] > 0.0) & (temp['macd'] > temp['macdsignal']),
        (temp['macd'] < 0.0) & (temp['macd'] < temp['macdsignal'])
    ]
    
    choices = [100,-100]
    
    temp['trend_macd'] = np.select(conditions, choices, default=0)
    #temp['trend_macd'] = NormalizeData(temp['trend_macd'])
    
    # Trend Indication by CMO - above 50 overbought, below -50 oversold
    conditions = [
        (temp['cmo'] < -50),
        (temp['cmo'] > 50)
    ]
    
    choices = [100,-100]
    
    temp['trend_cmo'] = np.select(conditions, choices, default=0)
    
    # Trend Indication by RSI with resistance and support level
    resistance = 70
    support = 30

    temp['rsi_14_before'] = temp['rsi_14'].shift(1)
    
    conditions = [
        (temp['rsi_14_before'] > resistance) & (temp['rsi_14'] < resistance) & (temp['trend_macd']==100), #uptrend pause
        (temp['rsi_14_before'] < support) & (temp['rsi_14'] > support) & (temp['trend_macd']==-100) #downtrend pause
    ]
    
    choices = [-100,100]
    
    temp['trend_rsi'] = np.select(conditions, choices, default=0)
    temp['trend_rsi_macd'] = temp['trend_macd'] + temp['trend_rsi']
    #temp['trend_rsi_macd'] = NormalizeData(temp['trend_rsi_macd'])
    
    cols=['rsi_14_before', 'trend_rsi'] 
    temp=temp.drop(columns=cols)
    
    df_stock_ta = df_stock_ta.append(temp)

In [None]:
# For industries
df_ind_ta = pd.DataFrame()

for ind in inds:
    temp = df_ind_idx[df_ind_idx['inds'] == ind]

    open_ = temp['open']
    close = temp['adjusted_close']
    high = temp['high']
    low = temp['low']
    volume = temp['volume']

    ## Overlap Studies - need to create indicators
    # Calculate BBANDS - Bollinger Bands - to further explore
    temp['upperband'], temp['middleband'], temp['lowerband'] = talib.BBANDS(close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)
    
    # Calculate Double Exponential Moving Average
    temp['dema_30'] = talib.DEMA(close, timeperiod=30)
    # temp['dema_30'] = NormalizeData(temp['dema_30'])
    
    # Calculate Exponential Moving Average
    # temp['ema_30'] = talib.EMA(close, timeperiod=30)
    # temp['ema_30'] = NormalizeData(temp['ema_30'])
    
    periods = multiples(5, 10) # VT updated 20220918
    for period in periods:
        temp[f'ema_{period}'] = talib.EMA(close, timeperiod=period)
        # temp[f'ema_{period}'] = NormalizeData(temp[f'ema_{period}'])
    
    # Calculate HT_TRENDLINE - Hilbert Transform - Instantaneous Trendline
    temp['HT'] = talib.HT_TRENDLINE(close)
    # temp['HT'] = NormalizeData(temp['HT'])
    
    # Calculate KAMA - Kaufman Adaptive Moving Average
    temp['kama_30'] = talib.KAMA(close, timeperiod=30)
    # temp['kama_30'] = NormalizeData(temp['kama_30'])
    
    # Calculate MA
    periods = [10, 20, 50] # VT updated 20220828
    for period in periods:
        temp[f'MA_{period}'] = talib.MA(close, period, matype=0)
        # temp[f'MA_{period}'] = NormalizeData(temp[f'MA_{period}'])
        
    # Calculate MAMA - MESA Adaptive Moving Average
    temp['mama'], temp['fama'] = talib.MAMA(close, fastlimit=0.5, slowlimit=0.05)
    # temp['mama'] = NormalizeData(temp['mama'])
    # temp['fama'] = NormalizeData(temp['fama'])

    # Calculate MAVP - Moving average with variable period - skipped
    # temp['mavp'] = talib.MAVP(close, periods, minperiod=2, maxperiod=30, matype=0)
    
    # Calculate MIDPOINT - MidPoint over period
    temp['midpoint'] = talib.MIDPOINT(close, timeperiod=14)
    # temp['midpoint'] = NormalizeData(temp['midpoint'])
    
    # Calculate MIDPRICE - Midpoint Price over period
    temp['midprice'] = talib.MIDPRICE(high, low, timeperiod=14)
    # temp['midprice'] = NormalizeData(temp['midprice'])
    
    # Calculate SAR - Parabolic SAR - to check for 'AOS' stock
    #temp['SAR'] = talib.SAR(high, low, acceleration=0, maximum=0)
    #temp['SAR'] = NormalizeData(temp['SAR'])
    
    # Calculate SAREXT - Parabolic SAR - Extended - to check for 'AOS' stock
    #temp['SAREXT'] = talib.SAREXT(high, low, startvalue=0, offsetonreverse=0, accelerationinitlong=0, accelerationlong=0, accelerationmaxlong=0, accelerationinitshort=0, accelerationshort=0, accelerationmaxshort=0)
    #temp['SAREXT'] = NormalizeData(temp['SAREXT'])
    
    # Calculate SMA
    periods = [30, 50] # VT updated 20220828
    for period in periods:
        temp[f'SMA_{period}'] = talib.SMA(close, period)
        # temp[f'SMA_{period}'] = NormalizeData(temp[f'SMA_{period}'])
        
    # Calculate T3 - Triple Exponential Moving Average (T3)
    temp['T3ema'] = talib.T3(close, timeperiod=5, vfactor=0)
    # temp['T3ema'] = NormalizeData(temp['T3ema'])
    
    # Calculate TEMA - Triple Exponential Moving Average
    temp['tema_30'] = talib.TEMA(close, timeperiod = 30)
    # temp['tema_30'] = NormalizeData(temp['tema_30'])
    
    # Calculate TRIMA - Triangular Moving Average
    temp['trima'] = talib.TRIMA(close, timeperiod=30)
    # temp['trima'] = NormalizeData(temp['trima'])

    # Calculate WMA - Weighted Moving Average
    temp['wma'] = talib.WMA(close, timeperiod=30)
    # temp['wma'] = NormalizeData(temp['wma'])
    
    ## Momentum Indicator Functions
    # Calculate ADX - Average Directional Movement Index
    temp['adx'] = talib.ADX(high, low, close, timeperiod=14)
    # temp['adx'] = NormalizeData(temp['adx'])
    
    # Calculate ADXR - Average Directional Movement Index Rating
    temp['adxr'] = talib.ADXR(high, low, close, timeperiod=14)
    # temp['adxr'] = NormalizeData(temp['adxr'])
    
    # Calculate APO - Absolute Price Oscillator
    temp['apo'] = talib.APO(close, fastperiod=12, slowperiod=26, matype=0)
    # temp['apo'] = NormalizeData(temp['apo'])
    
    # Calculate AROON - Aroon
    temp['aroondown'], temp['aroonup'] = talib.AROON(high, low, timeperiod=14)
    #temp['aroondown'] = NormalizeData(temp['aroondown'])
    #temp['aroonup'] = NormalizeData(temp['aroonup'])
    
    # Calculate AROONOSC - Aroon Oscillator
    temp['aroonosc'] = talib.AROONOSC(high, low, timeperiod=14)
    #temp['aroonosc'] = NormalizeData(temp['aroonosc'])
    
    # Calculate BOP - Balance Of Power
    temp['bop'] = talib.BOP(open_, high, low, close)
    #temp['bop'] = NormalizeData(temp['bop'])
    
    # Calculate CCI - Commodity Channel Index
    temp['cci'] = talib.CCI(high, low, close, timeperiod=14)
    #temp['cci'] = NormalizeData(temp['cci'])
    
    # Calculate CMO - Chande Momentum Oscillator
    temp['cmo'] = talib.CMO(close, timeperiod=14)
    #temp['cmo'] = NormalizeData(temp['cmo'])
    
    # DX - Directional Movement Index
    temp['dxi'] = talib.DX(high, low, close, timeperiod=14)
    #temp['dxi'] = NormalizeData(temp['dxi'])
    
    # Calculate MACD - Moving Average Convergence/Divergence
    temp['macd'], temp['macdsignal'], temp['macdhist'] = talib.MACD(close)
    #temp['macd'] = NormalizeData(temp['macd'])
    #temp['macdsignal'] = NormalizeData(temp['macdsignal'])
    #temp['macdhist'] = NormalizeData(temp['macdhist'])
    
    # MACDEXT - MACD with controllable MA type
    #temp['macd_ext'], temp['macdsignal_ext'], temp['macdhist_ext'] = talib.MACDEXT(close, fastperiod=12, fastmatype=0, slowperiod=26, slowmatype=0, signalperiod=9, signalmatype=0)
    
    # MACDFIX - Moving Average Convergence/Divergence Fix 12/26
    #temp['macd_ext'], temp['macdsignal_ext'], temp['macdhist_ext'] = talib.MACDFIX(close, signalperiod=9)
    
    # MFI - Money Flow Index
    temp['mfi'] = talib.MFI(high, low, close, volume, timeperiod=14)
    #temp['mfi'] = NormalizeData(temp['mfi'])
    
    # MINUS_DI - Minus Directional Indicator
    temp['MINUS_DI'] = talib.MINUS_DI(high, low, close, timeperiod=14)
    #temp['MINUS_DI'] = NormalizeData(temp['MINUS_DI'])
    
    # MINUS_DM - Minus Directional Movement
    temp['MINUS_DM'] = talib.MINUS_DM(high, low, timeperiod=14)
    #temp['MINUS_DM'] = NormalizeData(temp['MINUS_DM'])
    
    # MOM - Momentum
    temp['mom'] = talib.MOM(close, timeperiod=10)
    #temp['mom'] = NormalizeData(temp['mom'])
    
    # PLUS_DI - Plus Directional Indicator
    temp['plus_di'] = talib.PLUS_DI(high, low, close, timeperiod=14)
    #temp['plus_di'] = NormalizeData(temp['plus_di'])
    
    # PLUS_DM - Plus Directional Movement
    temp['plus_dm'] = talib.PLUS_DM(high, low, timeperiod=14)
    #temp['plus_dm'] = NormalizeData(temp['plus_dm'])
    
    # PPO - Percentage Price Oscillator
    temp['ppo'] = talib.PPO(close, fastperiod=12, slowperiod=26, matype=0)
    #temp['ppo'] = NormalizeData(temp['ppo'])
    
    # ROC - Rate of change : ((price/prevPrice)-1)*100
    #temp['roc'] = talib.ROC(close, timeperiod=10)
    #temp['roc'] = NormalizeData(temp['roc'])
    
    # ROCP - Rate of change Percentage: (price-prevPrice)/prevPrice
    #temp['rocp'] = talib.ROCP(close, timeperiod=10)
    #temp['rocp'] = NormalizeData(temp['rocp'])
    
    # ROCR - Rate of change ratio: (price/prevPrice)
    #temp['rocr'] = talib.ROCR(close, timeperiod=10)
    #temp['rocr'] = NormalizeData(temp['rocr'])
    
    # ROCR100 - Rate of change ratio 100 scale: (price/prevPrice)*100
    # temp['rocr100'] = ROCR100(close, timeperiod=10)

    # Calculate RSI
    temp['rsi_14'] = talib.RSI(close, timeperiod = 14)
    #temp['rsi_14'] = NormalizeData(temp['rsi_14'])
    
    # STOCH - Stochastic
    temp['slowk'], temp['slowd'] = talib.STOCH(high, low, close, fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    #temp['slowk'] = NormalizeData(temp['slowk'])
    #temp['slowd'] = NormalizeData(temp['slowd'])
    
    # Calculate STOCHF - Stochastic Fast
    temp['stochf_k'], temp['stochf_d'] = talib.STOCHF(high, low, close)
    #temp['stochf_k'] = NormalizeData(temp['stochf_k'])
    #temp['stochf_d'] = NormalizeData(temp['stochf_d'])
    
    # STOCHRSI - Stochastic Relative Strength Index
    temp['fastk'], temp['fastd'] = talib.STOCHRSI(close, timeperiod=14, fastk_period=5, fastd_period=3, fastd_matype=0)
    #temp['fastk'] = NormalizeData(temp['fastk'])
    #temp['fastd'] = NormalizeData(temp['fastd'])
    
    # TRIX - 1-day Rate-Of-Change (ROC) of a Triple Smooth EMA
    temp['trix'] = talib.TRIX(close, timeperiod=30)
    #temp['trix'] = NormalizeData(temp['trix'])
    
    # ULTOSC - Ultimate Oscillator
    temp['ultosc'] = talib.ULTOSC(high, low, close, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    #temp['ultosc'] = NormalizeData(temp['ultosc'])

    # WILLR - Williams' %R
    # Values below 20 indicate an overbought condition and a sell signal is generated when it crosses the 20 line. Values over 80 indicate an oversold condition and a buy signal is generated when it crosses the 80 line.
    temp['willR'] = talib.WILLR(high, low, close, timeperiod=14)
    #temp['willR'] = NormalizeData(temp['willR'])
    
    ## Volume Indicator Functions
    # Calculate AD - Chaikin A/D Line
    temp['ad'] = talib.AD(high, low, close, volume)
    #temp['ad'] = NormalizeData(temp['ad'])
    
    # Calculate ADOSC - Chaikin A/D Oscillator
    temp['adosc'] = talib.ADOSC(high, low, close, volume, fastperiod=3, slowperiod=10)
    #temp['adosc'] = NormalizeData(temp['adosc'])
    
    # Calculate OBV - On Balance Volume
    temp['obv'] = talib.OBV(close, volume)
    #temp['obv'] = NormalizeData(temp['obv'])
    
    ## Volatility Indicators
    # Calculate ATR - Average True Range
    temp['atr'] = talib.ATR(high, low, close, timeperiod=14)
    #temp['atr'] = NormalizeData(temp['atr'])
    
    # Calculate NATR - Normalized Average True Range
    temp['natr'] = talib.NATR(high, low, close, timeperiod=14)
    #temp['natr'] = NormalizeData(temp['natr'])
    
    # TRANGE - True Range
    temp['Trange'] = talib.TRANGE(high, low, close)
    #temp['Trange'] = NormalizeData(temp['Trange'])
    
    ## Price Transform
    # AVGPRICE - Average Price
    temp['AVGprice'] = talib.AVGPRICE(open_, high, low, close)
    #temp['AVGprice'] = NormalizeData(temp['AVGprice'])
    
    # MEDPRICE - Median Price
    temp['MEDprice'] = talib.MEDPRICE(high, low)
    #temp['MEDprice'] = NormalizeData(temp['MEDprice'])
    
    # TYPPRICE - Typical Price
    temp['TYPprice'] = talib.TYPPRICE(high, low, close)
    #temp['TYPprice'] = NormalizeData(temp['TYPprice'])
    
    # WCLPRICE - Weighted Close Price
    temp['WCLprice'] = talib.WCLPRICE(high, low, close)
    #temp['WCLprice'] = NormalizeData(temp['WCLprice'])
    
    ## Cycle Indicators
    # HT_DCPERIOD - Hilbert Transform - Dominant Cycle Period
    temp['HT_DCperiod'] = talib.HT_DCPERIOD(close)
    #temp['HT_DCperiod'] = NormalizeData(temp['HT_DCperiod'])
    
    # HT_DCPHASE - Hilbert Transform - Dominant Cycle Phase
    temp['HT_DCphase'] = talib.HT_DCPHASE(close)
    #temp['HT_DCphase'] = NormalizeData(temp['HT_DCphase'])
    
    # HT_PHASOR - Hilbert Transform - Phasor Components - to explore
    temp['inphase'], temp['quadrature'] = talib.HT_PHASOR(close)
    #temp['inphase'] = NormalizeData(temp['inphase'])
    #temp['quadrature'] = NormalizeData(temp['quadrature'])
    
    # HT_SINE - Hilbert Transform - SineWave
    temp['sine'], temp['leadsine'] = talib.HT_SINE(close)
    #temp['sine'] = NormalizeData(temp['sine'])
    #temp['leadsine'] = NormalizeData(temp['leadsine'])
    
    # HT_TRENDMODE - Hilbert Transform - Trend vs Cycle Mode
    temp['HT_TRENDMODE'] = talib.HT_TRENDMODE(close)
    #temp['HT_TRENDMODE'] = NormalizeData(temp['HT_TRENDMODE'])
    
    # Trading Rules Creation
    # Trend Indication by EMA Cross
    # Short-term EMA above Long-term EMA = Bullish, else Berish
    periods = multiples(5, 10) # VT updated 20220918
    
    for shortperiod in periods:
        for longperiod in periods:
            if shortperiod < longperiod:
                conditions = [
                    (temp[f'ema_{shortperiod}'] > temp[f'ema_{longperiod}']),
                    (temp[f'ema_{shortperiod}'] < temp[f'ema_{longperiod}'])
                ]
                choices = [100,-100]
                temp[f'trend_ema_{shortperiod}_{longperiod}'] = np.select(conditions, choices, default=0)
                # temp[f'ema_{period}'] = NormalizeData(temp[f'ema_{period}'])
    
    # Trend Indication by ADX, plus_di and minus_di
    conditions = [
        (temp['adx'] > 20.0) & (temp['plus_di'] > temp['MINUS_DI']),
        (temp['adx'] > 20.0) & (temp['plus_di'] < temp['MINUS_DI'])
    ]
    
    choices = [100,-100]
    
    temp['trend_adx'] = np.select(conditions, choices, default=0)
    # temp['trend_adx'] = NormalizeData(temp['trend_adx'])
    
    # Trend Indication by BBANDS - Bollinger Bands - 14 days
    temp['trend_bbands'] = (close - temp['middleband']) / (temp['upperband'] - temp['lowerband'])
    
    cols=['upperband', 'middleband', 'lowerband'] 
    temp=temp.drop(columns=cols)
    
    # Trend Indication by aroonup, aroondown
    temp['trend_aroon'] = temp['aroonup'] - temp['aroondown']    
    # temp['trend_aroon'] = NormalizeData(temp['trend_aroon'])
    
    # Trend Indication by macd, macdsignal
    conditions = [
        (temp['macd'] > 0.0) & (temp['macd'] > temp['macdsignal']),
        (temp['macd'] < 0.0) & (temp['macd'] < temp['macdsignal'])
    ]
    
    choices = [100,-100]
    
    temp['trend_macd'] = np.select(conditions, choices, default=0)
    # temp['trend_macd'] = NormalizeData(temp['trend_macd'])

    # Trend Indication by CMO - above 50 overbought, below -50 oversold
    conditions = [
        (temp['cmo'] < -50),
        (temp['cmo'] > 50)
    ]
    
    choices = [100,-100]
    
    temp['trend_cmo'] = np.select(conditions, choices, default=0)
    
    # Trend Indication by RSI with resistance and support level
    resistance = 70
    support = 30

    temp['rsi_14_before'] = temp['rsi_14'].shift(1)
    
    conditions = [
        (temp['rsi_14_before'] > resistance) & (temp['rsi_14'] < resistance) & (temp['trend_macd']==100), #uptrend pause
        (temp['rsi_14_before'] < support) & (temp['rsi_14'] > support) & (temp['trend_macd']==-100) #downtrend pause
    ]
    
    choices = [-100,100]
    
    temp['trend_rsi'] = np.select(conditions, choices, default=0)
    temp['trend_rsi_macd'] = temp['trend_macd'] + temp['trend_rsi']
    # temp['trend_rsi_macd'] = NormalizeData(temp['trend_rsi_macd'])
    
    cols=['rsi_14_before', 'trend_rsi'] 
    temp=temp.drop(columns=cols)
    
    df_ind_ta = df_ind_ta.append(temp)

In [None]:
# Column names of the technical indicators, grouped by category.
# ('upperband', 'middleband', 'lowerband' are not listed.)

# Period grid shared by the EMA columns and the EMA-cross rules
periods = multiples(5, 10) # VT updated 20220918
EMA = PrefixAddMultiples('ema', periods)
CrossEMA = PrefixAddCrossMultiples('trend_ema', periods)

Overlap_ind = [
    'dema_30', 'HT', 'kama_30',
    'MA_10', 'MA_20', 'MA_50',
    'mama', 'fama',
    'midpoint', 'midprice',
    'SMA_30', 'SMA_50',
    'T3ema', 'tema_30', 'trima', 'wma',
] + EMA

Momentum_ind = [
    'adx', 'adxr', 'apo',
    'aroondown', 'aroonup', 'aroonosc',
    'bop', 'cci', 'cmo', 'dxi',
    'macd', 'macdsignal', 'macdhist',
    'mfi', 'MINUS_DI', 'MINUS_DM', 'mom',
    'plus_di', 'plus_dm', 'ppo', 'rsi_14',
    'slowk', 'slowd', 'stochf_k', 'stochf_d', 'fastk', 'fastd',
    'trix', 'ultosc', 'willR',
]

Volume_ind = ['ad', 'adosc', 'obv']

Volatility_ind = ['atr', 'natr', 'Trange']

Cycle_ind = [
    'HT_DCperiod', 'HT_DCphase',
    'inphase', 'quadrature',
    'sine', 'leadsine',
    'HT_TRENDMODE',
]

Trading_ind = [
    'trend_adx', 'trend_bbands', 'trend_aroon',
    'trend_macd', 'trend_rsi_macd', 'trend_cmo',
] + CrossEMA

# Every indicator, in category order
All = [
    name
    for group in (Overlap_ind, Momentum_ind, Volume_ind, Volatility_ind, Cycle_ind, Trading_ind)
    for name in group
]

# Parallel lists: All_ind[i] holds the columns of the category named All_ind_name[i]
All_ind_name = ['Overlap_ind', 'Momentum_ind', 'Volume_ind', 'Volatility_ind', 'Cycle_ind', 'All', 'Trading_ind']
All_ind = [Overlap_ind, Momentum_ind, Volume_ind, Volatility_ind, Cycle_ind, All, Trading_ind]

In [None]:
# Feature Importance Summary from Valent
# Select the indicators whose log-scaled absolute importance, min-max
# normalised to [0, 1], exceeds IMPORTANCE_THRESHOLD.
IMPORTANCE_THRESHOLD = 0.7

# For stocks
df_imp = pd.read_csv('EDA/Summary/FeatureImportanceSummary_TA_20221001.csv')

# log(0) is -inf, which would become the minimum and turn EVERY scaled value
# into NaN (inf / inf), silently emptying the selection. Mask zero importances
# to NaN instead: min()/max() skip them and they fail the threshold test below.
abs_importance = df_imp['Importance'].abs()
df_imp['importance_abs'] = np.log(abs_importance.where(abs_importance > 0))

log_min = df_imp['importance_abs'].min()
log_max = df_imp['importance_abs'].max()
df_imp['importance_scaled'] = (df_imp['importance_abs'] - log_min) / (log_max - log_min)

df_imp = df_imp.sort_values('importance_scaled', ascending = False)
df_imp = df_imp[df_imp['importance_scaled'] > IMPORTANCE_THRESHOLD]

# Names of the retained indicator columns, most important first
imp_cols = df_imp['Attribute'].unique()

In [None]:
# TA null-value handling & normalisation, shared by stocks and industries
def scale_to_first_value(frame, columns):
    """Express each of ``columns`` as relative change from its first observed value.

    ``frame`` holds the rows of ONE series (a ticker or a sector). It is sorted
    by 'report_date'; then, per column, nulls are forward-filled and the column
    becomes ``(value - first) / first`` where ``first`` is the first non-null
    value. Finally every row that still contains a null is dropped.

    A column with no observed value at all is left untouched, so the series
    contributes no rows after the final ``dropna()`` (previously this case
    raised IndexError).

    NOTE(review): a first value of 0 (possible for the 0 / +-100 coded
    'trend_*' columns) divides by zero and yields inf/NaN -- confirm whether
    such columns should be scaled at all.
    NOTE(review): ``dropna()`` considers ALL columns of the frame, not only the
    ones kept afterwards, so the longest indicator warm-up decides how many
    leading rows are removed -- confirm this is intended.
    """
    # sort_values returns a new frame, so the caller's data is not modified
    scaled = frame.sort_values('report_date')

    for col in columns:
        observed = scaled[col].dropna()
        if observed.empty:
            continue
        first_value = observed.iloc[0]
        # .ffill() replaces the deprecated fillna(method='ffill')
        scaled[col] = (scaled[col].ffill() - first_value) / first_value

    return scaled.dropna()


# Stock TA: keep the important indicators plus the row keys.
# Frames are collected and concatenated once (DataFrame.append in a loop is
# quadratic and was removed in pandas 2.0).
temp_cols = list(imp_cols) + ['report_date', 'ticker']
stock_scaled_frames = [
    scale_to_first_value(df_stock_ta[df_stock_ta['ticker'] == stock], imp_cols)[temp_cols]
    for stock in stocks
]
temp_stock_ta = pd.concat(stock_scaled_frames) if stock_scaled_frames else pd.DataFrame()

# Industrial TA: same treatment, keyed by sector instead of ticker
temp_cols = list(imp_cols) + ['report_date', 'inds']
ind_scaled_frames = [
    scale_to_first_value(df_ind_ta[df_ind_ta['inds'] == ind], imp_cols)[temp_cols]
    for ind in inds
]
temp_ind_ta = pd.concat(ind_scaled_frames) if ind_scaled_frames else pd.DataFrame()