### Strategy creation - technical indicators

In [1]:
## This strategy is an all-in technical indicator trader.
## Step 1: define the technical indicators.
## Step 2: calculate the technical indicators.
## Step 3: demean and standardise the indicators, so that they can be used in statistical analysis.
## Step 4: enforce stationarity on the indicators.
##

In [14]:
import pandas as pd
import os
from datetime import datetime
def convert_string_to_datetime(x):
    """Parse a ``YYYY-MM-DD`` string into a ``datetime``.

    Raises ``ValueError`` (from ``datetime.strptime``) when *x* does not
    match that format.
    """
    return datetime.strptime(x, "%Y-%m-%d")

### Parameters

In [63]:
## Instrument whose raw-data folder is loaded further down.
p_contract = "EURUSD"

## Look-back lengths swept for every "window" indicator (7 .. 89 rows).
window_range = range(7, 90)

## Lags swept for "window_shift" indicators (2 .. 29 rows).
shift_range = range(2, 30)

## Smoothing factors swept for the "alpha" indicators: every high value is
## paired with every low value.
alph_high_range = [0.6, 0.7, 0.8, 0.9, 0.95]
alph_low_range = [0.1, 0.2, 0.3, 0.4, 0.5]

### Define technical indicators

In [52]:
## Naming convention: an indicator called "<name>" in the x / y variable
## lists is implemented by a function called "calc_<name>".
## NOTE(review): 'ema_dif' has no matching "calc_ema_dif" in this notebook
## (the function is calc_ema_diff) -- confirm which spelling is intended.

indicator_list = [
    'momentum',
    'rolling_avg_pct_change',
    'RSI',
    'forward_opportunity',
    'ema',
    'ema_dif',
]

def self_reference(f):
    """Decorator that replaces the last default argument of *f* with *f* itself.

    The decorated functions declare a trailing ``self=None`` parameter; after
    decoration that parameter defaults to the function object, so the body
    can reach attributes stored on the function. The same function object is
    returned (it is modified in place, not wrapped).
    """
    current_defaults = f.__defaults__
    # Keep every default except the last one, which becomes the function.
    f.__defaults__ = (*current_defaults[:-1], f)
    return f

@self_reference
def calc_momentum(df,price,window,self=None):
    """Add the ``window``-row return of ``df[price]`` as a new column.

    The value at each row is ``price / price shifted by window rows - 1``,
    so the first ``window`` rows are NaN.

    Returns the (mutated) frame and the name of the column that was added,
    ``last_<window>_momentum``.
    """
    column = f'last_{window}_momentum'
    earlier_price = df[price].shift(window)
    df[column] = (df[price] / earlier_price) - 1
    return df, column

@self_reference
def calc_rolling_avg_pct_change(df,price,window,shift,self=None):
    """Add the relative change of a rolling mean over ``shift`` rows.

    A ``window``-row rolling mean of ``df[price]`` is compared with its own
    value ``shift`` rows earlier: ``mean / mean.shift(shift) - 1``.

    Returns the (mutated) frame and the new column name,
    ``rolling_<window>_avg_<shift>_period_pct_change``.
    """
    column = f"rolling_{window}_avg_{shift}_period_pct_change"
    # Written to the frame first, then overwritten with the ratio, exactly
    # as two separate assignments to the same column.
    df[column] = df[price].rolling(window).mean()
    smoothed = df[column]
    df[column] = (smoothed / smoothed.shift(shift)) - 1
    return df, column

@self_reference
def calc_rolling_avg(df,price,window,self=None):
    """Add the ``window``-row rolling mean of ``df[price]`` as a new column.

    Returns the (mutated) frame and the new column name,
    ``rolling_<window>_avg_price``.
    """
    column = f'rolling_{window}_avg_price'
    rolling_mean = df[price].rolling(window).mean()
    df[column] = rolling_mean
    return df, column
 
@self_reference
def calc_rolling_avg_price_dif(df,price,window,self=None):
    """Add the gap between ``df[price]`` and its ``window``-row rolling mean.

    Positive values mean the current price is above its rolling mean.

    Returns the (mutated) frame and the new column name,
    ``rolling_<window>_avg_price_dif``.
    """
    column = f'rolling_{window}_avg_price_dif'
    rolling_mean = df[price].rolling(window).mean()
    df[column] = df[price] - rolling_mean
    return df, column

@self_reference
def calc_RSI(df,price,window,self=None):
    """Add a relative-strength index of ``df[price]`` on a 0..1 scale.

    For every row the one-row price change is split into gains (positive
    changes) and losses (negative changes, sign flipped). The ratio
    ``RS = sum(gains) / sum(losses)`` over the trailing ``window`` rows is
    mapped to ``1 - 1 / (1 + RS)``.

    Behaviour kept from the original implementation:
      * unchanged prices count as neither a gain nor a loss;
      * a window that contains no gains, or no losses, yields NaN;
      * the first ``window`` rows are always NaN (the rolling sums use
        ``min_periods=1`` and would otherwise produce early values).

    Fixes relative to the original:
      * the ``price`` argument is honoured everywhere (the gain/loss columns
        previously read a hard-coded ``'price'`` column);
      * the warm-up rows are blanked by position, so the result is correct
        for any index, not only 0..n-1, and no rows are ever appended;
      * no deep copy of the whole frame is made.

    Returns the (mutated) frame and the new column name, ``last_<window>_RSI``.
    """
    output_string = "last_" + str(window)+"_RSI"

    change = df[price] - df[price].shift(1)
    # where() leaves NaN wherever the condition is False, which matches the
    # original "0 -> None" replacement of non-gains / non-losses.
    gains = change.where(change > 0)
    losses = (-change).where(change < 0)

    relative_strength = (
        gains.rolling(window, min_periods=1).sum()
        / losses.rolling(window, min_periods=1).sum()
    )
    rsi = 1 - 1/(1+relative_strength)

    # Blank the warm-up period by position rather than by index label.
    rsi.iloc[:window] = float('nan')
    df[output_string] = rsi

    return df,output_string

@self_reference
def calc_forward_opportunity(df,price,window,self=None):
    """Add the best upside available over the next ``window`` rows.

    The value at row t is the maximum of ``df[price]`` over rows
    t+1 .. t+window minus the price at row t. It looks ahead in the data,
    so the last ``window`` rows are NaN.

    Returns the (mutated) frame and the new column name,
    ``forward_<window>_opportunity``.
    """
    column = f'forward_{window}_opportunity'
    # The trailing max is written to the frame first, then pulled back by
    # `window` rows so that it describes the future of each row.
    df[column] = df[price].rolling(window).max()
    future_max = df[column].shift(-window)
    df[column] = future_max - df[price]
    return df, column



### specify function list; must be ordered:

def calc_ema(df,price,alph,self = None):
    """Add an exponentially weighted moving average of ``df[price]``.

    ``alph`` is the smoothing factor handed to ``Series.ewm(alpha=...)``.

    The column is named after ``alph`` (``last_<alph>_EMA``). The original
    code built the name from a global ``window`` variable, which either
    raised NameError or labelled every EMA with whatever value a previous
    loop had left behind, so successive calls overwrote each other.

    Returns the (mutated) frame and the name of the column that was added.
    """
    string = 'last_%s_EMA' % alph
    df[string] = df[price].ewm(alpha=alph).mean()
    return df,string

def calc_ema_cross(df,price,alph_high,alph_low,self = None):
    """Add a crossover signal between a fast and a slow EMA of ``df[price]``.

    The fast EMA uses ``alph_high`` and the slow EMA ``alph_low``. The signal is
      * +1 on the row where the fast EMA becomes greater than the slow EMA,
      * -1 on the row where it stops being greater,
      *  0 otherwise (the first row has no predecessor and is compared
         against "not above").

    The EMAs are computed as standalone Series instead of on a deep copy of
    the whole frame, which avoids copying every existing column on each call.

    Returns the (mutated) frame and the new column name,
    ``<alph_high>_high_<alph_low>_low_ema_cross``.
    """
    fast = df[price].ewm(alpha=alph_high).mean()
    slow = df[price].ewm(alpha=alph_low).mean()

    # 1 while the fast EMA is strictly above the slow one, else 0.
    above = ((fast - slow) > 0).astype('int64')
    # Previous row's state; the missing first predecessor counts as 0.
    previously_above = above.shift(1, fill_value=0)

    string = str(alph_high) + "_high_" + str(alph_low) + "_low_ema_cross"
    df[string] = above - previously_above
    return df,string


def calc_ema_diff(df,price,alph_high,alph_low,self = None):
    """Add the gap between a fast and a slow EMA of ``df[price]``.

    The value is ``EMA(alpha=alph_high) - EMA(alpha=alph_low)``; it is
    positive while the fast EMA is above the slow one.

    The EMAs are computed as standalone Series instead of on a deep copy of
    the whole frame, which avoids copying every existing column on each call.

    Returns the (mutated) frame and the new column name,
    ``<alph_high>_high_<alph_low>_low_ema_dif``.
    """
    fast = df[price].ewm(alpha=alph_high).mean()
    slow = df[price].ewm(alpha=alph_low).mean()

    string = str(alph_high) + "_high_" + str(alph_low) + "_low_ema_dif"
    df[string] = fast - slow
    return df,string


## Tag every indicator function with the kind of parameter sweep the
## calculation loop should run for it:
##   "window"       -> called once per value of window_range
##   "window_shift" -> called once per (window, shift) pair
##   "alpha"        -> called once per (alph_high, alph_low) pair
## NOTE(review): calc_ema is tagged "window", so the loop would pass
## window_range values as its `alph` argument -- confirm this is intended.
for _indicator, _sweep in (
    (calc_momentum, "window"),
    (calc_rolling_avg_pct_change, "window_shift"),
    (calc_rolling_avg, "window"),
    (calc_rolling_avg_price_dif, "window"),
    (calc_RSI, "window"),
    (calc_forward_opportunity, "window"),
    (calc_ema, "window"),
    (calc_ema_cross, "alpha"),
    (calc_ema_diff, "alpha"),
):
    _indicator.type = _sweep




### Define synthetic cluster dic

In [40]:
## Column names grouped by indicator family. Every name is built with the
## same pattern as the string returned by the matching calc_* function.
momentum_list = []
rolling_avg_pct_change_list = []
rolling_avg_list = []
rolling_avg_price_dif_list = []
RSI_list = []

for window in window_range:
    momentum_list.append('last_%s_momentum' % window)
    for shift in shift_range:
        rolling_avg_pct_change_list.append(
            "rolling_"+str(window)+"_avg_"+str(shift)+"_period_pct_change"
        )
    rolling_avg_list.append('rolling_%s_avg_price' % window)
    rolling_avg_price_dif_list.append('rolling_%s_avg_price_dif' % window)
    RSI_list.append("last_" + str(window)+"_RSI")

## EMA-difference columns depend only on the (high, low) alpha pair.
## The previous version prefixed each name with "last_<window>_" and
## repeated it for every window, producing names that calc_ema_diff never
## creates.
EMA_list = [
    str(a) + "_high_" + str(b) + "_low_ema_dif"
    for a in alph_high_range
    for b in alph_low_range
]

synthetic_clstrs = {
    0: momentum_list,
    1: rolling_avg_pct_change_list,
    ## rolling_avg_list is built above but is not part of any cluster.
    2: rolling_avg_price_dif_list,
    3: RSI_list,
    4: EMA_list,
}
    
    

In [65]:
### Specify what indicators to use.
### Each name must have a matching calc_<name> function. That function's
### `type` attribute decides which parameter sweep (window, window + shift,
### or alpha pair) it is run over, so one name can yield many columns.
x_variables = ['momentum','rolling_avg_pct_change','RSI','ema_diff']
y_variables = ['forward_opportunity']
all_variables = x_variables + y_variables


def _resolve_indicator(name):
    """Return the calc_<name> function defined in this namespace.

    A plain dictionary lookup replaces the earlier eval() call, so the
    indicator name is never executed as code. NameError is raised for an
    unknown indicator, as eval() did.
    """
    function_name = "calc_" + str(name)
    try:
        return globals()[function_name]
    except KeyError:
        raise NameError("name '" + function_name + "' is not defined") from None


x_dic = {item: _resolve_indicator(item) for item in x_variables}
y_dic = {item: _resolve_indicator(item) for item in y_variables}

### Upload data

In [15]:
## Directory params
p_file_path =  r"C:\Users\hugo.lu\repos\repos_backup\ib_insync\Raw data\\" + p_contract + "\\"
data_list = os.listdir(path=p_file_path)
data_sub_list = []
## Always defined now, so later cells can test it on either branch.
directory_empty = len(data_list) == 0
if directory_empty:
    print("Nothing in directory")
else:
    ## File names are expected to start with "YYYY-MM-DD_". Anything that
    ## does not parse gets a very old placeholder date so it can never be
    ## picked as the latest file.
    placeholder_date = datetime.strptime('1000-01-01',"%Y-%m-%d")
    for item in data_list:
        try:
            data_sub_list.append(datetime.strptime(item.split("_")[0],"%Y-%m-%d"))
        except ValueError:
            data_sub_list.append(placeholder_date)
### Take latest file
    m = max(data_sub_list)
    index_out = [i for i, j in enumerate(data_sub_list) if j == m]

    print("The index of the latest data is: " + str(index_out))

    ## Load the file the index points at. The previous version always
    ## loaded data_list[0], whatever the latest file was.
    data_load_string = data_list[index_out[0]]
    data_in = pd.read_csv(filepath_or_buffer = p_file_path + data_load_string
                         ,index_col=0)
    ### Convert date column to datetime64
    data_in['date'] = data_in['date'].astype(str)
    data_in['date']=data_in['date'].apply(convert_string_to_datetime)

The index of the latest data is: [0]


In [67]:
data_columns = list(data_in.columns)
data_copy = data_in.copy(deep = True)
## The opening price is the working 'price' series for every indicator.
data_copy = data_copy.rename(columns = {'open': 'price'})
## Label rows with the configured contract instead of a hard-coded ticker.
data_copy['ticker'] = p_contract

## Cast all relevant objects to floats
float_list = ['price','close']
for item in float_list:
    data_copy[item] = data_copy[item].astype('float64')

## Cast all relevant objects to dates (kept in its own list; the earlier
## version reused the name float_list for the date columns)
date_list = ['date']
for item in date_list:
    data_copy[item] = pd.to_datetime(data_copy[item])

### Calculate indicators

In [68]:
def _indicator_arguments(sweep_type):
    """Return the argument tuples for one sweep type, or None if unknown.

    "window"       -> one tuple per window
    "window_shift" -> one tuple per (window, shift), window varying slowest
    "alpha"        -> one tuple per (alph_high, alph_low), high varying slowest
    """
    if sweep_type == "window":
        return [(w,) for w in window_range]
    if sweep_type == "window_shift":
        return [(w, s) for w in window_range for s in shift_range]
    if sweep_type == "alpha":
        return [(hi, lo) for hi in alph_high_range for lo in alph_low_range]
    return None


def _calculate_indicators(df, indicator_dic):
    """Run every indicator in *indicator_dic* over its parameter sweep.

    Returns the frame with the new columns plus the list of column names in
    creation order. As before, an indicator with an unknown `type` prints a
    message and stops the remaining indicators of that dictionary.
    """
    created = []
    for indicator in indicator_dic.values():
        argument_sets = _indicator_arguments(indicator.type)
        if argument_sets is None:
            print("Functions specified incorrectly")
            break
        for arguments in argument_sets:
            df, column = indicator(df, 'price', *arguments)
            created.append(column)
    return df, created


## The x and y variables now share one code path. Previously the y loop had
## no "alpha" branch and iterated "window_shift" in the opposite order.
data_copy, x_variables_actual = _calculate_indicators(data_copy, x_dic)
data_copy, y_variables_actual = _calculate_indicators(data_copy, y_dic)

In [72]:
all_variables_list = [*x_variables_actual, *y_variables_actual]
### Keep only the indicator columns and drop every row with a missing value;
### simply dropping is acceptable as data availability is not an issue.
data_reg = data_copy.loc[:, all_variables_list].dropna()

In [73]:
## Number of rows left after dropping rows with any missing indicator value.
len(data_reg)

2374

In [33]:
## NOTE(review): displays whatever `a` is bound to when this cell runs. The
## saved output below is a DataFrame, whereas the loops above bind `a` to an
## alpha value -- presumably left over from an earlier session; confirm.
a

Unnamed: 0,date,price,high,low,close,volume,average,barCount,ticker,0.8_high_0.2_low_ema_cross,0.8_high_0.2_low_ema_dif
0,2010-05-10,1.290875,1.309450,1.275950,1.278700,-1,-1.0,-1,EURUSD,0,0.000000
1,2010-05-11,1.278325,1.280325,1.265650,1.266200,-1,-1.0,-1,EURUSD,0,-0.003486
2,2010-05-12,1.266075,1.273925,1.260550,1.261350,-1,-1.0,-1,EURUSD,0,-0.007746
3,2010-05-13,1.261850,1.268450,1.251850,1.253475,-1,-1.0,-1,EURUSD,0,-0.008360
4,2010-05-14,1.252975,1.257575,1.235450,1.235900,-1,-1.0,-1,EURUSD,0,-0.011035
5,2010-05-17,1.235575,1.241450,1.223475,1.239525,-1,-1.0,-1,EURUSD,0,-0.018333
6,2010-05-18,1.239475,1.244450,1.216150,1.220225,-1,-1.0,-1,EURUSD,0,-0.013687
7,2010-05-19,1.218875,1.242375,1.214425,1.241550,-1,-1.0,-1,EURUSD,0,-0.021926
8,2010-05-20,1.242900,1.259775,1.229700,1.248625,-1,-1.0,-1,EURUSD,0,-0.005535
9,2010-05-21,1.246575,1.267250,1.245575,1.257075,-1,-1.0,-1,EURUSD,1,0.000115
