In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Activation,GRU, SimpleRNN
from sklearn.preprocessing import StandardScaler
from keras import regularizers
from keras.engine.topology import Layer, InputSpec
from keras import initializers
from keras import backend as K
from keras.layers import GRU, K, TimeDistributed
from keras.models import model_from_json
from copy import copy
import warnings
import keras
import tensorflow as tf
warnings.simplefilter('ignore')


T = 21*2
i_dim = 20
d_in = 0.2
batch_size = 200
N_range = 60*2
profit_target_mul = 50
stop_loss_mul = 25
profit_target_vol_range_mul = 1.0
stop_loss_vol_range_mul = 0.5
t_hold = 30
LONG = True
return_T = 14
dividends_df = {}

In [10]:
def make_dataset(X, y, T):
    
    pack = []
    X_len = len(X)
    for i in range(T, X_len+1):
        pack.append([X[(i-T):i], y[i-1]])        
    return pack


def separate_train_val_test(arr, train_L, val_L):
    
    arr = np.array(arr)
    return arr[:train_L], arr[train_L:(train_L+val_L)], arr[(train_L+val_L):]


def load_dividends():
    
    global dividends_df
    
    sectors_info = pd.read_csv("sectors.csv")
    tickers = sectors_info["TICKER"].values
    
    for ticker in tickers:
        
        try:
            div_df = pd.read_csv(os.path.join(os.path.abspath("div_data"),\
                                                       ticker+".csv"))
            div_df.index = pd.to_datetime(div_df["Ex-Date"])
            dividends_df[ticker] = div_df
        except Exception as e:
            pass


def load_dataset(ticker1 = "ADSK", ticker2 = "AMAT", train_L_coef = 6, val_L_coef = 1):
    
    global profit_target_mul, dividends_df
    
    market_data = pd.read_csv("market_data_daily_resample_close.csv", index_col = 0).ffill()
    market_data.index = pd.to_datetime(market_data.index)
    data = pd.DataFrame(index = market_data.index)
    dates = market_data.index
    S1 = market_data[ticker1]
    S2 = market_data[ticker2]
    S1_px = np.array(market_data[ticker1].values)
    S2_px = np.array(market_data[ticker2].values)
    S1_long_returns = 0.5 * (1.0 + (S1 - S1.shift(return_T)) / S1.shift(return_T))
    S2_short_returns = 0.5 * (1.0 + (-S2 + S2.shift(return_T)) / S2.shift(return_T))
    
    train_L = round((len(S1_px) * train_L_coef) / (train_L_coef + val_L_coef))
    val_L = round((len(S1_px) * val_L_coef) / (train_L_coef + val_L_coef))
    
    returns = S1_long_returns + S2_short_returns
    log_returns = np.log1p(returns)
    returns_series = pd.Series(data = returns)
    returns_series_ewm = np.array(returns_series.ewm(span=return_T).std().values)
    returns_series_ewm[0] = 0
    
    price_range_mean = returns_series_ewm
    volatility_log_returns = [0] * len(returns)
    volatility_returns = [0] * len(returns)
    
    for j in range(N_range-1, len(price_range_mean)):
        volatility_log_returns = np.std(log_returns[j - N_range:j])+.0
        volatility_returns = np.std(returns[j - N_range:j])+.0
    
    data["Returns"] = returns
    data['PriceRangeMean'] = price_range_mean
    data['VolatilityLogReturns'] = volatility_log_returns
    data['VolatilityReturns'] = volatility_returns
    data['ClosePriceDiffS1'] = S1 - S1.shift(1)
    data['ClosePriceDiffS2'] = S2 - S2.shift(1)
    data['Ratio'] = (S1 / S2) > (S1 / S2).mean()
    data['S1'] = S1
    data['S2'] = S2
    
    stop_loss = np.array([0.0] * len(S1_px))
    profit_target = np.array([0.0] * len(S1_px))
    y_label = np.array([0] * len(S1_px))
    price_range_mean = np.array(price_range_mean)
    
    for i in range(len(S1_px) - t_hold):
        
        profit_target_bips = 1.0 + 1.0 * profit_target_mul / 10000.0
        stop_loss_bips = 1.0 - 1.0 * stop_loss_mul / 10000.0
        profit_target_vol = 1.0 + price_range_mean[i] * profit_target_vol_range_mul
        stop_loss_vol = 1.0 - price_range_mean[i] * stop_loss_vol_range_mul
        
        if i >= (N_range - 1):
            profit_target[i] = min(profit_target_bips, profit_target_vol)
            stop_loss[i] = max(stop_loss_bips, stop_loss_vol)
        else:
            profit_target[i] = profit_target_bips
            stop_loss[i] = stop_loss_bips
            
        for j in range(1,t_hold + 1):
            
            PnL_long_cur = 0.5 * (S1_px[i + j] - S1_px[i]) / S1_px[i] +\
                0.5 * (-S2_px[i + j] + S2_px[i]) / S2_px[i] + 1.0
                
            if PnL_long_cur >= profit_target[i]:
                y_label[i] = 1
                break
            elif PnL_long_cur <= stop_loss[i]:
                y_label[i] = 0
                break
    
    X = np.array(data.values) 
    sc = StandardScaler()
    sc.fit(X)
    X = sc.transform(X)
    
    X = X[(return_T+1):]
    y_label = y_label[(return_T+1):]
    stop_loss = stop_loss[(return_T+1):]
    profit_target = profit_target[(return_T+1):]
    S1_px = S1_px[(return_T+1):]
    S2_px = S2_px[(return_T+1):]
    price_range_mean = price_range_mean[(return_T+1):]
    dates = dates[(return_T+1):]
    
    w_neu = len(y_label[y_label==0])/len(y_label)  
    w_pos = len(y_label[y_label==1])/len(y_label)
    class_weight = {0:1.0/w_neu,1:1.0/w_pos}

    y_label = np.reshape(y_label, (len(y_label),1))
    num_classes = 2
    y_label = keras.utils.to_categorical(y_label, num_classes)
    
    X_train, X_val, X_test = separate_train_val_test(X, train_L, val_L)
    y_label_train, y_label_val, y_label_test = separate_train_val_test(y_label, train_L, val_L)
    stop_loss_train, stop_loss_val, stop_loss_test =\
            separate_train_val_test(stop_loss, train_L, val_L)
    profit_target_train, profit_target_val, profit_target_test =\
            separate_train_val_test(profit_target, train_L, val_L)
    dates_train, dates_val, dates_test = separate_train_val_test(dates, train_L, val_L)
    s1_px_train, s1_px_val, s1_px_test = separate_train_val_test(S1_px, train_L, val_L)
    s2_px_train, s2_px_val, s2_px_test = separate_train_val_test(S2_px, train_L, val_L)
    price_range_mean_train, price_range_mean_val, price_range_mean_test =\
            separate_train_val_test(price_range_mean, train_L, val_L)

    
    
    return X_train, X_val, X_test, y_label_train, y_label_val, y_label_test,\
            stop_loss_train, stop_loss_val, stop_loss_test,\
            profit_target_train, profit_target_val, profit_target_test,\
            dates_train, dates_val, dates_test,\
            s1_px_train, s1_px_val, s1_px_test,\
            s2_px_train, s2_px_val, s2_px_test,\
            price_range_mean_train, price_range_mean_val, price_range_mean_test, class_weight
            
            
def treshold_plot(y_label_val_pred, y_label_val_true, width,\
                  layers, comm_coef = 10.0/10000.0):
    
    global dates_val, s1_px_val, s2_px_val, profit_target_val, stop_loss_val
    
    PnL_val, PnL_vecs_comm, commission_sum = calc_profit(y_label_val_pred, y_label_val_true,\
                    s1_px_val, s2_px_val, profit_target_val,\
                    stop_loss_val, comm_coef = 10.0/10000.0)
    
    
    config_name = str(layers)+","+str(width)+","+str(profit_target_mul)+","+str(stop_loss_mul)+\
                ","+str(profit_target_vol_range_mul)+","+str(stop_loss_vol_range_mul)+",2h,"
        
    PnL_acc = np.sum(np.array(PnL_vecs_comm).T, axis = 1)
    PnL_acc_comm = np.sum(np.array(PnL_vecs_comm).T, axis = 1)
        
        
    for i in range(1,len(PnL_acc)):
        PnL_acc[i] += PnL_acc[i-1]
        if y_label_val_pred[i] == 1:
            PnL_acc_comm[i] += PnL_acc_comm[i-1] - 2 * comm_coef
        else:
            PnL_acc_comm[i] += PnL_acc_comm[i-1]
                        
    PnL_acc_series = pd.Series(data=PnL_acc)
    PnL_acc_series.index = dates_val
    fig = PnL_acc_series.plot(title=config_name+"no commission",label="PnL,"+config_name+"no commision",\
                        figsize = (12, 7), legend = True)
    fig = fig.get_figure()
        
    PnL_acc_series = pd.Series(data=PnL_acc_comm)
    PnL_acc_series.index = dates_val
    fig = PnL_acc_series.plot(title=config_name+"with commision",label="PnL,"+config_name+"with commision",\
                    figsize = (12, 7), legend = True)
    fig = fig.get_figure()
        

    plt.show()
    
                         
                         
def calc_profit(dates_val, y_label_pred, y_label_true, s1_px, s2_px, profit_target,\
                    stop_loss, comm_coef = 10.0/10000.0, ticker1 = "ADSK", ticker2 = "AMAT"):
    
    global dividends_df
    
    PnL = .0
    y_label_pred = np.array(y_label_pred).argmax(axis = 1)
    y_label_true = np.array(y_label_true).argmax(axis = 1)
    PnL_vecs_comm = []
    commission_sum =.0
    
    for i in range(T - 1, len(y_label_true) - t_hold - 1):
        PnL_vec_comm = np.zeros(len(s1_px))
        
        if y_label_pred[i] == 1:
            
            trig_sl = 0
            commission_sum += -2*comm_coef
            
            for j in range(1, t_hold + 1):
                                
                PnL_priv = 0.5 * (s1_px[i + j - 1] - s1_px[i]) / s1_px[i] +\
                    0.5 * (-s2_px[i + j - 1] + s2_px[i]) / s2_px[i] + 1.0
                    
                PnL_cur = 0.5 * (s1_px[i + j] - s1_px[i]) / s1_px[i] +\
                    0.5 * (-s2_px[i + j] + s2_px[i]) / s2_px[i] + 1.0
                        
    
                if PnL_cur <= stop_loss[i]:
                    
                    PnL += (stop_loss[i] - PnL_priv)
                    PnL_vec_comm[i+j] = (stop_loss[i] - PnL_priv)
                    trig_sl = 1
                    break
                
                elif PnL_cur >= profit_target[i]:
                    
                    PnL += (profit_target[i] - PnL_priv)
                    PnL_vec_comm[i+j] = (profit_target[i] - PnL_priv)
                    trig_sl = 1
                    break
                    
                PnL_dividends = .0
                date = dates_val[i + j]
                
                if ticker1 in dividends_df.keys():
                    
                    if date in dividends_df[ticker1].index:
                        
                        if dividends_df[ticker1].loc[date]["Dividend Type"] == "Regular Cash":

                            PnL_dividends += dividends_df[ticker1].loc[date]["Dividend Amount"] / s1_px[i]
                            
                if ticker2 in dividends_df.keys():
                    
                    if date in dividends_df[ticker2].index:
                        
                        if dividends_df[ticker2].loc[date]["Dividend Type"] == "Regular Cash":

                            PnL_dividends -= dividends_df[ticker2].loc[date]["Dividend Amount"] / s2_px[i]
                            
                
                PnL_vec_comm[i+j] = PnL_cur - PnL_priv + PnL_dividends
                PnL += PnL_cur - PnL_priv + PnL_dividends
                    
            if trig_sl == 0:
                
                PnL += 0.5 * (s1_px[i + t_hold + 1] - s1_px[i + t_hold]) / s1_px[i] +\
                    0.5 * (-s2_px[i + t_hold + 1] + s2_px[i + t_hold]) / s2_px[i]
                    
                PnL_vec_comm[i+j] = 0.5 * (s1_px[i + t_hold + 1] - s1_px[i + t_hold]) / s1_px[i] +\
                    0.5 * (-s2_px[i + t_hold + 1] + s2_px[i + t_hold]) / s2_px[i] 
                
        PnL_vecs_comm.append(PnL_vec_comm)
        
    return PnL, PnL_vecs_comm, commission_sum

    
def schedule(ind, lr):
    
    return lr * (0.98)**ind


def save_model(model, name = "short,noSL"):
    
    model_json = model.to_json()
    with open("model"+name+".json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("model"+name+".h5")
    
    
def load_model(name):
    
    json_file = open('model'+name+'.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights("model"+name+".h5")
    return model


class My_Callback(keras.callbacks.Callback):
    
    def on_train_begin(self, logs={}):
        return
 
    def on_train_end(self, logs={}):
        return
 
    def on_epoch_begin(self, epoch, logs={}):
        return
 
    def on_epoch_end(self, epoch, logs={}):
        global X_val, y_label_val_true, dates_val, s1_px_val,\
            s2_px_val, profit_target_val, stop_loss_val
              
        y_label_val_pred = self.model.predict(x=X_val)        
        PnL_val, PnL_comm_matrix, commission_sum = calc_profit(dates_val, y_label_val_pred, y_label_val_true,\
                                                               s1_px_val, s2_px_val, profit_target_val,\
                    stop_loss_val, comm_coef = 10.0/10000.0, ticker1 = "ADSK", ticker2 = "AMAT")
        
        print("Return with transaction costs: ", np.round(PnL_val+commission_sum, 2), '\n',\
              "Return without transaction costs: ", np.round(PnL_val, 2))
 
    def on_batch_begin(self, batch, logs={}):
        return
 
    def on_batch_end(self, batch, logs={}):
        return

In [12]:
load_dividends()

epochs = 20
optimizer = keras.optimizers.Adam(lr=0.0001, beta_1=0.9,\
                                    beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True, clipvalue = 0.5)

X_train_arr, X_val_arr, X_test_arr, y_label_train_arr, y_label_val_arr, y_label_test_arr,\
            stop_loss_train, stop_loss_val, stop_loss_test,\
            profit_target_train, profit_target_val, profit_target_test,\
            dates_train, dates_val, dates_test,\
            s1_px_train, s1_px_val, s1_px_test,\
            s2_px_train, s2_px_val, s2_px_test,\
            price_range_mean_train, price_range_mean_val, price_range_mean_test, class_weight = load_dataset()
            

train_set = make_dataset(X_train_arr, y_label_train_arr, T)
val_set = make_dataset(X_val_arr, y_label_val_arr, T)
test_set = make_dataset(X_test_arr, y_label_test_arr, T)

X_train = np.array([x[0] for x in train_set])
y_label_train_true = np.array([x[1] for x in train_set])
X_val = np.array([x[0] for x in val_set])
y_label_val_true = np.array([x[1] for x in val_set])
X_test = np.array([x[0] for x in test_set])
y_label_test_true = np.array([x[1] for x in test_set])
             
i_dim = np.shape(X_train)[2]

results = []
profit_target_mul_arr = [50]
stop_loss_mul_arr = [25]
profit_target_vol_range_mul_arr = np.array([1.8])
stop_loss_vol_range_mul_arr = np.array([0.9])

for layers in range(3, 4):
    for width_exp in range(6, 7):
        for profit_loss_config in range(1):
            
                profit_target_mul = profit_target_mul_arr[profit_loss_config]
                stop_loss_mul = stop_loss_mul_arr[profit_loss_config]
                profit_target_vol_range_mul = profit_target_vol_range_mul_arr[profit_loss_config]
                stop_loss_vol_range_mul = stop_loss_vol_range_mul_arr[profit_loss_config]
                
                width_layer = 2**(width_exp)
                model = Sequential()
                model.add(TimeDistributed(Dense(64), input_shape=(T, i_dim)))
                
                print((layers, width_layer, profit_loss_config))
                
                X_train_arr, X_val_arr, X_test_arr, y_label_train_arr, y_label_val_arr, y_label_test_arr,\
                stop_loss_train, stop_loss_val, stop_loss_test,\
                profit_target_train, profit_target_val, profit_target_test,\
                dates_train, dates_val, dates_test,\
                s1_px_train, s1_px_val, s1_px_test,\
                s2_px_train, s2_px_val, s2_px_test,\
                price_range_mean_train, price_range_mean_val,\
                    price_range_mean_test, class_weight = load_dataset()
                    
                train_set = make_dataset(X_train_arr, y_label_train_arr, T)
                val_set = make_dataset(X_val_arr, y_label_val_arr, T)
                test_set = make_dataset(X_test_arr, y_label_test_arr, T)
                
                X_train = np.array([x[0] for x in train_set])
                y_label_train_true = np.array([x[1] for x in train_set])
                X_val = np.array([x[0] for x in val_set])
                y_label_val_true = np.array([x[1] for x in val_set])
                X_test = np.array([x[0] for x in test_set])
                y_label_test_true = np.array([x[1] for x in test_set])
                
                
                if layers > 1:
                    model.add(GRU(width_layer,return_sequences=True, dropout=d_in)) 
                else:
                    model.add(GRU(width_layer,return_sequences=False, dropout=d_in))
                    
                for w in range(1,layers-1):
                    model.add(GRU(width_layer,return_sequences=True, dropout=d_in))  #75 hidden neurons
                        
                if layers > 1:
                    model.add(GRU(width_layer,return_sequences=False, dropout=d_in))
                
                model.add(Dense(64, activation='relu'))
                early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0,\
                                              patience=10, verbose=0, mode='auto',\
                                              baseline=None)
                lr_sched = keras.callbacks.LearningRateScheduler(schedule, verbose=0)
                reduce_lr_plateau = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2,\
                                                                      patience=3, verbose=0,\
                                                                      mode='auto', min_delta=0.0001,\
                                                                      cooldown=0, min_lr=0.00001)
                model.add(Dense(2, activation='softmax'))
                model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])
                model.fit(x=X_train, y=y_label_train_true,\
                          validation_data = (X_val,y_label_val_true), epochs=epochs,\
                                 shuffle = False, callbacks=[early_stopping,lr_sched,My_Callback()],\
                                 class_weight = class_weight)
                    
                
                eval_val = model.evaluate(x=X_val, y=y_label_val_true)
                y_label_val_pred = model.predict(x=X_val)
                
                PnL_val, PnL_comm_matrix, commission_sum = calc_profit(dates_val, y_label_val_pred, y_label_val_true,\
                                                               s1_px_val, s2_px_val, profit_target_val,\
                    stop_loss_val, comm_coef = 10.0/10000.0, ticker1 = "ADSK", ticker2 = "AMAT")

                print(layers, width_layer, profit_loss_config, PnL_val + commission_sum)

(3, 64, 0)
Train on 2492 samples, validate on 366 samples
Epoch 1/20
Return with transaction costs:  -0.53 
 Return without transaction costs:  0.06
Epoch 2/20
Return with transaction costs:  -0.53 
 Return without transaction costs:  0.05
Epoch 3/20
Return with transaction costs:  -0.49 
 Return without transaction costs:  0.07
Epoch 4/20
Return with transaction costs:  -0.36 
 Return without transaction costs:  0.05
Epoch 5/20
Return with transaction costs:  -0.18 
 Return without transaction costs:  0.09
Epoch 6/20
Return with transaction costs:  -0.02 
 Return without transaction costs:  0.07
Epoch 7/20
Return with transaction costs:  -0.04 
 Return without transaction costs:  0.07
Epoch 8/20
Return with transaction costs:  -0.07 
 Return without transaction costs:  0.08
Epoch 9/20
Return with transaction costs:  -0.12 
 Return without transaction costs:  0.07
Epoch 10/20
Return with transaction costs:  -0.1 
 Return without transaction costs:  0.07
Epoch 11/20
Return with transact