In [34]:
from math import gamma
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report , fbeta_score , confusion_matrix
from matplotlib import pylab as plt
import tensorflow as tf
import tensorflow.keras.layers as tfl
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import LearningRateScheduler ,EarlyStopping 
# Fix TensorFlow's global random seed so repeated runs start from the same seed.
tf.random.set_seed(42)

In [35]:
# Load the full dataset from the Excel workbook (path is relative to the working directory).
allData = pd.read_excel('Dataset.xlsx')

In [36]:
# Parse the 'EnglishDate' column once and expose its calendar parts as
# separate columns on allData (year, month, day of month, day of week).
date = pd.to_datetime(allData['EnglishDate'])
allData['Year'], allData['Month'], allData['Day'], allData['DayOfWeek'] = (
    date.dt.year,
    date.dt.month,
    date.dt.day,
    date.dt.day_of_week,
)

In [37]:
# Instrument whose rows are used for this experiment.
InsID = 38

# Keep only that instrument's rows, renumber them from 0, drop the now-constant
# id column and replace every missing value with 0.
trainData_all = (
    allData.loc[allData['InstrumentID'] == InsID]
    .reset_index(drop=True)
    .drop(columns=['InstrumentID'])
    .fillna(0)
)

# Class balance of the binary target: count of 0-labels, then count of 1-labels.
neg, pos = np.bincount(trainData_all['Target1'])
print(neg, pos)

2296 337


In [None]:
# Set the last 10 rows aside; everything before them is used for modelling.
trainData = trainData_all.iloc[:-10]

# Separate the label column (kept as a one-column frame) from the features.
Targets = trainData[['Target1']]
trainData = trainData.drop(columns=['Target1'])
print(trainData.shape)

In [None]:
# Standardise the price / indicator columns that feed the sequence encoder.
# NOTE(review): the scaler is fitted on every row of trainData, i.e. before the
# later train/test split, so test-period statistics influence the scaling —
# confirm this is intended.
sc = StandardScaler()
trainData_seq = sc.fit_transform(trainData[[
    'AdjustedClosePrice', 'AdjustedOpenPrice', 'AdjustedHighPrice', 'AdjustedLowPrice',
    'EMA12', 'EMA21', 'EMA26', 'EMA50', 'EMA100', 'EMA200', 'EMA260', 'EMA365',
    'MACD', 'MACDSignal', 'MACDGap',
    'LNAdjustedClosePrice', 'LNAdjustedRealClosePrice', 'LNAdjustedOpenPrice',
    'LNAdjustedHighPrice', 'LNAdjustedLowPrice',
]])
print(trainData.shape)
print(trainData_seq.shape)
print('Done')

In [None]:
# Length of the look-back window (number of consecutive rows per sample).
days = 30

# Encoder samples: for every row index >= days, the `days` scaled rows that
# immediately precede it.
Encoder_input = np.asarray(
    [trainData_seq[stop - days:stop] for stop in range(days, np.size(trainData, 0))]
).reshape(-1, days, trainData_seq.shape[1]).astype('float64')

# Same windows with a trailing channel axis for the Conv2D branch.
Conv_input = Encoder_input.reshape(-1, days, trainData_seq.shape[1], 1).astype('float64')

# Decoder samples: calendar features of the row each window leads up to.
Decoder_input = np.asarray(
    trainData[['Year', 'Month', 'Day', 'DayOfWeek']].iloc[days:]
).reshape(-1, 1, 4).astype('float64')

# Labels aligned with the windows (the first `days` rows have no full window).
Target = np.asarray(Targets[days:])

# Number of samples produced.
j = Encoder_input.shape[0]

print('trainData_seq shape:' , trainData_seq.shape)
print('Encoder input shape:' , Encoder_input.shape)
print('Conv input shape:' , Conv_input.shape)
print('Decoder input shape:' , Decoder_input.shape)
print('Target shape:' , Target.shape)
print(j)

In [None]:
# Chronological hold-out: the first 70% of the windows train the model and the
# last 30% test it. Consecutive windows share all but one of their rows, so a
# shuffled split would put near-duplicates of the test windows into the training
# set and inflate the test metrics; shuffle=False keeps the two sets disjoint in
# time.
(train_data_encoder, test_data_encoder,
 train_data_decoder, test_data_decoder,
 train_data_conv, test_data_conv,
 train_target, test_target) = train_test_split(
    Encoder_input, Decoder_input, Conv_input, Target,
    test_size=0.3, shuffle=False)
print(train_data_encoder.shape)
print(test_data_decoder.shape)

In [None]:
def lr_exp_decay(epoch, lr):
    """Return an exponentially decaying learning rate for the given epoch.

    Args:
        epoch: Epoch index; the rate is 0.001 at epoch 0 and shrinks from there.
        lr: Current learning rate. Accepted so the function matches the
            two-argument scheduler signature, but not used in the computation.

    Returns:
        ``0.001 * exp(-0.02 * epoch)``.
    """
    base_rate, decay = 0.001, 0.02
    return base_rate * np.exp(-decay * epoch)

In [None]:
# Initial output bias: log-odds of the positive class, so the untrained sigmoid
# already predicts roughly the empirical positive rate of the imbalanced target.
bias = np.log([pos/neg])
initializer_bias = tf.keras.initializers.Constant(bias)

# --- Inputs -----------------------------------------------------------------
# Sequence branch: (window length, features); conv branch: same windows with a
# trailing channel axis.
LSTM1Inputs = tfl.Input(shape=(np.size(train_data_encoder,1), np.size(train_data_encoder,2)))
ConvInput = tfl.Input(shape=(np.size(train_data_conv,1), np.size(train_data_conv,2), np.size(train_data_conv,3)))

# --- Branch 1: LSTM directly on the scaled features --------------------------
LSTM1,_,_ = tfl.LSTM(64, return_state=True ,return_sequences=True,dropout=0.2)(LSTM1Inputs)

# --- Branch 2: Conv over the feature axis (kernel height 1 leaves the time
# axis untouched), flattened per time step and fed to an LSTM -----------------
Conv1 = tfl.Conv2D(30,(1,3),padding='valid')(ConvInput)
LSTM2Inputs = tfl.Reshape((np.size(train_data_encoder,1),-1))(Conv1)
LSTM2,_,_ =  tfl.LSTM(15, return_state=True ,return_sequences=True,dropout=0.2)(LSTM2Inputs)

# --- Branch 3: a second conv stacked on the first, then another LSTM ---------
Conv2 = tfl.Conv2D(60,(1,5),padding='valid')(Conv1)
LSTM3Inputs = tfl.Reshape((np.size(train_data_encoder,1),-1))(Conv2)
LSTM3,_,_ = tfl.LSTM(15, return_state=True ,return_sequences=True,dropout=0.2)(LSTM3Inputs)

# --- Encoder: merge the three per-time-step sequences and summarise them -----
EncoderInputs = tfl.concatenate([LSTM1,LSTM2,LSTM3],axis = 2)
encoder = tfl.LSTM(256, return_state=True)
encoder_outputs, state_h, state_c = encoder(EncoderInputs)
encoder_states = [state_h, state_c]

# --- Decoder: reads the decoder input (calendar features), starting from the
# encoder's final hidden/cell state -------------------------------------------
decoder = tfl.LSTM(256, return_sequences=True, return_state=True,dropout=0.2)
decoderInputs = tfl.Input(shape=(np.size(train_data_decoder,1), np.size(train_data_decoder,2)))
decoder_output, _, _ = decoder(decoderInputs,initial_state=encoder_states)

# --- Classification head -----------------------------------------------------
Dense1 = tfl.Dense(100, activation='relu')(decoder_output)
Drop1 = tfl.Dropout(0.2)(Dense1)
Dense2 = tfl.Dense(30, activation='relu' )(Drop1)
# The log-odds constant belongs on the bias. Passing it as kernel_initializer
# (as before) set every output weight to that constant and left the bias at 0.
Dense3 = tfl.Dense(1, activation='sigmoid' , bias_initializer=initializer_bias)(Dense2)

# NOTE(review): early stopping watches the training loss although fit() is given
# a validation split; consider monitor='val_loss' — confirm intent.
early_stopper = EarlyStopping(patience=30, monitor='loss')

Mod = tf.keras.Model([LSTM1Inputs ,ConvInput, decoderInputs], Dense3)
Mod.compile(loss='binary_crossentropy', optimizer='adam' , metrics=['AUC'] )

In [None]:
# Train the model: up to 300 epochs in batches of 500, holding out 10% of the
# training samples for validation. The learning rate follows lr_exp_decay and
# training can end early via early_stopper.
History = Mod.fit(
    x=[train_data_encoder, train_data_conv, train_data_decoder],
    y=train_target,
    batch_size=500,
    epochs=300,
    validation_split=0.1,
    callbacks=[LearningRateScheduler(lr_exp_decay, verbose=1), early_stopper],
)

In [None]:
# Turn predicted probabilities into hard 0/1 labels: floor(p + 0.5) maps
# p >= 0.5 to 1 and everything below to 0.
ytest = np.floor(Mod.predict([test_data_encoder ,test_data_conv, test_data_decoder])+0.5).reshape(-1,1)
ytrain = np.floor(Mod.predict([train_data_encoder ,train_data_conv, train_data_decoder])+0.5).reshape(-1,1)

# Confusion matrices and per-class reports, train first and test second.
print(confusion_matrix(train_target.reshape(-1,1), ytrain))
print(confusion_matrix(test_target.reshape(-1,1), ytest))
print(classification_report(train_target.reshape(-1,1), ytrain))
print(classification_report(test_target.reshape(-1,1), ytest))

# F2 score (recall weighted above precision). This needs beta=2; the previous
# beta=.2 computed F0.2, which weights precision far more and does not match
# the f2_* names.
f2_test = fbeta_score(test_target.reshape(-1,1), ytest,beta=2)
f2_train = fbeta_score(train_target.reshape(-1,1), ytrain,beta=2)

print(f2_test)
print(f2_train)

In [None]:
def plot_loss(history):
  """Plot training (and, when available, validation) loss on a log-scaled y axis.

  Args:
    history: Object exposing ``epoch`` (x values) and ``history`` (a dict of
      per-epoch metric lists containing 'loss' and optionally 'val_loss'),
      such as the value returned by ``Mod.fit``.
  """
  plt.semilogy(history.epoch, history.history['loss'],
               label='Train')
  # 'val_loss' is absent when training ran without validation data.
  if 'val_loss' in history.history:
    plt.semilogy(history.epoch, history.history['val_loss'],
                 label='Val ',
                 linestyle="--")
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  # Without this call the curve labels above are never rendered.
  plt.legend()
  

In [None]:
# Plot the loss curves recorded in History (the result of Mod.fit).
plot_loss(History)