### **Importing the Libraries**

In [1]:
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import mean_squared_error,accuracy_score
import pickle
import matplotlib.pyplot as plt
import tensorflow_probability as tfp
from Functions import *
tf.keras.utils.set_random_seed(0)

data_path = "M:/Dissertation/Data/"
test_data_path = "M:/Dissertation/Return_Prediction/Deep_Learning/Results/"
results_path = "M:/Dissertation/Price_Prediction/Results/"

### **Extracting Test Portions**  

In [2]:
test_portions = pd.read_csv(test_data_path+"Test_Results.csv")
test_portions["Date"] = pd.to_datetime(test_portions["Date"],format="%Y-%m-%d %H:00:00")
test_portions = pd.concat([test_portions.groupby(by=["Portion"],as_index=False).agg({"Date":"min"}),
                           test_portions.groupby(by=["Portion"]).agg({"Date":"max"})],ignore_index=True,axis=1).rename(columns={0:"Portion",1:"MinDate",2:"MaxDate"})
test_portions

Unnamed: 0,Portion,MinDate,MaxDate
0,0,2016-06-20 23:00:00,2016-06-24 01:00:00
1,1,2017-11-06 08:00:00,2017-11-09 10:00:00
2,2,2017-09-07 10:00:00,2017-09-12 13:00:00
3,3,2016-07-22 08:00:00,2016-07-27 11:00:00
4,4,2016-10-31 06:00:00,2016-11-03 08:00:00


In [4]:
# Reading the Data
all_data = pd.read_csv(data_path+"Forex_Data.csv")
all_data["Date"] = pd.to_datetime(all_data["Date"],format="%Y-%m-%d %H:00:00")
all_data["Test_Portion"] = 0

# Iterating over the Test Portions
final_results = pd.DataFrame()
for p in list(test_portions.Portion.unique()):

    min_date = test_portions.loc[test_portions.Portion==p,"MinDate"].iloc[0]
    max_date = test_portions.loc[test_portions.Portion==p,"MaxDate"].iloc[0]

    all_data.loc[(all_data.Date>=min_date)&(all_data.Date<=max_date),"Test_Portion"] = 1

all_data.to_csv("TEST_PORTIONS.csv",index=False)

### **RNN Params**

In [3]:
# Data Related
window_size = 12
train_size = 0.95

# Model Related
n_neurons = 164
learning_rate = 0.005
batch_size = 48
epochs = 500

### **Training the Model and Generating Predictions for Each Test Portion**

In [4]:
# Reading the Data
all_data = pd.read_csv(data_path+"Forex_Data.csv")
all_data["Date"] = pd.to_datetime(all_data["Date"],format="%Y-%m-%d %H:00:00")

# Iterating over the Test Portions
final_results = pd.DataFrame()
for p in list(test_portions.Portion.unique()):

    # Slicing the Data as per the Min and Max Dates of Each Test Portion
    min_date = test_portions.loc[test_portions.Portion==p,"MinDate"].iloc[0]
    max_date = test_portions.loc[test_portions.Portion==p,"MaxDate"].iloc[0]
    
    data = all_data.loc[(all_data.Date>=min_date-pd.DateOffset(years=3))&(all_data.Date<=max_date)].reset_index(drop=True)

    # Making Return as Feature
    for col in ['EUR/USD_R','EUR/GBP_R','GBP/USD_R','XAU/USD_R']:
        data[col] = data[col].shift(1)
    data = data.dropna(subset=['EUR/USD_R','EUR/GBP_R','GBP/USD_R','XAU/USD_R'])
    data = data.sort_values(by=["Date"]).reset_index(drop=True)
    orig_data = data.copy()

    # Setting Test Values to NaN
    data.loc[(data.Date>=min_date)&(data.Date<=max_date),["GBP/USD_T","EUR/GBP_T","EUR/USD_T","XAU/USD_T"]] = np.nan

    # Features
    x = data[["GBP/USD","EUR/USD","EUR/GBP","XAU/USD","GBP/USD_R","EUR/USD_R","EUR/GBP_R","XAU/USD_R"]]

    # Targets
    y_1 = data[["GBP/USD_T"]]
    y_2 = data[["EUR/USD_T"]]
    y_3 = data[["EUR/GBP_T"]]
    y_4 = data[["XAU/USD_T"]]

    # Generating Data for LSTM
    train_x,valid_x,test_x,orig_train_x,orig_valid_x,orig_test_x,train_y1,valid_y1 = rnn_data_prep(x,y_1,window_size,train_size,TSScaler(),orig_data,'1')
    train_x,valid_x,test_x,orig_train_x,orig_valid_x,orig_test_x,train_y2,valid_y2 = rnn_data_prep(x,y_2,window_size,train_size,TSScaler(),orig_data,'2')
    train_x,valid_x,test_x,orig_train_x,orig_valid_x,orig_test_x,train_y3,valid_y3 = rnn_data_prep(x,y_3,window_size,train_size,TSScaler(),orig_data,'3')
    train_x,valid_x,test_x,orig_train_x,orig_valid_x,orig_test_x,train_y4,valid_y4 = rnn_data_prep(x,y_4,window_size,train_size,TSScaler(),orig_data,'4')

    # Defining the Model
    model = RNN(window_size,train_x.shape[2],n_neurons,'tanh',weighted_mse,tf.keras.optimizers.Adam(learning_rate=learning_rate))

    # Early stopping and Model Checkpoint
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss',mode='min',verbose=0,patience=30)
    mc = tf.keras.callbacks.ModelCheckpoint('lstm.h5',monitor='val_loss',mode='min',verbose=0,save_best_only=True)

    # Training the Model
    model.fit([train_x],[train_y1,train_y2,train_y3,train_y4],validation_data=([valid_x],[valid_y1,valid_y2,valid_y3,valid_y4]),batch_size=batch_size,epochs=epochs,callbacks=[es,mc],use_multiprocessing=True,verbose=0)
    tf.keras.backend.clear_session()

    # Loading Scaler Objects
    scaler_objs = []
    with open('scaler_1.pkl','rb') as file:
        y_scaler_1 = pickle.load(file)
        scaler_objs.append(y_scaler_1)
    with open('scaler_2.pkl','rb') as file:
        y_scaler_2 = pickle.load(file)
        scaler_objs.append(y_scaler_2)
    with open('scaler_3.pkl','rb') as file:
        y_scaler_3 = pickle.load(file)
        scaler_objs.append(y_scaler_3)
    with open('scaler_4.pkl','rb') as file:
        y_scaler_4 = pickle.load(file)
        scaler_objs.append(y_scaler_4)

    # Loading Model
    model = tf.keras.models.load_model('lstm.h5',custom_objects={'attention':attention,'weighted_mse':weighted_mse})

    # Predictions on Validation Set
    valid_pred = model.predict([valid_x],verbose=0)
    valid_pred = compile_predictions(valid_pred,len(valid_pred),scaler_objs,["GBP/USD_P","EUR/USD_P","EUR/GBP_P","XAU/USD_P"])
    orig_valid_x = pd.concat([orig_valid_x,valid_pred],axis=1).reset_index(drop=True)
    orig_valid_x = orig_valid_x[['Date','EUR/USD_P','EUR/GBP_P','GBP/USD_P','XAU/USD_P','EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T']]
    
    valid_accs = []
    for col in ["GBP/USD","EUR/USD","EUR/GBP","XAU/USD"]:
        orig_valid_x[col+"_R"] = (orig_valid_x[col+"_T"].shift(-1)/orig_valid_x[col+"_T"])-1
        orig_valid_x[col+"_PR"] = (orig_valid_x[col+"_P"].shift(-1)/orig_valid_x[col+"_P"])-1
        
        orig_valid_x["T1"] = orig_valid_x[col+"_T"].shift(-1)
        orig_valid_x["P1"] = orig_valid_x[col+"_P"].shift(-1)

        orig_valid_x["T_Inc"] = 0
        orig_valid_x.loc[orig_valid_x["T1"] > orig_valid_x[col+"_T"],"T_Inc"] = 1

        orig_valid_x["P_Inc"] = 0
        orig_valid_x.loc[orig_valid_x["P1"] > orig_valid_x[col+"_P"],"P_Inc"] = 1

        valid_accs.append(accuracy_score(orig_valid_x["T_Inc"],orig_valid_x["P_Inc"]))
    
    # Predictions on Test Set
    test_pred = model.predict([test_x],verbose=0)
    test_pred = compile_predictions(test_pred,len(test_pred),scaler_objs,["GBP/USD_P","EUR/USD_P","EUR/GBP_P","XAU/USD_P"])
    orig_test_x = pd.concat([orig_test_x,test_pred],axis=1).reset_index(drop=True)
    orig_test_x = orig_test_x[['Date','EUR/USD_P','EUR/GBP_P','GBP/USD_P','XAU/USD_P','EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T']]
    orig_test_x["Portion"] = p

    test_accs = []
    for col in ["GBP/USD","EUR/USD","EUR/GBP","XAU/USD"]:

        orig_test_x[col+"_R"] = (orig_test_x[col+"_T"].shift(-1)/orig_test_x[col+"_T"])-1
        orig_test_x[col+"_PR"] = (orig_test_x[col+"_P"].shift(-1)/orig_test_x[col+"_P"])-1

        orig_test_x["T1"] = orig_test_x[col+"_T"].shift(-1)
        orig_test_x["P1"] = orig_test_x[col+"_P"].shift(-1)

        orig_test_x["T_Inc"] = 0
        orig_test_x.loc[orig_test_x["T1"] > orig_test_x[col+"_T"],"T_Inc"] = 1

        orig_test_x["P_Inc"] = 0
        orig_test_x.loc[orig_test_x["P1"] > orig_test_x[col+"_P"],"P_Inc"] = 1

        test_accs.append(accuracy_score(orig_test_x["T_Inc"],orig_test_x["P_Inc"]))

    orig_test_x = orig_test_x.drop(["T1","P1","T_Inc","P_Inc"],axis=1)
    final_results = pd.concat([final_results,orig_test_x])

    print('#' * 25)
    print('### Portion', p + 1)
    print('### Avg Validation Accuracy Score:', np.mean(valid_accs))
    print('### Avg Test Accuracy Score:', np.mean(test_accs))
    print('#' * 25)

# Saving the Results
final_results.to_csv(results_path+"Test_Results.csv",index=False)

#########################
### Portion 1
### Avg Validation Accuracy Score: 0.5558312655086848
### Avg Test Accuracy Score: 0.5902777777777778
#########################
#########################
### Portion 2
### Avg Validation Accuracy Score: 0.576302729528536
### Avg Test Accuracy Score: 0.5416666666666666
#########################
#########################
### Portion 3
### Avg Validation Accuracy Score: 0.5517990074441688
### Avg Test Accuracy Score: 0.5729166666666667
#########################
#########################
### Portion 4
### Avg Validation Accuracy Score: 0.5603341584158416
### Avg Test Accuracy Score: 0.5381944444444444
#########################
#########################
### Portion 5
### Avg Validation Accuracy Score: 0.5490074441687345
### Avg Test Accuracy Score: 0.5277777777777778
#########################
