In [None]:
import itertools
import os
import pickle
import random

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Project helpers; kept last so its names shadow exactly as before.
from Functions import *
# Notebook-wide configuration: random seed and input/output locations.
SEED = 0
tf.keras.utils.set_random_seed(SEED)

# Locations default to the original absolute paths but can be overridden per
# machine through environment variables, so the notebook is not tied to one
# drive layout. Later cells build file names with `path + "name.csv"`, so the
# value must end in a path separator: os.path.join(p, "") appends one only
# when it is missing (the defaults are therefore unchanged).
data_path = os.path.join(os.environ.get("FOREX_DATA_PATH", "M:/Dissertation/Data/"), "")
results_path = os.path.join(
    os.environ.get("FOREX_RESULTS_PATH", "M:/Dissertation/Return_Prediction/Deep_Learning/Results/"),
    "",
)

In [None]:
# Reading the Data
#
# Loads the hourly forex file, keeps 2016-2017, and builds for every
# instrument:
#   * `<pair>_T` - the return of the current row (prediction target), and
#   * `<pair>_R` - the return of the previous row (lagged input feature).
RETURN_COLS = ['EUR/USD_R','EUR/GBP_R','GBP/USD_R','XAU/USD_R']

orig_data = pd.read_csv(data_path+"Forex_Data.csv")
orig_data["Date"] = pd.to_datetime(orig_data["Date"],format="%Y-%m-%d %H:00:00")

in_study_period = (orig_data.Date>='2016-01-01')&(orig_data.Date<'2018-01-01')
# Sort chronologically BEFORE lagging: shift(1) is purely positional, so the
# "previous return" is only the previous hour's return if the rows are already
# in time order. A stable sort keeps the file order for any duplicate timestamps.
orig_data = (
    orig_data.loc[in_study_period]
    .sort_values(by=["Date"], kind="mergesort")
    .reset_index(drop=True)
)

# Copy the un-lagged returns into the target columns first ...
for return_col in RETURN_COLS:
    orig_data[return_col.split('_')[0]+'_T'] = orig_data[return_col]

# ... then replace each return column by its one-row lag.
for return_col in RETURN_COLS:
    orig_data[return_col] = orig_data[return_col].shift(1)

# The first row has no previous return (NaN after the shift) and is dropped,
# together with any row whose lagged return is missing.
orig_data = orig_data.dropna(subset=RETURN_COLS).reset_index(drop=True)

In [None]:
# HyperParameter Tuning using Time-Series CrossValidation
#
# Exhaustive grid search over window size, network width/depth, learning rate
# and batch size. For every combination the model is trained once per fold,
# the per-target validation MSEs (on the original, un-scaled targets) are
# summed, and the mean of that sum over the folds is appended as one row to
# `result_metrics.csv` in `results_path`.
eval_window_sizes = range(6,1,-1)
eval_n_neurons = [4,6,8,10,12]
eval_n_layers = [2,3,4,5,6]
eval_learning_rate = [0.001,0.0025,0.005]
eval_batch_size = [12,24,48,72]

# Settings that do not change between grid points.
TARGETS = ['EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T']
FEATURES = list(orig_data.drop(['Date']+TARGETS,axis=1).columns)
TEST_SIZE = 71
TEST_PORTIONS = 5
FOLDS = 5
SELECTED_FOLDS = 5
EPOCHS = 3
results_file = results_path+'result_metrics.csv'

# itertools.product varies the right-most factor fastest, i.e. the same visiting
# order as nesting the loops window -> neurons -> layers -> learning rate -> batch.
param_grid = list(itertools.product(eval_window_sizes,eval_n_neurons,eval_n_layers,
                                    eval_learning_rate,eval_batch_size))
total_iterations = len(param_grid)

# Fail (or create the folder) now rather than after the first model has trained.
os.makedirs(results_path, exist_ok=True)

for counter,(window_size,n_neurons,n_layers,learning_rate,batch_size) in enumerate(param_grid,start=1):
    print(f"##### Iteration: {counter} of {total_iterations} #####")

    data = orig_data.copy()

    # RNN Data Prep
    x,y,data = rnn_data_prep(data[FEATURES],data[TARGETS],window_size,data)

    # Extracting Test Sets for Evaluation
    # All portions are sliced from the *unmodified* x / y / data and removed in
    # one step afterwards. Deleting rows from x and y between portions would
    # shift their positional indices while `data` keeps its labels, so later
    # portions could address different rows in the arrays than in the frame.
    # NOTE(review): assumes generate_test_portions yields integer row indices
    # relative to the frame it is given - confirm against Functions.py.
    portions = [np.asarray(portion) for portion in generate_test_portions(data,TEST_SIZE,TEST_PORTIONS)]
    test_portions = [data.loc[portion,:].reset_index(drop=True) for portion in portions]
    test_portions_x = [x[portion,:] for portion in portions]
    test_portions_y = [y[portion,:] for portion in portions]

    if portions:
        held_out_rows = np.concatenate(portions)
        data = data.loc[~(data.index.isin(held_out_rows)),:]
        x = np.delete(x,held_out_rows,axis=0)
        y = np.delete(y,held_out_rows,axis=0)
    train_data = data.reset_index(drop=True).copy()

    # Getting Train and Validation Sets for Training
    train_portions_x,train_portions_y,valid_portions_x,valid_portions_y = get_folds(x,y,train_data,FOLDS,SELECTED_FOLDS)

    valid_mses = []
    for fold in range(SELECTED_FOLDS):
        # Scale Features
        x_train = train_portions_x[fold]
        x_valid = valid_portions_x[fold]
        x_test = test_portions_x
        x_train_scaled,x_valid_scaled,x_test_scaled = x_scaler(x_train,x_valid,x_test,TSScaler())

        # Scale Targets
        y_train = train_portions_y[fold]
        y_valid = valid_portions_y[fold]
        y_test = test_portions_y
        y_train_scaled,y_valid_scaled,y_test_scaled = y_scaler(y_train,y_valid,y_test,TSScaler(range=(-1,1)))

        # Training the Model
        model = RNN(window_size,x_train_scaled.shape[2],n_neurons,n_layers,'tanh','mse',tf.keras.optimizers.Adam(learning_rate=learning_rate))
        # NOTE(review): `use_multiprocessing` is documented for generator /
        # Sequence inputs only and is not accepted by newer Keras releases -
        # confirm it is still wanted before upgrading TensorFlow.
        model.fit([x_train_scaled],[y_train_scaled],verbose=False,use_multiprocessing=True,batch_size=batch_size,epochs=EPOCHS,shuffle=True)

        # Loading Scaler Objects
        # NOTE(review): 'scaler_y.pkl' is read from the current working directory
        # and is presumably written by y_scaler() above - confirm. pickle.load
        # executes arbitrary code, so only ever load files produced by this project.
        with open('scaler_y.pkl','rb') as file:
            y_scaler_obj = pickle.load(file)

        # Predicting the Validation Set
        valid_pred = model.predict([x_valid_scaled],verbose=False)
        valid_pred = y_scaler_obj.inverse_transform(valid_pred)
        # Fold score = sum of the per-target MSEs against the un-scaled targets.
        valid_mse = sum(mean_squared_error(y_valid[:,i],valid_pred[:,i]) for i in range(valid_pred.shape[1]))
        valid_mses.append(valid_mse)
    tf.keras.backend.clear_session()

    results = {"Total_Error":[np.mean(valid_mses)],"Features":"CurrentReturns+CurrentPrices",
                "Attention":"Removed","LR,Layers,Neurons,BatchSize":str((learning_rate,n_layers,n_neurons,batch_size)),
                "Window":[window_size],"Model":"LSTM"}
    results = pd.DataFrame(results)

    # Saving Results
    # Append one row per grid point; the header is written only when the file
    # does not exist yet (mode='a' creates the file in that case).
    write_header = not os.path.isfile(results_file)
    results.to_csv(results_file, mode='a', header=write_header, index=False)