In [1]:
import pandas as pd
import numpy as np
import random
import os
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.base import BaseEstimator, TransformerMixin
import pickle
import tensorflow_probability as tfp
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from keras.utils.vis_utils import plot_model
from Functions import *
tf.keras.utils.set_random_seed(0)

data_path = "M:/Dissertation/Data/"
results_path = "M:/Dissertation/Return_Prediction/Deep_Learning/Results/"

In [2]:
# RNN Parameters
window_size = 1
learning_rate = 0.001
n_neurons = 12
n_layers = 2
batch_size = 12
epochs = 3

In [3]:
# Reading the Data
data = pd.read_csv(data_path+"Forex_Data.csv")
data["Date"] = pd.to_datetime(data["Date"],format="%Y-%m-%d %H:00:00")
data = data.loc[(data.Date>='2016-01-01')&(data.Date<'2018-01-01')].reset_index(drop=True)

for col in ['EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T']:
    data[col] = data[col.split('_')[0]+'_R']

for col in ['EUR/USD_R','EUR/GBP_R','GBP/USD_R','XAU/USD_R']:
    data[col] = data[col].shift(1)
    
data = data.dropna(subset=['EUR/USD_R','EUR/GBP_R','GBP/USD_R','XAU/USD_R'])
data = data.sort_values(by=["Date"]).reset_index(drop=True)


In [4]:
# RNN Data Prep
FEATURES = list(data.drop(['Date','EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T'],axis=1).columns)
TARGETS = ['EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T']

x,y,data = rnn_data_prep(data[FEATURES],data[TARGETS],window_size,data)
print("X Shape is: ",x.shape)
print("Y Shape is: ",y.shape)
print("Data Shape is: ",data.shape)

X Shape is:  (11678, 1, 8)
Y Shape is:  (11678, 4)
Data Shape is:  (11678, 13)


In [5]:
# Extracting Test Sets for Evaluation
test_portions = []
test_portions_x = []
test_portions_y = []
TEST_SIZE = 71
TEST_PORTIONS = 5

for portion in generate_test_portions(data,TEST_SIZE,TEST_PORTIONS):
    test_portions.append(data.loc[portion,:].reset_index(drop=True))
    test_portions_x.append(x[portion,:])
    test_portions_y.append(y[portion,:])
    
    data = data.loc[~(data.index.isin(portion)),:]
    x = np.delete(x,portion,axis=0)
    y = np.delete(y,portion,axis=0)
    
train_data = data.reset_index(drop=True).copy()
print("X Shape is: ",x.shape)
print("Y Shape is: ",y.shape)
print("Data Shape is: ",train_data.shape)

X Shape is:  (11318, 1, 8)
Y Shape is:  (11318, 4)
Data Shape is:  (11318, 13)


In [6]:
# Defining the Model
model = RNN(window_size,x.shape[2],n_neurons,n_layers,'tanh','mse',tf.keras.optimizers.Adam(learning_rate=learning_rate))
plot_model(model,to_file=results_path+'rnn_plot.png',show_shapes=True,show_layer_names=True)
print(model.summary())

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1, 8)]            0         
                                                                 
 lstm (LSTM)                 (None, 1, 12)             1008      
                                                                 
 dropout (Dropout)           (None, 1, 12)             0         
                                                                 
 lstm_1 (LSTM)               (None, 12)                1200      
                                                                 
 dropout_1 (Dropout)         (None, 12)                0         
                                                                 
 dense (Dense)               (None, 4)                 52        
                                                                 
Total params: 2,260
Trainable params: 2,260
Non-trainable par

In [7]:
# Getting Train and Validation Sets for Training
FOLDS = 5
SELECTED_FOLDS = 5

train_portions_x,train_portions_y,valid_portions_x,valid_portions_y = get_folds(x,y,train_data,FOLDS,SELECTED_FOLDS)

valid_mses = []
test_preds_all = pd.DataFrame()
for fold in range(SELECTED_FOLDS):

    # Scale Features
    x_train = train_portions_x[fold]
    x_valid = valid_portions_x[fold]
    x_test = test_portions_x
    x_train_scaled,x_valid_scaled,x_test_scaled = x_scaler(x_train,x_valid,x_test,TSScaler())

    # Scale Targets
    y_train = train_portions_y[fold]
    y_valid = valid_portions_y[fold]
    y_test = test_portions_y
    y_train_scaled,y_valid_scaled,y_test_scaled = y_scaler(y_train,y_valid,y_test,TSScaler(range=(-1,1)))
    
    # Training the Model
    model = RNN(window_size,x.shape[2],n_neurons,n_layers,'tanh','mse',tf.keras.optimizers.Adam(learning_rate=learning_rate))
    model.fit([x_train_scaled],[y_train_scaled],validation_data=([x_valid_scaled],[y_valid_scaled]),batch_size=batch_size,use_multiprocessing=True,verbose=False,epochs=epochs,shuffle=True)

    # Loading Scaler Objects
    with open('scaler_y.pkl','rb') as file:
        y_scaler_obj = pickle.load(file)

    # Predicting the Validation Set
    valid_pred = model.predict([x_valid_scaled],verbose=False)
    valid_pred = y_scaler_obj.inverse_transform(valid_pred)
    valid_mse = 0
    for i in range(valid_pred.shape[1]):
        valid_mse += mean_squared_error(y_valid[:,i],valid_pred[:,i])
    valid_mses.append(valid_mse)

    # Predicting the Test Set
    test_pred_df = pd.DataFrame()
    test_portions_copy = test_portions.copy()
    for i in range(TEST_PORTIONS):
        test_pred = model.predict([x_test_scaled[i]],verbose=False)
        test_pred = y_scaler_obj.inverse_transform(test_pred)
        test_pred = pd.DataFrame(test_pred,columns=['EUR/USD_P','EUR/GBP_P','GBP/USD_P','XAU/USD_P'])
        test_portions_copy[i] = pd.concat([test_portions_copy[i],test_pred],axis=1)

        # Saving Predictions
        for col in ["GBP/USD","EUR/USD","EUR/GBP","XAU/USD"]:
            test_portions_copy[i][col+'_PP'] = (test_portions_copy[i][col+'_P']+1) * test_portions_copy[i][col]
            test_portions_copy[i][col+'_PP'] = test_portions_copy[i][col+'_PP'].shift(1)

        test_portions_copy[i]["Portion"] = i
        test_pred_df = pd.concat([test_pred_df,test_portions_copy[i][['Date','Portion']+TARGETS+['EUR/USD_P','EUR/GBP_P','GBP/USD_P','XAU/USD_P']+['EUR/USD_PP','EUR/GBP_PP','GBP/USD_PP','XAU/USD_PP']+['EUR/USD','EUR/GBP','GBP/USD','XAU/USD']]])
    test_preds_all = pd.concat([test_preds_all,test_pred_df])

    print('#' * 25)
    print('### Fold', fold + 1)
    print('### Train size:', len(x_train_scaled), 'Valid size:', len(x_valid_scaled), 'Test size:', len(x_test_scaled[0])*TEST_PORTIONS)
    print('### Validation Loss:', valid_mse)
    print('#' * 25)

tf.keras.backend.clear_session()
print("\n")
print('#' * 25)
print('### Avg Validation Loss:', np.mean(valid_mses))
print('#' * 25)

#########################
### Fold 1
### Train size: 9054 Valid size: 2264 Test size: 360
### Validation Loss: 3.17849483948998e-06
#########################
#########################
### Fold 2
### Train size: 9054 Valid size: 2264 Test size: 360
### Validation Loss: 3.2320212781657987e-06
#########################
#########################
### Fold 3
### Train size: 9054 Valid size: 2264 Test size: 360
### Validation Loss: 2.9379789019672656e-06
#########################
#########################
### Fold 4
### Train size: 9055 Valid size: 2263 Test size: 360
### Validation Loss: 3.177204961766859e-06
#########################
#########################
### Fold 5
### Train size: 9055 Valid size: 2263 Test size: 360
### Validation Loss: 3.2537450930242318e-06
#########################


#########################
### Avg Validation Loss: 3.1558890148828276e-06
#########################


In [8]:
# Averaging the Predictions of all Folds
test_preds_all = test_preds_all.groupby(by=["Date","Portion"],as_index=False).mean()
test_preds_all.to_csv(results_path+"Test_Results.csv",index=False)

# Calculating MSE for each Portion
res = {}
results = pd.DataFrame()
for i in range(TEST_PORTIONS):
    for col in ["GBP/USD","EUR/USD","EUR/GBP","XAU/USD"]:
        res[col] = [mean_squared_error(test_preds_all.loc[test_preds_all.Portion==i,col+'_P'],test_preds_all.loc[test_preds_all.Portion==i,col+'_T'])]
    results = pd.concat([results,pd.DataFrame(res)])

# Average Error of Test Portions
results = results.reset_index(drop=True)
results = pd.DataFrame(results.mean()).transpose().reset_index(drop=True)
results.loc[0].sum()

3.421605989423089e-06