In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from Functions import *
import lightgbm as lgb
tf.keras.utils.set_random_seed(1)

data_path = "M:/Dissertation/Data/"
results_path = "M:/Dissertation/Return_Prediction/Machine_Learning/Results/"

In [2]:
# Reading the Data
data = pd.read_csv(data_path+"Forex_Data.csv")
data["Date"] = pd.to_datetime(data["Date"],format="%Y-%m-%d %H:00:00")
data = data.loc[(data.Date>='2016-01-01')&(data.Date<'2018-01-01')].reset_index(drop=True)

for col in ['EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T']:
    data[col] = data[col.split('_')[0]+'_R']

for col in ['EUR/USD_R','EUR/GBP_R','GBP/USD_R','XAU/USD_R']:
    data[col] = data[col].shift(1)
    
data = data.dropna(subset=['EUR/USD_R','EUR/GBP_R','GBP/USD_R','XAU/USD_R'])
data = data.sort_values(by=["Date"]).reset_index(drop=True)


In [3]:
# LGBM Data Prep
FEATURES = list(data.drop(['Date','EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T'],axis=1).columns)
TARGETS = ['EUR/USD_T','EUR/GBP_T','GBP/USD_T','XAU/USD_T']

x,y = data[FEATURES].to_numpy(),data[TARGETS].to_numpy()
print("X Shape is: ",x.shape)
print("Y Shape is: ",y.shape)
print("Data Shape is: ",data.shape)

X Shape is:  (11678, 8)
Y Shape is:  (11678, 4)
Data Shape is:  (11678, 13)


In [4]:
# Extracting Test Sets for Evaluation
test_portions = []
test_portions_x = []
test_portions_y = []
TEST_SIZE = 71
TEST_PORTIONS = 5

for portion in generate_test_portions(data,TEST_SIZE,TEST_PORTIONS):
    test_portions.append(data.loc[portion,:].reset_index(drop=True))
    test_portions_x.append(x[portion,:])
    test_portions_y.append(y[portion,:])
    
    data = data.loc[~(data.index.isin(portion)),:]
    x = np.delete(x,portion,axis=0)
    y = np.delete(y,portion,axis=0)
    
train_data = data.reset_index(drop=True).copy()
print("X Shape is: ",x.shape)
print("Y Shape is: ",y.shape)
print("Data Shape is: ",train_data.shape)

X Shape is:  (11318, 8)
Y Shape is:  (11318, 4)
Data Shape is:  (11318, 13)


In [5]:
# Getting Train and Validation Sets for Training
FOLDS = 5
SELECTED_FOLDS = 5

train_portions_x,train_portions_y,valid_portions_x,valid_portions_y = get_folds(x,y,train_data,FOLDS,SELECTED_FOLDS)

valid_mses = []
test_mses = []
test_preds_all = pd.DataFrame()
for fold in range(SELECTED_FOLDS):

    # Scale Features
    x_train = train_portions_x[fold]
    x_valid = valid_portions_x[fold]
    x_test = test_portions_x
    x_train_scaled,x_valid_scaled,x_test_scaled = x_scaler(x_train,x_valid,x_test,TSScaler())

    # Scale Targets
    y_train = train_portions_y[fold]
    y_valid = valid_portions_y[fold]
    y_test = test_portions_y
    y_train_scaled,y_valid_scaled,y_test_scaled = y_scaler(y_train,y_valid,y_test,TSScaler(range=(-1,1)))
    
    # Modelling
    valid_pred = np.zeros(y_valid.shape)
    test_pred = y_test_scaled.copy()

    for t in range(y_train.shape[1]):
        
        # Defining Model
        params = {'n_estimators': 225,
                  'boosting_type': 'gbdt',
                  'verbosity': -1,
                  'objective': 'l2',
                  'colsample_bytree': 0.9,
                  'colsample_bynode': 0.1,
                  'max_depth': 8,
                  'learning_rate': 0.003647749926797374,
                  'reg_lambda': 0.5,
                  'num_leaves': 61,
                  'n_jobs': -1,
                  'seed': 42}

        lgb_model = lgb.LGBMRegressor(**params)
        
        # Model Training
        lgb_model.fit(x_train_scaled,y_train_scaled[:,t])

        # Saving Predictions on Validation set
        valid_pred[:,t] = lgb_model.predict(x_valid_scaled)

        # Saving Predictions on Test set
        for i in range(TEST_PORTIONS):
            test_pred[i][:,t] = lgb_model.predict(x_test_scaled[i])
    
    # Loading Scaler Objects
    with open('scaler_y.pkl','rb') as file:
        y_scaler_obj = pickle.load(file)

    # Validation Set Loss
    valid_mse = []
    valid_pred = y_scaler_obj.inverse_transform(valid_pred)
    for i in range(valid_pred.shape[1]):
        valid_mse.append(mean_squared_error(y_valid[:,i],valid_pred[:,i]))
    valid_mses.append(np.mean(valid_mse))

    # Predicting the Test Set
    test_mse = []
    test_pred_df = pd.DataFrame()
    test_portions_copy = test_portions.copy()
    for i in range(TEST_PORTIONS):
        tp = y_scaler_obj.inverse_transform(test_pred[i])
        tp = pd.DataFrame(tp,columns=['EUR/USD_P','EUR/GBP_P','GBP/USD_P','XAU/USD_P'])
        test_portions_copy[i] = pd.concat([test_portions_copy[i],tp],axis=1)

        # Saving Predictions
        for col in ["GBP/USD","EUR/USD","EUR/GBP","XAU/USD"]:
            test_mse.append(mean_squared_error(test_portions_copy[i][col+'_T'],test_portions_copy[i][col+'_P']))
            test_portions_copy[i][col+'_PP'] = (test_portions_copy[i][col+'_P']+1) * test_portions_copy[i][col]
            test_portions_copy[i][col+'_PP'] = test_portions_copy[i][col+'_PP'].shift(1)

        test_portions_copy[i]["Portion"] = i
        test_pred_df = pd.concat([test_pred_df,test_portions_copy[i][['Date','Portion']+TARGETS+['EUR/USD_P','EUR/GBP_P','GBP/USD_P','XAU/USD_P']+['EUR/USD_PP','EUR/GBP_PP','GBP/USD_PP','XAU/USD_PP']+['EUR/USD','EUR/GBP','GBP/USD','XAU/USD']]])
    test_mses.append(np.mean(test_mse))
    test_preds_all = pd.concat([test_preds_all,test_pred_df])

    print('#' * 25)
    print('### Fold', fold + 1)
    print('### Train size:', len(x_train_scaled), 'Valid size:', len(x_valid_scaled), 'Test size:', len(x_test_scaled[0])*TEST_PORTIONS)
    print('### Validation MSE:', np.mean(valid_mse))
    print('### Test MSE:', np.mean(test_mse))
    print('#' * 25)

# # Averaging the Predictions of all Folds
test_preds_all = test_preds_all.groupby(by=["Date","Portion"],as_index=False).mean()
test_preds_all.to_csv(results_path+"Test_Results.csv",index=False)

print("\n")
print('#' * 25)
print('### Avg Validation MSE:', np.mean(valid_mses))
print('### Avg Test MSE:', np.mean(test_mses))
print('#' * 25)

#########################
### Fold 1
### Train size: 9054 Valid size: 2264 Test size: 360
### Validation MSE: 7.884903276349632e-07
### Test MSE: 8.561681735864634e-07
#########################
#########################
### Fold 2
### Train size: 9054 Valid size: 2264 Test size: 360
### Validation MSE: 8.048835444172296e-07
### Test MSE: 8.593108315640608e-07
#########################
#########################
### Fold 3
### Train size: 9054 Valid size: 2264 Test size: 360
### Validation MSE: 7.330231381890752e-07
### Test MSE: 8.53376093802567e-07
#########################
#########################
### Fold 4
### Train size: 9055 Valid size: 2263 Test size: 360
### Validation MSE: 7.92280085709623e-07
### Test MSE: 8.535933646636935e-07
#########################
#########################
### Fold 5
### Train size: 9055 Valid size: 2263 Test size: 360
### Validation MSE: 8.138000348464475e-07
### Test MSE: 8.574075141330526e-07
#########################


#########################
### 