<a href="https://colab.research.google.com/github/African-Quant/WQU_MScFE_Capstone_Grp9/blob/master/model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -Uqq fastbook  --quiet
! pip install pyfolio --quiet
#! pip install fracdiff --quiet
import fastbook
# fastbook.setup_book()

In [None]:
import os
import re
#import fracdiff as fd     
import random
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
import pandas as pd
from pylab import mpl, plt
plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'
os.environ['PYTHONHASHSEED'] = '0'

import warnings
warnings.filterwarnings('ignore')

In [None]:
from fastbook import *

from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from fastai.tabular.all import *
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from pyfolio.timeseries import perf_stats 
from pyfolio import create_simple_tear_sheet, create_returns_tear_sheet

In [None]:
pairs = ['AUDCAD', 'AUDCHF', 'AUDJPY', 'AUDNZD', 'AUDUSD', 'CAD', 'CADCHF', 
        'CADJPY', 'CHF', 'CHFJPY', 'EURAUD', 'EURCAD', 'EURCHF', 'EURGBP', 
        'EURJPY', 'EURNZD', 'EURUSD', 'GBPAUD', 'GBPCAD', 'GBPCHF', 'GBPJPY', 
        'GBPNZD', 'GBPUSD', 'JPY', 'NZDCAD', 'NZDCHF', 'NZDJPY', 'NZDUSD']

def get_data(pair):
        ''' Retrieves (from a github repo) and prepares the data.
        '''
        url = f'https://raw.githubusercontent.com/African-Quant/WQU_MScFE_Capstone_Grp9/master/Datasets/{pair}%3DX.csv'
        raw = pd.read_csv(url)
        raw = pd.DataFrame(raw).drop(['Adj Close', 'Volume'], axis=1)
        raw.iloc[:,0] = pd.to_datetime(raw.iloc[:,0])
        raw.set_index('Date', inplace=True)
        return raw

In [None]:
d = {a:b for a, b in enumerate(pairs)}
print(d)

{0: 'AUDCAD', 1: 'AUDCHF', 2: 'AUDJPY', 3: 'AUDNZD', 4: 'AUDUSD', 5: 'CAD', 6: 'CADCHF', 7: 'CADJPY', 8: 'CHF', 9: 'CHFJPY', 10: 'EURAUD', 11: 'EURCAD', 12: 'EURCHF', 13: 'EURGBP', 14: 'EURJPY', 15: 'EURNZD', 16: 'EURUSD', 17: 'GBPAUD', 18: 'GBPCAD', 19: 'GBPCHF', 20: 'GBPJPY', 21: 'GBPNZD', 22: 'GBPUSD', 23: 'JPY', 24: 'NZDCAD', 25: 'NZDCHF', 26: 'NZDJPY', 27: 'NZDUSD'}


In [None]:
# ATR
def eATR(df1,n=14):
    """This calculates the exponential Average True Range of of a dataframe of the open,
    high, low, and close data of an instrument"""

    df = df1[['Open',	'High',	'Low',	'Close']].copy()
    # True Range
    df['TR'] = 0
    for i in range(len(df)):
      try:
        df.iloc[i, 4] = max(df.iat[i,1] - df.iat[i,2],
                         abs(df.iat[i,1] - df.iat[i-1,3]),
                         abs(df.iat[i,2] - df.iat[i-1,3]))
      except ValueError:
        pass

    # eATR
    df['eATR'] = df['TR'].ewm(span=n, adjust=False).mean()
           
    return df['eATR']

In [None]:
data = get_data(pairs[0])
data.head(1)

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-12-01,0.93995,0.94965,0.93789,0.94888


In [None]:
def ssl(df1):
  """This function adds the ssl indicator as features to a dataframe
    """
  df = df1.copy()
  df['smaHigh'] = df['High'].rolling(window=10).mean()
  df['smaLow'] = df['Low'].rolling(window=10).mean()
  df['hlv'] = 0
  df['hlv'] = np.where(df['Close'] > df['smaHigh'],1,np.where(df['Close'] < df['smaLow'],-1,df['hlv'].shift(1)))
  df['sslDown'] = np.where(df['hlv'] < 0, df['smaHigh'], df['smaLow'])
  df['sslUp'] = np.where(df['hlv'] < 0, df['smaLow'], df['smaHigh'])
  df['sslPosition'] = np.where(df['Close'] > df['sslUp'], 1,
                               np.where(df['Close'] < df['sslDown'], -1, 0))
  return df[['sslDown', 'sslUp', 'sslPosition']]

In [None]:
# Waddah Attar
def WAE(df1):
  """This function creates adds the indicator Waddah Attar features to a dataframe
    """
  df = df1.copy()

  # EMA
  long_ema = df.loc[:,'Close'].ewm(span=40, adjust=False).mean()
  short_ema = df.loc[:,'Close'].ewm(span=20, adjust=False).mean()

  # MACD
  MACD = short_ema - long_ema
  
  # bBands
  sma20 = df.loc[:,'Close'].rolling(window=20).mean()  # 20 SMA
    
  stddev = df.loc[:,'Close'].rolling(window=20).std() # 20 STDdev
  lower_band = sma20 - (2 * stddev)
  upper_band = sma20 + (2 * stddev)

  #Waddah Attar
  t1 = (MACD - MACD.shift(1))* 150
  #t2 = MACD.shift(2) - MACD.shift(3)
  df['e1'] = upper_band - lower_band
  df['e2'] = -1 *df['e1']
  #e2 = upper_band.shift(1) - lower_band.shift(1)

  df['trendUp'] = np.where(t1 > 0, t1, 0)
  df['trendDown'] =  np.where(t1 < 0, t1, 0)

  df['waePosition'] = np.where(df['trendUp'] > 0, 1,
                               np.where(df['trendDown'] < 0, -1, 0))
  
  
  return df[['e1','e2','trendUp', 'trendDown', 'waePosition']]


In [None]:
def lag_feat(data1):
  """This function adds lag returns as features to a dataframe
    """
  data = data1.copy()
  lags = 8
  cols = []
  for lag in range(1, lags + 1):
    col = f'lag_{lag}'
    data[col] = data['ret'].shift(lag)
    cols.append(col)
  return data[cols]

In [None]:

def datepart_feat(df0, colname = 'Date'):
    """This function adds some common pandas date parts like 'year',
        'month' etc as features to a dataframe
    """
    df = df0.copy()
    df.reset_index(inplace=True)
    df1 = df.loc[:,colname]
    nu_feats = ['Day', 'Dayofweek', 'Dayofyear']
    
    targ_pre = re.sub('[Dd]ate$', '', colname)
    for n in nu_feats:
        df[targ_pre+n] = getattr(df1.dt,n.lower())

    df[targ_pre+'week'] = df1.dt.isocalendar().week
    df['week'] = np.int64(df['week'])
    df[targ_pre+'Elapsed'] = df1.astype(np.int64) // 10**9
    nu_feats.extend(['week', 'Elapsed'])
    df.set_index(colname, inplace=True)
    return df[nu_feats]

In [None]:
def gen_feat(pair):
  df0 = get_data(pair).iloc[-4200:,]
  df0['ret'] = df0['Close'].pct_change()
  df0['dir'] = np.sign(df0['ret'])
  eATR_ = eATR(df0).shift(1)
  wae = WAE(df0).shift(1)
  ssl1 = ssl(df0).shift(1)
  datepart = datepart_feat(df0)
  lags = lag_feat(df0)
  return pd.concat([df0,  eATR_, wae, ssl1, datepart, lags], axis=1).dropna()

In [None]:
dataset = gen_feat(pairs[5])
dataset.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)
dataset.tail()

Unnamed: 0_level_0,ret,dir,eATR,e1,e2,trendUp,trendDown,waePosition,sslDown,sslUp,sslPosition,Day,Dayofweek,Dayofyear,week,Elapsed,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
2021-03-23,-0.000168,-1.0,0.009174,0.035617,-0.035617,0.014647,0.0,1.0,1.248515,1.256065,0.0,23,1,82,12,1616457600,0.002562,0.008144,-0.004235,-0.002629,0.000497,-0.004956,-0.007028,-0.001819
2021-03-24,0.005535,1.0,0.008972,0.035801,-0.035801,0.015538,0.0,1.0,1.247805,1.255183,0.0,24,2,83,12,1616544000,-0.000168,0.002562,0.008144,-0.004235,-0.002629,0.000497,-0.004956,-0.007028
2021-03-25,-0.001064,-1.0,0.008947,0.035789,-0.035789,0.065988,0.0,1.0,1.247115,1.254445,1.0,25,3,84,12,1616630400,0.005535,-0.000168,0.002562,0.008144,-0.004235,-0.002629,0.000497,-0.004956
2021-03-26,0.001996,1.0,0.008798,0.03532,-0.03532,0.05147,0.0,1.0,1.247066,1.254485,1.0,26,4,85,12,1616716800,-0.001064,0.005535,-0.000168,0.002562,0.008144,-0.004235,-0.002629,0.000497
2021-03-29,-0.00046,-1.0,0.008205,0.035259,-0.035259,0.065566,0.0,1.0,1.248011,1.254806,1.0,29,0,88,13,1616976000,0.001996,-0.001064,0.005535,-0.000168,0.002562,0.008144,-0.004235,-0.002629


In [None]:
cols = list(dataset.columns)

print(cols)

['ret', 'dir', 'eATR', 'e1', 'e2', 'trendUp', 'trendDown', 'waePosition', 'sslDown', 'sslUp', 'sslPosition', 'Day', 'Dayofweek', 'Dayofyear', 'week', 'Elapsed', 'lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5', 'lag_6', 'lag_7', 'lag_8']


In [None]:
feats = cols[2:]

In [None]:
df_train = dataset.iloc[:-1000,:]
train = df_train.copy()
df_test = dataset.iloc[-1000:,:]
test = df_test.copy()

In [None]:
train_f = train.iloc[:-1000,:]

valid = train.iloc[-1000:,:]


In [None]:
train_f[feats].columns

Index(['eATR', 'e1', 'e2', 'trendUp', 'trendDown', 'waePosition', 'sslDown',
       'sslUp', 'sslPosition', 'Day', 'Dayofweek', 'Dayofyear', 'week',
       'Elapsed', 'lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5', 'lag_6',
       'lag_7', 'lag_8'],
      dtype='object')

### Using  *Random Forest* to Predict Market Direction.

In [None]:
def rfc(xs, y, n_estimators=40, max_samples=1000,
       max_features=0.5, min_samples_leaf=5, **kwargs):
    return RandomForestClassifier(n_jobs=-1, n_estimators=n_estimators,
        max_samples=max_samples, max_features=max_features,
        min_samples_leaf=min_samples_leaf, oob_score=True).fit(xs, y)

In [140]:
def rfc_results():
  """This function trains a Random Forest classifier and outputs the 
  out-of-sample performance from the validation and test sets
  """
  df1 = pd.DataFrame() 
  df2 = pd.DataFrame()
  for pair in pairs:
    # retrieving the data and preparing the features
    dataset = gen_feat(pair)
    dataset.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)

    # selecting the features to train on
    cols = list(dataset.columns)
    feats = cols[2:]

    #splitting into training, validation and test sets
    df_train = dataset.iloc[:-1000,:]
    train = df_train.copy()
    df_test = dataset.iloc[-1000:,:]
    test = df_test.copy()
    train_f = train.iloc[:-1000,:]
    valid = train.iloc[-1000:,:]

    #training the algorithm
    m = rfc(train_f[feats], train_f['dir']);

    #preparing results for both validation and test sets
    valid_pred = m.predict(valid[feats])
    # valid_acc = accuracy_score(valid['dir'], valid_pred)
    test_pred = m.predict(test[feats])
    # test_acc = accuracy_score(test['dir'], test_pred)

    # Results for validation set
    valid_results = perf_stats(valid_pred*valid['ret']).to_frame(name=pair)
    valid_results = valid_results.rename_axis('pairs')

    # Results for test set
    test_results = perf_stats(test_pred*test['ret']).to_frame(name=pair)
    test_results = test_results.rename_axis('pairs')

    # Merge
    df1 = pd.concat([df1, valid_results], axis=1)
    df2 = pd.concat([df2, test_results], axis=1)

  #output
  return df1.T, df2.T

In [141]:
rfc_results_valid, rfc_results_test = rfc_results()

In [142]:
rfc_results_valid

pairs,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk
AUDCAD,-0.009436,-0.036921,0.084188,-0.07052,-0.063035,0.124531,-0.149686,0.988404,-0.097339,-0.213214,0.903046,0.941296,-0.01063
AUDCHF,-0.051612,-0.189646,0.135773,-0.319898,-0.166955,0.7898,-0.309137,0.938816,-0.390638,-5.766895,102.234437,0.974183,-0.017278
AUDJPY,-0.040937,-0.152843,0.126355,-0.267525,-0.111959,0.826554,-0.365646,0.955519,-0.359178,-0.366412,1.946363,0.866643,-0.016053
AUDNZD,0.023246,0.095478,0.078112,0.333258,0.196081,0.360716,-0.118553,1.057612,0.462121,-0.286284,1.719728,0.907604,-0.009738
AUDUSD,-0.036146,-0.135924,0.104637,-0.299528,-0.134445,0.572568,-0.268857,0.952603,-0.413656,-0.055071,0.278329,0.934689,-0.013307
CAD,-0.053799,-0.197036,0.077175,-0.677938,-0.20149,0.766284,-0.267006,0.893764,-0.948636,0.158614,1.020315,0.99181,-0.009931
CADCHF,0.029766,0.123441,0.126518,0.298209,0.122041,0.055662,-0.243904,1.066262,0.367127,-8.253253,171.863788,1.194609,-0.01579
CADJPY,0.007625,0.030604,0.117346,0.123269,0.026966,0.122359,-0.282779,1.021585,0.179854,0.305015,2.892791,1.059765,-0.014727
CHF,-0.039938,-0.149335,0.120557,-0.274509,-0.109544,0.588577,-0.364584,0.940775,-0.322839,-9.533363,202.729731,0.933167,-0.01532
CHFJPY,-0.063157,-0.22809,0.132012,-0.423987,-0.210224,0.556153,-0.300427,0.907585,-0.499939,-9.852099,217.349226,1.031534,-0.016854


In [143]:
rfc_results_test

pairs,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk
AUDCAD,-0.023944,-0.091692,0.072356,-0.298739,-0.081714,0.670248,-0.293023,0.950166,-0.408385,-0.37587,3.647413,1.018495,-0.009202
AUDCHF,-0.016129,-0.062487,0.092672,-0.129138,-0.052202,0.707204,-0.308968,0.978847,-0.179873,-0.112541,1.267715,0.940482,-0.011723
AUDJPY,0.034689,0.144906,0.108252,0.36912,0.204849,0.24879,-0.169341,1.065672,0.528905,-0.049909,2.659303,1.054888,-0.01348
AUDNZD,-0.005575,-0.021941,0.05585,-0.072187,-0.063941,0.086044,-0.087193,0.988111,-0.100147,-0.25094,2.083767,1.027484,-0.007052
AUDUSD,-0.038069,-0.142743,0.094926,-0.361364,-0.110231,0.762562,-0.345356,0.939902,-0.49381,-0.247119,2.69993,0.945024,-0.012096
CAD,0.034668,0.144814,0.073016,0.503187,0.203744,0.000719,-0.170156,1.092657,0.773188,0.637964,4.916182,1.124168,-0.009053
CADCHF,-0.042283,-0.15755,0.080095,-0.499283,-0.230089,0.738217,-0.183769,0.920389,-0.674796,-0.406059,2.815421,0.985926,-0.01025
CADJPY,-0.037371,-0.140272,0.095637,-0.350415,-0.111084,0.813953,-0.33642,0.942587,-0.486275,-0.090443,2.720504,0.971875,-0.012182
CHF,-0.008698,-0.034072,0.066811,-0.097357,-0.039779,0.112897,-0.218651,0.983825,-0.135322,-0.198739,1.992724,1.00216,-0.008443
CHFJPY,0.002568,0.010228,0.063705,0.072058,0.02443,0.009587,-0.105101,1.01218,0.103665,0.202681,2.920751,1.028668,-0.008008


Light GradientBoosting

In [111]:
def lgb(xs, y, learning_rate=0.15, boosting_type='gbdt',
        objective='binary', n_estimators=50,
        metric=['auc', 'binary_logloss'],
        num_leaves=100, max_depth= 1,
        **kwargs):
    return LGBMClassifier().fit(xs, y)

In [137]:
def lgb_results():
  """This function trains a Light Gradient Boosting Method and outputs the 
  out-of-sample performance from the validation and test sets
  """
  df1 = pd.DataFrame() 
  df2 = pd.DataFrame()
  for pair in pairs:
    # retrieving the data and preparing the features
    dataset = gen_feat(pair)
    dataset.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)

    # selecting the features to train on
    cols = list(dataset.columns)
    feats = cols[2:]

    #splitting into training, validation and test sets
    df_train = dataset.iloc[:-1000,:]
    train = df_train.copy()
    df_test = dataset.iloc[-1000:,:]
    test = df_test.copy()
    train_f = train.iloc[:-1000,:]
    valid = train.iloc[-1000:,:]

    #training the algorithm
    m = lgb(train_f[feats], train_f['dir']);

    #preparing results for both validation and test sets
    valid_pred = m.predict(valid[feats])
    # valid_acc = accuracy_score(valid['dir'], valid_pred)
    test_pred = m.predict(test[feats])
    # test_acc = accuracy_score(test['dir'], test_pred)

    # Results for validation set
    valid_results = perf_stats(valid_pred*valid['ret']).to_frame(name=pair)
    valid_results = valid_results.rename_axis('pairs')

    # Results for test set
    test_results = perf_stats(test_pred*test['ret']).to_frame(name=pair)
    test_results = test_results.rename_axis('pairs')

    # Merge
    df1 = pd.concat([df1, valid_results], axis=1)
    df2 = pd.concat([df2, test_results], axis=1)

  #output
  return df1.transpose(), df2.transpose()

In [138]:
lgb_results_valid, lgb_results_test = lgb_results()

In [139]:
lgb_results_valid

statistic,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk
AUDCAD,-0.007652,-0.030023,0.084188,-0.049147,-0.044033,0.055104,-0.173787,0.991904,-0.06742,-0.277135,0.903384,0.934395,-0.010623
AUDCHF,0.057857,0.250064,0.135737,0.48473,0.276138,0.171244,-0.209522,1.10041,0.609551,-5.789234,103.515171,1.030809,-0.01684
AUDJPY,-0.011053,-0.043147,0.126372,-0.024699,-0.045043,0.625443,-0.245385,0.995808,-0.033742,-0.34797,1.967797,0.992771,-0.015934
AUDNZD,0.017935,0.073086,0.078118,0.266613,0.122905,0.034389,-0.145923,1.04583,0.368994,-0.276315,1.71371,0.910833,-0.009759
AUDUSD,-0.040273,-0.150512,0.104632,-0.340536,-0.161875,0.318498,-0.248793,0.946291,-0.467353,-0.115465,0.272634,0.922973,-0.013324
CAD,-0.047595,-0.175941,0.077192,-0.593138,-0.162038,0.639339,-0.29373,0.906412,-0.832555,0.172331,1.018124,0.92264,-0.009907
CADCHF,0.101835,0.469364,0.126366,0.833915,0.494112,0.706058,-0.206097,1.196763,1.029297,-8.485876,173.853118,1.245941,-0.015502
CADJPY,0.103744,0.479491,0.117161,0.901051,1.072539,0.856564,-0.096727,1.169018,1.394877,0.423612,2.824462,1.12159,-0.014342
CHF,-0.075232,-0.266823,0.120493,-0.585524,-0.268221,0.868056,-0.280486,0.877835,-0.689684,-9.421944,202.430997,0.893654,-0.015461
CHFJPY,-0.077827,-0.274953,0.131982,-0.543672,-0.205769,0.451679,-0.378227,0.883049,-0.639442,-9.832691,217.255473,0.955025,-0.016913


In [124]:
lgb_results_test

statistic,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk
AUDCAD,0.00663,0.02657,0.072367,0.127474,0.04345,0.109576,-0.152594,1.022051,0.182273,-0.070036,3.67584,1.050695,-0.009081
AUDCHF,-0.013241,-0.05152,0.092674,-0.097518,-0.049585,0.516913,-0.267037,0.983985,-0.136688,-0.082395,1.269281,0.988793,-0.011712
AUDJPY,0.071629,0.315903,0.108178,0.693666,0.237664,0.183279,-0.301387,1.126989,1.000255,-0.217207,2.699925,1.120534,-0.013331
AUDNZD,-0.000229,-0.000909,0.055851,0.023811,-0.002628,0.281225,-0.087193,1.003953,0.033229,-0.24691,2.089739,1.039648,-0.007031
AUDUSD,-0.059401,-0.215738,0.094883,-0.597884,-0.163713,0.821136,-0.362839,0.902527,-0.812169,-0.210677,2.695598,0.928387,-0.012179
CAD,-0.006303,-0.02478,0.073053,-0.050074,-0.028944,0.585543,-0.217776,0.991222,-0.071819,0.094987,4.98837,1.076157,-0.009218
CADCHF,-0.028482,-0.108337,0.080118,-0.320652,-0.110325,0.005284,-0.25817,0.948119,-0.450104,0.236333,2.882304,0.971758,-0.010196
CADJPY,0.023512,0.096608,0.095645,0.290784,0.147435,0.155377,-0.159473,1.050288,0.41499,-0.088807,2.734176,1.034133,-0.01194
CHF,0.006339,0.025392,0.066811,0.127964,0.066139,0.411867,-0.095843,1.021665,0.18134,-0.102459,2.001016,1.002631,-0.008383
CHFJPY,-0.027814,-0.105901,0.063684,-0.411114,-0.170306,0.726911,-0.16332,0.933254,-0.568795,0.123377,2.941029,0.953798,-0.008127


### XGBOOST

In [117]:
def xgb(xs, y):
  return XGBClassifier().fit(xs, y)

In [144]:
def xgb_results():
  """This function trains a eXtreme Gradient Boosting Method and outputs the 
  out-of-sample performance from the validation and test sets
  """
  df1 = pd.DataFrame() 
  df2 = pd.DataFrame()
  for pair in pairs:
    # retrieving the data and preparing the features
    dataset = gen_feat(pair)
    dataset.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)

    # selecting the features to train on
    cols = list(dataset.columns)
    feats = cols[2:]

    #splitting into training, validation and test sets
    df_train = dataset.iloc[:-1000,:]
    train = df_train.copy()
    df_test = dataset.iloc[-1000:,:]
    test = df_test.copy()
    train_f = train.iloc[:-1000,:]
    valid = train.iloc[-1000:,:]

    #training the algorithm
    m = xgb(train_f[feats], train_f['dir']);

    #preparing results for both validation and test sets
    valid_pred = m.predict(valid[feats])
    # valid_acc = accuracy_score(valid['dir'], valid_pred)
    test_pred = m.predict(test[feats])
    # test_acc = accuracy_score(test['dir'], test_pred)

    # Results for validation set
    valid_results = perf_stats(valid_pred*valid['ret']).to_frame(name=pair)
    valid_results = valid_results.rename_axis('pairs')

    # Results for test set
    test_results = perf_stats(test_pred*test['ret']).to_frame(name=pair)
    test_results = test_results.rename_axis('pairs')

    # Merge
    df1 = pd.concat([df1, valid_results], axis=1)
    df2 = pd.concat([df2, test_results], axis=1)

  #output
  return df1.T, df2.T

In [145]:
xgb_results_valid, xgb_results_test = xgb_results()

In [146]:
xgb_results_valid

pairs,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk
AUDCAD,0.023427,0.096248,0.084172,0.317197,0.222238,0.450715,-0.105415,1.053863,0.448566,-0.177865,0.921749,1.051617,-0.010499
AUDCHF,0.034354,0.143434,0.135773,0.319135,0.182967,0.000293,-0.187761,1.065011,0.399348,-5.790485,103.164994,1.022578,-0.016934
AUDJPY,-0.025756,-0.098367,0.126367,-0.143197,-0.068642,0.589322,-0.375231,0.97594,-0.19154,-0.450434,1.954015,0.927837,-0.015993
AUDNZD,-0.002411,-0.009534,0.078129,0.008195,-0.016788,0.227674,-0.143616,1.001378,0.01106,-0.507022,1.695229,0.990406,-0.009841
AUDUSD,-0.031352,-0.118742,0.104643,-0.252081,-0.169592,0.564147,-0.184867,0.959958,-0.346527,-0.120251,0.274787,0.899357,-0.013288
CAD,-0.030442,-0.115451,0.077225,-0.361725,-0.10144,0.395901,-0.300094,0.941839,-0.509723,0.10367,1.000067,0.935399,-0.00984
CADCHF,-0.031317,-0.118616,0.126532,-0.184988,-0.097982,0.613974,-0.319621,0.960986,-0.219949,-8.492551,170.772865,1.098262,-0.016034
CADJPY,0.090845,0.412059,0.1172,0.800522,0.746423,0.631091,-0.121707,1.148813,1.199058,0.162514,2.883928,1.100198,-0.014394
CHF,-0.111841,-0.375406,0.120372,-0.921649,-0.274586,0.77518,-0.407307,0.814388,-1.072831,-9.460222,202.443745,0.863758,-0.015606
CHFJPY,-0.050579,-0.186138,0.132032,-0.322915,-0.153255,0.328668,-0.330033,0.928821,-0.381105,-9.865583,217.468294,0.943482,-0.016804


In [147]:
xgb_results_test

pairs,Annual return,Cumulative returns,Annual volatility,Sharpe ratio,Calmar ratio,Stability,Max drawdown,Omega ratio,Sortino ratio,Skew,Kurtosis,Tail ratio,Daily value at risk
AUDCAD,-0.01227,-0.04781,0.072367,-0.134425,-0.059413,0.478818,-0.206515,0.977261,-0.187817,-0.155199,3.668383,0.936119,-0.009156
AUDCHF,0.012001,0.048479,0.09267,0.175044,0.053164,0.180714,-0.225739,1.029404,0.247254,-0.105498,1.276176,0.954385,-0.011611
AUDJPY,0.036658,0.153573,0.108249,0.386735,0.145577,0.143039,-0.25181,1.068912,0.545732,-0.251182,2.679429,1.053466,-0.013472
AUDNZD,-0.025894,-0.098872,0.055829,-0.441992,-0.185227,0.443544,-0.139797,0.929385,-0.603657,-0.220793,2.066889,0.992121,-0.007132
AUDUSD,-0.003636,-0.01435,0.094951,0.009074,-0.019093,0.210512,-0.190429,1.001557,0.012887,-0.014949,2.719656,1.01722,-0.011959
CAD,-0.008139,-0.031911,0.073052,-0.075386,-0.033883,0.687344,-0.240218,0.986814,-0.107194,0.049073,4.98823,1.052669,-0.009226
CADCHF,0.012526,0.050638,0.080128,0.195404,0.07319,0.062748,-0.171142,1.032998,0.279492,-0.177684,2.870482,1.086584,-0.010033
CADJPY,0.006871,0.027545,0.095658,0.119455,0.056679,0.011908,-0.121227,1.02036,0.163791,-0.451076,2.739727,0.984593,-0.012006
CHF,0.017593,0.071656,0.066801,0.294449,0.176034,0.241663,-0.099939,1.050557,0.425635,-0.054439,2.002869,1.073697,-0.008338
CHFJPY,-0.0054,-0.021259,0.063705,-0.053197,-0.050919,0.129724,-0.10606,0.991102,-0.075623,0.205879,2.92714,0.990094,-0.00804
