In [None]:
from pandas_datareader import data as pdr
from sklearn.preprocessing import StandardScaler
from scipy import stats
import pandas as pd
from datetime import date
import numpy as np
import glob, os

In [None]:
pip install --upgrade joblib==1.2.0


In [None]:
#!pip install --upgrade joblib==1.2.0


In [None]:
# Currency-pair tickers used throughout the notebook.
# The position of a ticker in this list is significant: it is used as the
# pair's integer index elsewhere.
fx_tic = [
    # USD-based pairs
    'USDEUR', 'USDJPY', 'USDGBP',
    'USDCHF', 'USDNZD', 'USDCAD',
    'USDSEK', 'USDDKK', 'USDNOK',
    # EUR-based pairs
    'EURJPY', 'EURGBP', 'EURCHF',
    'EURNZD', 'EURCAD', 'EURSEK',
    'EURDKK', 'EURNOK',
]

In [None]:
# Map each currency-pair ticker to its position in fx_tic.
# A comprehension keeps the loop variables out of the notebook namespace.
fx_dic = {ticker: position for position, ticker in enumerate(fx_tic)}

fx_dic

{'USDEUR': 0,
 'USDJPY': 1,
 'USDGBP': 2,
 'USDCHF': 3,
 'USDNZD': 4,
 'USDCAD': 5,
 'USDSEK': 6,
 'USDDKK': 7,
 'USDNOK': 8,
 'EURJPY': 9,
 'EURGBP': 10,
 'EURCHF': 11,
 'EURNZD': 12,
 'EURCAD': 13,
 'EURSEK': 14,
 'EURDKK': 15,
 'EURNOK': 16}

In [None]:
# Returns the filenames corresponding to each currency pair ticker
def getFilenames(fx_tic, data_dir='./FX-Data/'):
    """Build the CSV path for every currency-pair ticker.

    Parameters
    ----------
    fx_tic : iterable of str
        Ticker names (e.g. ``'USDEUR'``). Each one becomes
        ``<data_dir><ticker>.csv``.
    data_dir : str, optional
        Directory prefix for the CSV files. A trailing ``/`` is appended when
        it is missing. Defaults to ``'./FX-Data/'``, the location the notebook
        has always used.

    Returns
    -------
    list of str
        One path per ticker, in the same order as ``fx_tic``. No check is made
        that the files exist.
    """
    # Normalise the prefix so callers may pass 'data' or 'data/' alike.
    if data_dir and not data_dir.endswith('/'):
        data_dir += '/'
    return [data_dir + name + '.csv' for name in fx_tic]

In [None]:
# Preprocesses the input dataframe
def preprocessing(dataframe, scale_with_train_stats=False):
    """Split a date-indexed frame into train/val1/val2/test and standardize X.

    Column 0 of ``dataframe`` is used as the target ``y``; every remaining
    column is a feature in ``X``. Rows are selected with inclusive label
    slices on the index:

    * train: everything up to ``'2011-12-30'``
    * val1:  ``'2012-01-02'`` .. ``'2014-12-31'``
    * val2:  ``'2015-01-01'`` .. ``'2017-12-29'``
    * test:  ``'2018-01-01'`` .. ``'2018-12-31'``

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Target in the first column, features in the others.
        NOTE(review): the label slices assume the index sorts chronologically
        (e.g. ISO ``YYYY-MM-DD`` strings or timestamps) -- confirm for new data.
    scale_with_train_stats : bool, optional
        ``False`` (default) keeps the historical behaviour: every feature
        split is standardized with its *own* mean and standard deviation.
        ``True`` standardizes all four splits with the mean/std of the train
        split only, so no validation or test statistics enter the scaling.

    Returns
    -------
    tuple
        ``(X_train, X_val1, X_val2, X_test, y_train, y_val1, y_val2, y_test)``.
        The ``X_*`` frames are standardized; the ``y_*`` series are returned
        unscaled. A feature with zero standard deviation in the reference
        split yields NaN/inf values, because nothing guards the division.
    """
    # (start, end) index labels for train, val1, val2, test -- defined once so
    # that features and target can never be cut on different boundaries.
    split_bounds = (
        (None, '2011-12-30'),          # train: up to end of 2011
        ('2012-01-02', '2014-12-31'),  # val1: 2012-2014
        ('2015-01-01', '2017-12-29'),  # val2: 2015-2017
        ('2018-01-01', '2018-12-31'),  # test: 2018
    )

    features = dataframe.iloc[:, 1:]
    target = dataframe.iloc[:, 0]

    X_splits = [features.loc[start:end].copy() for start, end in split_bounds]
    y_splits = [target.loc[start:end].copy() for start, end in split_bounds]

    # Standardize each feature split against the chosen reference split.
    train_features = X_splits[0]
    X_scaled = []
    for split in X_splits:
        reference = train_features if scale_with_train_stats else split
        X_scaled.append((split - reference.mean()) / reference.std())

    return (*X_scaled, *y_splits)


In [None]:
# Preprocessing function specifically for strategy evaluation, focused on 2019 data
def preprocessing_strategy(dataframe, start_date_test='2019-01-01', end_date_test='2019-12-31'):
    """Extract and standardize the strategy-evaluation window.

    Column 0 of ``dataframe`` is the target ``y``; the remaining columns are
    the features ``X``. Rows are selected with the inclusive label slice
    ``start_date_test:end_date_test`` on the index.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Target in the first column, features in the others.
    start_date_test, end_date_test : str, optional
        First and last index labels of the window. They default to calendar
        year 2019, the period this function was originally hard-wired to.

    Returns
    -------
    tuple
        ``(X_test_strategy, y_test_strategy)``. The features are standardized
        with the mean and standard deviation of the selected window itself;
        the target is returned unscaled.

    Notes
    -----
    Because the statistics come from the whole window, each scaled row depends
    on rows that come later in the window.
    NOTE(review): confirm this is acceptable for the strategy back-test.
    """
    # Slice the rows once so features and target always share the same window.
    window = dataframe.loc[start_date_test:end_date_test]

    X_test_strategy = window.iloc[:, 1:].copy()
    y_test_strategy = window.iloc[:, 0].copy()

    # Standardize (normalize) the features with the window's own statistics.
    X_test_strategy = (X_test_strategy - X_test_strategy.mean()) / X_test_strategy.std()

    return X_test_strategy, y_test_strategy

In [None]:
# Output data corresponds to the current day (without lag)
def preprocessing_test(dataframe):
    """Return the 2019 features and target with the features moved 5 rows earlier.

    Column 0 of ``dataframe`` is the target; the remaining columns are the
    features. The features are shifted by ``-5`` rows before slicing, so the
    feature row labelled with date *d* holds the values stored 5 rows after
    *d* in the input. No standardization is applied.

    Returns
    -------
    tuple
        ``(X_test_test, y_test_test)``, both restricted to the inclusive label
        range ``'2019-01-01'`` .. ``'2019-12-31'``.
    """
    year_2019 = slice('2019-01-01', '2019-12-31')

    target = dataframe.iloc[:, 0]
    features = dataframe.iloc[:, 1:]

    # Undo the lag: row i receives the feature values of row i + 5.
    # The final 5 rows of the full frame become NaN.
    realigned = features.shift(periods=-5)

    return realigned.loc[year_2019].copy(), target.loc[year_2019].copy()

In [None]:
# Load one currency-pair CSV and run it through preprocessing()
def getProcessedData(filename):
    """Read ``filename`` with its first column as the index and return
    whatever ``preprocessing`` produces for that frame."""
    return preprocessing(pd.read_csv(filename, index_col=0))



In [None]:
# Load one currency-pair CSV and run it through preprocessing_strategy()
def getProcessedData_strategy(filename):
    """Read ``filename`` with its first column as the index and return
    whatever ``preprocessing_strategy`` produces for that frame."""
    return preprocessing_strategy(pd.read_csv(filename, index_col=0))

In [None]:
# Load one currency-pair CSV and run it through preprocessing_test()
def getProcessedData_test(filename):
    """Read ``filename`` with its first column as the index and return
    whatever ``preprocessing_test`` produces for that frame."""
    return preprocessing_test(pd.read_csv(filename, index_col=0))

# Example

In [None]:
# Worked example: load the USDEUR file and unpack its eight splits.
(X_train, X_val1, X_val2, X_test,
 y_train, y_val1, y_val2, y_test) = getProcessedData(
    getFilenames(fx_tic)[fx_dic['USDEUR']]
)

In [None]:
# Boolean mask of missing values in the standardized training features.
X_train.isna()

Unnamed: 0_level_0,Open,High,Low,Adj Close,Parabolic_SAR,Coppock_Curve,Typical_Price,RSI,SO,SMA,...,KAMA,MI,MSD,TRIX,VORTEX_NEG,VORTEX_POS,MACD,PPO,APO,DO_UP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-05-16,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2006-05-17,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2006-05-18,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2006-05-19,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
2006-05-22,True,True,True,True,True,True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-12-26,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2011-12-27,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2011-12-28,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2011-12-29,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
