In [7]:
import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from collections import Counter
from sklearn import neighbors, svm, model_selection, metrics
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit, cross_validate, train_test_split
from sklearn.utils import resample
from tqdm import tqdm
import datetime as dt
from technical_indicators import *
from statistical_metrics import *


pd.set_option("display.max_columns", 15)
pd.set_option("display.width", 500)

# Variables
moving_window = 1000
test_len = 10


def regression_target(df, n):
    df['Target'] = df['Price'].shift(-n)
    df.dropna(inplace=True)
    
    return df

def data_processing(df):
    
    df = df.replace([np.inf, -np.inf], np.nan)
    # Drop na, nan and nat rows
    df.dropna(inplace=True)
    
    tmp_df = df.copy()
    tmp_df.drop(columns=['Target'], inplace=True)
    
    X = tmp_df.values
    y = df['Target'].values

    return X, y, df


def feature_comparison(original_df, moving_window, test_len):
    
    clf1 = svm.SVR(kernel='rbf', C=10, epsilon=0.01)
    clf2 = svm.SVR(kernel='linear', C=10, epsilon=0.01)
    clf3 = svm.SVR(kernel='rbf', C=10, epsilon=0.001)
    clf4 = svm.SVR(kernel='linear', C=10, epsilon=0.001)
    
    clf_list = [clf1, clf2, clf3, clf4]
    clf_name_list = ['SVM.SVR.RBF.eps:0.01', 'SVM.SVR.Linear.eps:0.001', 'SVM.SVR.RBF.eps:0.01', 
                     'SVM.SVR.Linear.eps:0.001']
    
    accuracy_list = []
    
    for i in range(len(clf_list)):

        df = original_df.copy()
        X, y, df = data_processing(df)
        
        correct = wrong = 0
        
        for j in tqdm(range(X.shape[0]-moving_window-test_len)):

            X_train, X_test = X[np.arange(j,j+moving_window)], X[np.arange(j+moving_window,j+moving_window+test_len)]
            y_train, y_test = y[np.arange(j,j+moving_window)], y[np.arange(j+moving_window,j+moving_window+test_len)]
            
            clf_list[i].fit(X_train, y_train)
            test_prediction = clf_list[i].predict(X_test)
            
            test_data = {'Actual': y_test , 'Predicted': test_prediction}
            df_comparison_test = pd.DataFrame(test_data)
            
            rmse, nrmse = normalised_and_root_mean_square_error(df_comparison_test, 'Actual', 'Predicted')
            
            accuracy_list.append(rmse)
        
        print('Mean rmse of {}: {}'.format(clf_name_list[i], round(np.mean(accuracy_list), 6)))
    
def build_model(clf, X_train, y_train, X_test, y_test):
    
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    prediction = clf.predict(X_test)
    
    return accuracy, prediction


In [8]:
df = pd.read_csv('Data/EUR_USD_Daily.csv', index_col=0)

# Create labels for training
df = regression_target(df, 1)

df.drop(columns=['Change'], inplace=True)

df = moving_average(df, 5)
df = moving_average(df, 8)
df = moving_average(df, 10)
df = moving_average(df, 13)
df = moving_average(df, 15)
df = moving_average(df, 20)
df = moving_average(df, 21)
df.dropna(inplace=True)

In [9]:
warnings.filterwarnings(module='sklearn*', action='ignore', category=DeprecationWarning)
feature_comparison(df, moving_window, test_len)

100%|██████████████████████████████████████████████████| 3921/3921 [01:10<00:00, 55.91it/s]


Mean rmse of SVM.SVR.RBF.eps:0.01: 0.007451


100%|██████████████████████████████████████████████████| 3921/3921 [02:31<00:00, 25.93it/s]


Mean rmse of SVM.SVR.Linear.eps:0.001: 0.007362


100%|██████████████████████████████████████████████████| 3921/3921 [03:37<00:00, 20.04it/s]


Mean rmse of SVM.SVR.RBF.eps:0.01: 0.007353


100%|██████████████████████████████████████████████████| 3921/3921 [10:56<00:00,  5.97it/s]


Mean rmse of SVM.SVR.Linear.eps:0.001: 0.00732
