## Back Test Data Generation

In [1]:
import requests
import time
import calendar
import dateutil.parser as parser
from dateutil.relativedelta import relativedelta
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import yaml
import warnings
warnings.filterwarnings('ignore')
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from pycaret.classification import *

In [2]:
now = datetime.now()
today = now.strftime("%d-%m-%Y_%I-%M_%p")

In [3]:
with open ('back_test_pipeline_settings.yaml') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
    k_number = cfg['knn']['k_number']
    metric = cfg['knn']['metric']
    algorithm = cfg['knn']['algorithm']
    feature_1 = cfg['knn']['feature_1']
    feature_2 = cfg['knn']['feature_2']
    feature_3 = cfg['knn']['feature_3']
    feature_7 = cfg['knn']['feature_7']
    feature_8 = cfg['knn']['feature_8']   
    volume = cfg['feature']['volume']
    volume_size = cfg['sample']['volume_size']
    sample_count = cfg['sample']['count']
    candles = cfg['recommendation']['candle_count']
    pair = cfg['currency']['pair']

In [4]:
print('K Number:',k_number)
print('Metric:', metric)
print('Algorithm:', algorithm)
print('Candle Volume Size:', volume_size)
print('Random Sample Count:', sample_count)
print('Future Candle Count:', candles)
print('Pair:', pair)

K Number: 5
Metric: euclidean
Algorithm: brute
Candle Volume Size: 5000
Random Sample Count: 8000
Future Candle Count: 7
Pair: USDCAD


In [5]:
def convert_date(utc_time): 
    parsed_date = parser.parse(utc_time)
    var_date=parsed_date.date()
    var_time=parsed_date.time()
    var_f_time=var_time.hour
    var_julian_date=parsed_date.timetuple().tm_yday
    var_weekday=parsed_date.weekday()
    var_weekday_name=calendar.day_name[parsed_date.weekday()]
    return var_date, var_time, var_f_time, var_julian_date, var_weekday, var_weekday_name

In [6]:
def find_k_similar_candles(candle_id, dataset, k=k_number):
    indices=[]
    distances = []
    output = []
    model_knn = NearestNeighbors(metric = metric, algorithm = algorithm) 
    model_knn.fit(dataset)
    
    #metric = 'euclidean' or 'cosine' or 'manhattan' or 'mahalanobis'
    
    distances, indices = model_knn.kneighbors(dataset.iloc[candle_id,:].values.reshape(1,-1),
                                              n_neighbors = k)

    for i in range(0,len(distances.flatten())):
        if i!=0:
            
            output.append ([dataset.index[indices.flatten()[i]],
                            distances.flatten()[i],
                            dataset.iloc[indices.flatten()[i]][feature_1],
                            dataset.iloc[indices.flatten()[i]][feature_2],
                            dataset.iloc[indices.flatten()[i]][feature_3],
                            dataset.iloc[indices.flatten()[i]][feature_7],
                            dataset.iloc[indices.flatten()[i]][feature_8],
                           ])
    
    output = pd.DataFrame(output)
    output.columns = ['Indice','Distance',
                      feature_1,
                      feature_2,
                      feature_3,
                      feature_7,
                      feature_8,
                     ]
   # display (output)
    
    return indices, distances

# <font color='red'>Test Configs</font>

In [7]:
filename = 'USD_CAD_H4.csv'
data = pd.read_csv(filename)

In [8]:
data.columns

Index(['Date', 'Time', 'f_time', 'julian_date', 'Weekday', 'Weekday_Name',
       'UTC_Time', 'Volume', 'Open', 'High', 'Low', 'Close', 'SMA_5', 'SMA_10',
       'SMA_20', 'F_SMA_5', 'F_SMA_10', 'F_SMA_20', 'O-H', 'O-L', 'O-C', 'H-L',
       'H-C', 'L-C', 'Direction', 'col_1', 'col_2', 'col_3', 'col_4', 'col_5'],
      dtype='object')

In [9]:
data.head()

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,Low,Close,SMA_5,SMA_10,SMA_20,F_SMA_5,F_SMA_10,F_SMA_20,O-H,O-L,O-C,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5
0,2009-11-16,14:00:00,14,320,0,Monday,2009-11-16T14:00:00.000000000Z,8441,1.04497,1.05234,1.04272,1.04758,1.0476,1.0495,1.0501,-2e-05,-0.00192,-0.00252,-0.00737,0.00225,-0.00261,0.00962,0.00476,-0.00486,1,-0.00261,0.00476,0.00225,0.00214,0.00076
1,2009-11-16,18:00:00,18,320,0,Monday,2009-11-16T18:00:00.000000000Z,4828,1.0476,1.0497,1.04391,1.04752,1.047,1.0495,1.0503,0.00052,-0.00198,-0.00278,-0.0021,0.00369,8e-05,0.00579,0.00218,-0.00361,0,8e-05,0.00218,0.00369,-0.00261,0.00214
2,2009-11-16,22:00:00,22,320,0,Monday,2009-11-16T22:00:00.000000000Z,1616,1.04841,1.04841,1.04659,1.04759,1.0469,1.0491,1.0505,0.00069,-0.00151,-0.00291,0.0,0.00182,0.00082,0.00182,0.00082,-0.001,0,0.00082,0.00082,0.00182,8e-05,-0.00261
3,2009-11-17,02:00:00,2,321,1,Tuesday,2009-11-17T02:00:00.000000000Z,1222,1.04757,1.04906,1.0473,1.04823,1.0472,1.0487,1.0505,0.00103,-0.00047,-0.00227,-0.00149,0.00027,-0.00066,0.00176,0.00083,-0.00093,1,-0.00066,0.00083,0.00027,0.00082,8e-05
4,2009-11-17,06:00:00,6,321,1,Tuesday,2009-11-17T06:00:00.000000000Z,4928,1.04821,1.05526,1.0466,1.05462,1.0491,1.049,1.0507,0.00552,0.00562,0.00392,-0.00705,0.00161,-0.00641,0.00866,0.00064,-0.00802,1,-0.00641,0.00064,0.00161,-0.00066,0.00082


## Selecting 1000 Random Candles

In [10]:
random_samples = data[data[volume] > volume_size].sample(n = sample_count)
Test_Candle = list(random_samples.index.values)

In [11]:
Test_Candle[0:10]

[8605, 9952, 11884, 8466, 19008, 16931, 13607, 5659, 12303, 5964]

In [12]:
data.head()

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,Low,Close,SMA_5,SMA_10,SMA_20,F_SMA_5,F_SMA_10,F_SMA_20,O-H,O-L,O-C,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5
0,2009-11-16,14:00:00,14,320,0,Monday,2009-11-16T14:00:00.000000000Z,8441,1.04497,1.05234,1.04272,1.04758,1.0476,1.0495,1.0501,-2e-05,-0.00192,-0.00252,-0.00737,0.00225,-0.00261,0.00962,0.00476,-0.00486,1,-0.00261,0.00476,0.00225,0.00214,0.00076
1,2009-11-16,18:00:00,18,320,0,Monday,2009-11-16T18:00:00.000000000Z,4828,1.0476,1.0497,1.04391,1.04752,1.047,1.0495,1.0503,0.00052,-0.00198,-0.00278,-0.0021,0.00369,8e-05,0.00579,0.00218,-0.00361,0,8e-05,0.00218,0.00369,-0.00261,0.00214
2,2009-11-16,22:00:00,22,320,0,Monday,2009-11-16T22:00:00.000000000Z,1616,1.04841,1.04841,1.04659,1.04759,1.0469,1.0491,1.0505,0.00069,-0.00151,-0.00291,0.0,0.00182,0.00082,0.00182,0.00082,-0.001,0,0.00082,0.00082,0.00182,8e-05,-0.00261
3,2009-11-17,02:00:00,2,321,1,Tuesday,2009-11-17T02:00:00.000000000Z,1222,1.04757,1.04906,1.0473,1.04823,1.0472,1.0487,1.0505,0.00103,-0.00047,-0.00227,-0.00149,0.00027,-0.00066,0.00176,0.00083,-0.00093,1,-0.00066,0.00083,0.00027,0.00082,8e-05
4,2009-11-17,06:00:00,6,321,1,Tuesday,2009-11-17T06:00:00.000000000Z,4928,1.04821,1.05526,1.0466,1.05462,1.0491,1.049,1.0507,0.00552,0.00562,0.00392,-0.00705,0.00161,-0.00641,0.00866,0.00064,-0.00802,1,-0.00641,0.00064,0.00161,-0.00066,0.00082


# <font color='red'>CANDLE LOOP</font>

In [13]:
#LR_FINAL_MODEL = load_model('FINAL_MODELS/EURUSD/FINAL_LR_25Nov2021_EURUSD')
#ET_FINAL_MODEL = load_model('FINAL_MODELS/EURUSD/FINAL_ET_25Nov2021_EURUSD')
KNN_FINAL_MODEL = load_model('FINAL_MODELS/USDCAD/23-01-2022_01-24_AM_knn_USDCAD')
#DT_FINAL_MODEL = load_model('FINAL_MODELS/USDCAD/10-01-2022_06-15_AM_dt_USDCAD')
GBC_FINAL_MODEL = load_model('FINAL_MODELS/USDCAD/10-01-2022_07-12_AM_gbc_USDCAD')
#LIGHTGBM_FINAL_MODEL = load_model('FINAL_MODELS/GBPUSD/09-01-2022_10-58_PM_lightgbm_GBPUSD')

Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded


In [14]:
%%time
print ('Today: ' + today)

result_output = pd.DataFrame({'Candle_No':[],
                              'Current_Market_Fit':[],
                              'Current_Market':[],
#                              'Rec1':[],
#                              'Rec1_P':[],
#                              'Rec2':[],
#                              'Rec2_P':[],
#                              'Rec3':[],
#                              'Rec3_P':[],
#                              'LR_Label':[],
#                              'LR_Score':[],
#                              'ET_Label':[],
#                              'ET_Score':[],
                              'KNN_Label':[],
                              'KNN_Score':[],
#                              'DT_Label':[],
#                              'DT_Score':[],
#                              'LIGHTGBM_Label':[],
#                              'LIGHTGBM_Score':[],
                              'GBC_Label':[],
                              'GBC_Score':[],                              
                             })

for candle_no in Test_Candle:
    data = pd.read_csv(filename)
    data = data.iloc[candle_no:candle_no+candles]
    data['candleno'] = range (1, len(data) + 1)
    X = data['candleno'].values.reshape(-1, 1)
    Y = data['Close'].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    y_pred = linear_regressor.predict(X) 
    
    Current_Market_Fit = r2_score(Y, y_pred)*100
    coeficient = (linear_regressor.coef_)

    if coeficient > 0:
        Current_Market= 1

    else:
        Current_Market = 0

    data = pd.read_csv(filename)
    data = data[[feature_1,
                 feature_2,
                 feature_3,
                 feature_7,
                 feature_8,
                ]]

    indices, distances = find_k_similar_candles (candle_no,data)
    indices = indices[0:1][0]
    
    predicted_output = []
    recs = []
    for indice in indices[1:5]:
             
        Predicted_Market_Fit =0
        Predicted_Trade=''
    
        data = pd.read_csv(filename) 
        data = data.iloc[indice:indice+candles]

        data['candleno'] = range (1, len(data) + 1)
        X = data['candleno'].values.reshape(-1, 1)
        Y = data['Close'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)

        Predicted_Market_Fit= r2_score(Y, y_pred)*100
        coeficient = (linear_regressor.coef_)

        if coeficient > 0:
            Predicted_Trade = 'BUY'
            recs.append((r2_score(Y, y_pred)*100))
        else:
            Predicted_Trade = 'SELL'
            recs.append((r2_score(Y, y_pred)*100) * -1)
        
        predicted_output.append([Predicted_Market_Fit,Predicted_Trade])
        
        
    data_unseen = pd.DataFrame ({
        'Rec1_Score': [recs[0]],
        'Rec2_Score': [recs[1]],
        'Rec3_Score': [recs[2]],
        'Rec4_Score': [recs[3]],
    })
    
#    lr_prediction = predict_model(LR_FINAL_MODEL, data=data_unseen)
#    LR_Label = lr_prediction['Label']
#    LR_Score = lr_prediction['Score']
    
#    et_prediction = predict_model(ET_FINAL_MODEL, data=data_unseen)
#    ET_Label = et_prediction['Label']
#    ET_Score = et_prediction['Score']
    
    knn_prediction = predict_model(KNN_FINAL_MODEL, data=data_unseen)
    KNN_Label = knn_prediction['Label']
    KNN_Score = knn_prediction['Score']
    
#    dt_prediction = predict_model(DT_FINAL_MODEL, data=data_unseen)
#    DT_Label = dt_prediction['Label']
#    DT_Score = dt_prediction['Score']
    
#    lightgbm_prediction = predict_model(LIGHTGBM_FINAL_MODEL, data=data_unseen)
#    LIGHTGBM_Label = lightgbm_prediction['Label']
#    LIGHTGBM_Score = lightgbm_prediction['Score']

    gbc_prediction = predict_model(GBC_FINAL_MODEL, data=data_unseen)
    GBC_Label = gbc_prediction['Label']
    GBC_Score = gbc_prediction['Score']    
    
    result = {'Candle_No': candle_no,
              'Current_Market_Fit': Current_Market_Fit,
              'Current_Market': Current_Market,
#              'Rec1': predicted_output[0][0],
#              'Rec1_P': predicted_output[0][1],
#              'Rec2': predicted_output[1][0],
#              'Rec2_P': predicted_output[1][1],
#              'Rec3': predicted_output[2][0],
#              'Rec3_P': predicted_output[2][1],
#              'LR_Label': LR_Label[0],
#              'LR_Score': LR_Score[0],
#              'ET_Label': ET_Label[0],
#              'ET_Score': ET_Score[0],
              'KNN_Label': KNN_Label[0],
              'KNN_Score': KNN_Score[0],
#              'DT_Label': DT_Label[0],
#              'DT_Score': DT_Score[0],
#              'LIGHTGBM_Label': LIGHTGBM_Label[0],
#              'LIGHTGBM_Score': LIGHTGBM_Score[0],
              'GBC_Label': GBC_Label[0],
              'GBC_Score': GBC_Score[0],              
             }
    
    result_output = result_output.append(result, ignore_index = True)

Today: 23-01-2022_01-27_AM
CPU times: user 6h 50min 14s, sys: 8h 11min 39s, total: 15h 1min 53s
Wall time: 2h 11min 28s


In [15]:
result_output.to_csv('03_Back_Test_Final_Result_' + today + '.csv', header = True, index = False)
result_output = pd.read_csv('03_Back_Test_Final_Result_' + today + '.csv')

In [16]:
result_output = result_output[result_output['Current_Market_Fit'] > 20]
result_output = result_output.reset_index()
del result_output['index']

In [17]:
result_output.shape

(5688, 7)

In [18]:
result_output.to_csv('03_Back_Test_Final_Result_' + today + '.csv', header = True, index = False)
result_output = pd.read_csv('03_Back_Test_Final_Result_' + today + '.csv')

In [19]:
#result_output['LR_Prediction'] = result_output['Current_Market'] - result_output['LR_Label']
#result_output['ET_Prediction'] = result_output['Current_Market'] - result_output['ET_Label']
result_output['KNN_Prediction'] = result_output['Current_Market'] - result_output['KNN_Label']
#result_output['DT_Prediction'] = result_output['Current_Market'] - result_output['DT_Label']
#result_output['LIGHTGBM_Prediction'] = result_output['Current_Market'] - result_output['LIGHTGBM_Label']
result_output['GBC_Prediction'] = result_output['Current_Market'] - result_output['GBC_Label']

In [20]:
result_output.head()

Unnamed: 0,Candle_No,Current_Market_Fit,Current_Market,KNN_Label,KNN_Score,GBC_Label,GBC_Score,KNN_Prediction,GBC_Prediction
0,8605.0,75.749356,1.0,1.0,0.5876,0.0,0.6424,0.0,1.0
1,9952.0,77.439834,0.0,0.0,0.6259,1.0,0.9043,0.0,-1.0
2,11884.0,96.885402,1.0,1.0,0.9984,1.0,0.8184,0.0,0.0
3,8466.0,60.19172,0.0,0.0,0.9991,1.0,0.8166,0.0,-1.0
4,19008.0,94.687476,1.0,0.0,0.6064,1.0,0.9615,1.0,0.0


In [21]:
KNN_Result = pd.DataFrame(result_output['KNN_Prediction'].value_counts())
KNN_Result["Score"] = (KNN_Result['KNN_Prediction'] * 100 / len(result_output)).round(2)

In [22]:
GBC_Result = pd.DataFrame(result_output['GBC_Prediction'].value_counts())
GBC_Result["Score"] = (GBC_Result['GBC_Prediction'] * 100 / len(result_output)).round(2)

In [23]:
result = pd.concat([
#    LR_Result,
#    ET_Result,
    KNN_Result,
#    DT_Result,
    GBC_Result,
#    LIGHTGBM_Result,
], axis=1)
result

Unnamed: 0,KNN_Prediction,Score,GBC_Prediction,Score.1
0.0,4565,80.26,3099,54.48
-1.0,604,10.62,1405,24.7
1.0,519,9.12,1184,20.82
