## Back Test Data Generation

In [1]:
import requests
import time
import calendar
import dateutil.parser as parser
from dateutil.relativedelta import relativedelta
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from pycaret.classification import *

In [2]:
pd.get_option("display.max_columns", None)

500

In [3]:
def convert_date(utc_time): 
    parsed_date = parser.parse(utc_time)
    var_date=parsed_date.date()
    var_time=parsed_date.time()
    var_f_time=var_time.hour
    var_julian_date=parsed_date.timetuple().tm_yday
    var_weekday=parsed_date.weekday()
    var_weekday_name=calendar.day_name[parsed_date.weekday()]
    return var_date, var_time, var_f_time, var_julian_date, var_weekday, var_weekday_name

In [4]:
def find_k_similar_candles(candle_id, dataset, k=5):
    indices=[]
    distances = []
    output = []
    model_knn = NearestNeighbors(metric = 'euclidean', algorithm = 'brute') 
    model_knn.fit(dataset)
    
    #metric = 'euclidean' or 'cosine' or 'manhattan' or 'mahalanobis'
    
    distances, indices = model_knn.kneighbors(dataset.iloc[candle_id,:].values.reshape(1,-1),
                                              n_neighbors = k)

    for i in range(0,len(distances.flatten())):
        if i!=0:
            
            output.append ([dataset.index[indices.flatten()[i]],
                            distances.flatten()[i],
                            dataset.iloc[indices.flatten()[i]]['col_1'],
                            dataset.iloc[indices.flatten()[i]]['col_2'],
                            dataset.iloc[indices.flatten()[i]]['col_3'],
                            dataset.iloc[indices.flatten()[i]]['F_SMA_10'],
                            dataset.iloc[indices.flatten()[i]]['F_SMA_20'],
                           ])
    
    output = pd.DataFrame(output)
    output.columns = ['Indice','Distance',
                      'col_1',
                      'col_2',
                      'col_3',
                      'F_SMA_10',
                      'F_SMA_20',
                     ]
   # display (output)
    
    return indices, distances

# <font color='red'>Test Configs</font>

In [5]:
filename = 'EUR_USD_H4.csv'
data = pd.read_csv(filename)

In [6]:
data.columns

Index(['Date', 'Time', 'f_time', 'julian_date', 'Weekday', 'Weekday_Name',
       'UTC_Time', 'Volume', 'Open', 'High', 'Low', 'Close', 'SMA_5', 'SMA_10',
       'SMA_20', 'F_SMA_5', 'F_SMA_10', 'F_SMA_20', 'O-H', 'O-L', 'O-C', 'H-L',
       'H-C', 'L-C', 'Direction', 'col_1', 'col_2', 'col_3', 'col_4', 'col_5'],
      dtype='object')

In [7]:
data.head()

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,Low,Close,SMA_5,SMA_10,SMA_20,F_SMA_5,F_SMA_10,F_SMA_20,O-H,O-L,O-C,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5
0,2015-09-10,21:00:00,21,253,3,Thursday,2015-09-10T21:00:00.000000000Z,3801,1.12787,1.13019,1.12732,1.12782,1.1246,1.1217,1.1201,0.00322,0.00612,0.00772,-0.00232,0.00055,5e-05,0.00287,0.00237,-0.0005,0,5e-05,0.00232,0.0005,-0.00151,-0.00523
1,2015-09-11,01:00:00,1,254,4,Friday,2015-09-11T01:00:00.000000000Z,1574,1.12786,1.12877,1.1276,1.1287,1.1264,1.1233,1.1207,0.0023,0.0054,0.008,-0.00091,0.00026,-0.00084,0.00117,7e-05,-0.0011,1,-0.00084,0.00091,0.0011,5e-05,-0.00151
2,2015-09-11,05:00:00,5,254,4,Friday,2015-09-11T05:00:00.000000000Z,7657,1.12872,1.131,1.12751,1.12907,1.128,1.1246,1.1213,0.00107,0.00447,0.00777,-0.00228,0.00121,-0.00035,0.00349,0.00193,-0.00156,1,-0.00035,0.00228,0.00156,-0.00084,5e-05
3,2015-09-11,09:00:00,9,254,4,Friday,2015-09-11T09:00:00.000000000Z,6992,1.1291,1.12935,1.12544,1.12786,1.1283,1.1253,1.1217,-0.00044,0.00256,0.00616,-0.00025,0.00366,0.00124,0.00391,0.00149,-0.00242,0,0.00124,0.00025,0.00242,-0.00035,-0.00084
4,2015-09-11,13:00:00,13,254,4,Friday,2015-09-11T13:00:00.000000000Z,10077,1.1279,1.13486,1.12744,1.13448,1.1296,1.1264,1.1225,0.00488,0.00808,0.01198,-0.00696,0.00046,-0.00658,0.00742,0.00038,-0.00704,1,-0.00658,0.00696,0.00704,0.00124,-0.00035


## Selecting 1000 Random Candles

In [8]:
random_samples = data[data['Volume'] > 5000].sample(n = 2000)
Test_Candle = list(random_samples.index.values)

In [9]:
#Test_Candle = np.random.randint(low=1, high=len(data)-40, size=500)


In [10]:
data.shape

(9651, 30)

In [11]:
data.head(2)

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,Low,Close,SMA_5,SMA_10,SMA_20,F_SMA_5,F_SMA_10,F_SMA_20,O-H,O-L,O-C,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5
0,2015-09-10,21:00:00,21,253,3,Thursday,2015-09-10T21:00:00.000000000Z,3801,1.12787,1.13019,1.12732,1.12782,1.1246,1.1217,1.1201,0.00322,0.00612,0.00772,-0.00232,0.00055,5e-05,0.00287,0.00237,-0.0005,0,5e-05,0.00232,0.0005,-0.00151,-0.00523
1,2015-09-11,01:00:00,1,254,4,Friday,2015-09-11T01:00:00.000000000Z,1574,1.12786,1.12877,1.1276,1.1287,1.1264,1.1233,1.1207,0.0023,0.0054,0.008,-0.00091,0.00026,-0.00084,0.00117,7e-05,-0.0011,1,-0.00084,0.00091,0.0011,5e-05,-0.00151


# <font color='red'>CANDLE LOOP</font>

In [12]:
%%time
LR_FINAL_MODEL = load_model('FINAL_MODELS/EURUSD/FINAL_LR_25Nov2021_EURUSD')
ET_FINAL_MODEL = load_model('FINAL_MODELS/EURUSD/FINAL_ET_25Nov2021_EURUSD')
KNN_FINAL_MODEL = load_model('FINAL_MODELS/EURUSD/FINAL_KNN_26Nov2021_EURUSD')

result_output = pd.DataFrame({'Candle_No':[],
                              'Current_Market_Fit':[],
                              'Current_Market':[],
#                              'Rec1':[],
#                              'Rec1_P':[],
#                              'Rec2':[],
#                              'Rec2_P':[],
#                              'Rec3':[],
#                              'Rec3_P':[],
                              'LR_Label':[],
                              'LR_Score':[],
                              'ET_Label':[],
                              'ET_Score':[],
                              'KNN_Label':[],
                              'KNN_Score':[],
                             })

for candle_no in Test_Candle:
    data = pd.read_csv(filename)
    data = data.iloc[candle_no:candle_no+7]
    data['candleno'] = range (1, len(data) + 1)
    X = data['candleno'].values.reshape(-1, 1)
    Y = data['Close'].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    y_pred = linear_regressor.predict(X) 
    
    Current_Market_Fit = r2_score(Y, y_pred).round(2)*100
    coeficient = (linear_regressor.coef_)

    if coeficient > 0:
        Current_Market= 1

    else:
        Current_Market = 0

    
    data = pd.read_csv(filename)
    data = data[['col_1',
                 'col_2',
                 'col_3',
                 'F_SMA_10',
                 'F_SMA_20'
                ]]

    indices, distances = find_k_similar_candles (candle_no,data)
    indices = indices[0:1][0]
    
    predicted_output = []
    recs = []
    for indice in indices[1:5]:
             
        Predicted_Market_Fit =0
        Predicted_Trade=''
    
        data = pd.read_csv(filename) 
        data = data.iloc[indice:indice+7]

        data['candleno'] = range (1, len(data) + 1)
        X = data['candleno'].values.reshape(-1, 1)
        Y = data['Close'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)

        Predicted_Market_Fit= r2_score(Y, y_pred).round(2)*100
        coeficient = (linear_regressor.coef_)

        if coeficient > 0:
            Predicted_Trade = 'BUY'
            recs.append((r2_score(Y, y_pred).round(2)*100))
        else:
            Predicted_Trade = 'SELL'
            recs.append((r2_score(Y, y_pred).round(2)*100) * -1)
        
        predicted_output.append([Predicted_Market_Fit,Predicted_Trade])
        
        
    data_unseen = pd.DataFrame ({
        'Rec1_Score': [recs[0]],
        'Rec2_Score': [recs[1]],
        'Rec3_Score': [recs[2]],
        'Rec4_Score': [recs[3]],
    })
    
    lr_prediction = predict_model(LR_FINAL_MODEL, data=data_unseen)
    LR_Label = lr_prediction['Label']
    LR_Score = lr_prediction['Score']
    
    et_prediction = predict_model(ET_FINAL_MODEL, data=data_unseen)
    ET_Label = et_prediction['Label']
    ET_Score = et_prediction['Score']
    
    knn_prediction = predict_model(KNN_FINAL_MODEL, data=data_unseen)
    KNN_Label = knn_prediction['Label']
    KNN_Score = knn_prediction['Score']
    
    
    result = {'Candle_No': candle_no,
              'Current_Market_Fit': Current_Market_Fit,
              'Current_Market': Current_Market,
#              'Rec1': predicted_output[0][0],
#              'Rec1_P': predicted_output[0][1],
#              'Rec2': predicted_output[1][0],
#              'Rec2_P': predicted_output[1][1],
#              'Rec3': predicted_output[2][0],
#              'Rec3_P': predicted_output[2][1],
              'LR_Label': LR_Label[0],
              'LR_Score': LR_Score[0],
              'ET_Label': ET_Label[0],
              'ET_Score': ET_Score[0],
              'KNN_Label': KNN_Label[0],
              'KNN_Score': KNN_Score[0],
             }
    
    result_output = result_output.append(result, ignore_index = True)

Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded
CPU times: user 40min 49s, sys: 6min 34s, total: 47min 24s
Wall time: 20min 18s


In [13]:
result_output.to_csv('Back_Test_Final_Result.csv', header = True, index = False)
result_output = pd.read_csv('Back_Test_Final_Result.csv')

In [14]:
result_output = result_output[result_output['Current_Market_Fit'] > 20]
result_output = result_output.reset_index()
del result_output['index']

In [15]:
result_output.to_csv('Back_Test_Final_Result.csv', header = True, index = False)
result_output = pd.read_csv('Back_Test_Final_Result.csv')

In [16]:
result_output.head(20)

Unnamed: 0,Candle_No,Current_Market_Fit,Current_Market,LR_Label,LR_Score,ET_Label,ET_Score,KNN_Label,KNN_Score
0,7820.0,36.0,0.0,0.0,0.5398,1.0,0.5595,0.0,1.0
1,974.0,52.0,1.0,0.0,0.5841,1.0,0.5267,1.0,1.0
2,3949.0,23.0,0.0,0.0,0.6147,1.0,0.5424,0.0,1.0
3,2336.0,64.0,1.0,0.0,0.5787,1.0,0.5381,1.0,1.0
4,4247.0,91.0,1.0,0.0,0.5136,0.0,0.531,1.0,1.0
5,2909.0,73.0,0.0,1.0,0.5453,0.0,0.5102,0.0,1.0
6,8244.0,27.0,1.0,0.0,0.5421,0.0,0.5038,1.0,0.7024
7,1154.0,77.0,0.0,0.0,0.5402,1.0,0.571,0.0,1.0
8,2876.0,76.0,0.0,0.0,0.6367,0.0,0.5837,0.0,1.0
9,6954.0,78.0,1.0,0.0,0.5759,0.0,0.5125,1.0,1.0


In [17]:
result_output['LR_Prediction'] = result_output['Current_Market'] - result_output['LR_Label']
result_output['ET_Prediction'] = result_output['Current_Market'] - result_output['ET_Label']
result_output['KNN_Prediction'] = result_output['Current_Market'] - result_output['KNN_Label']

In [18]:
result_output.head()

Unnamed: 0,Candle_No,Current_Market_Fit,Current_Market,LR_Label,LR_Score,ET_Label,ET_Score,KNN_Label,KNN_Score,LR_Prediction,ET_Prediction,KNN_Prediction
0,7820.0,36.0,0.0,0.0,0.5398,1.0,0.5595,0.0,1.0,0.0,-1.0,0.0
1,974.0,52.0,1.0,0.0,0.5841,1.0,0.5267,1.0,1.0,1.0,0.0,0.0
2,3949.0,23.0,0.0,0.0,0.6147,1.0,0.5424,0.0,1.0,0.0,-1.0,0.0
3,2336.0,64.0,1.0,0.0,0.5787,1.0,0.5381,1.0,1.0,1.0,0.0,0.0
4,4247.0,91.0,1.0,0.0,0.5136,0.0,0.531,1.0,1.0,1.0,1.0,0.0


In [19]:
LR_Result = pd.DataFrame(result_output['LR_Prediction'].value_counts())
LR_Result["Score"] = (LR_Result['LR_Prediction'] * 100 / len(result_output)).round(2)

In [20]:
ET_Result = pd.DataFrame(result_output['ET_Prediction'].value_counts())
ET_Result["Score"] = (ET_Result['ET_Prediction'] * 100 / len(result_output)).round(2)

In [21]:
KNN_Result = pd.DataFrame(result_output['KNN_Prediction'].value_counts())
KNN_Result["Score"] = (KNN_Result['KNN_Prediction'] * 100 / len(result_output)).round(2)

In [22]:
result = pd.concat([LR_Result, ET_Result, KNN_Result], axis=1)
result

Unnamed: 0,LR_Prediction,Score,ET_Prediction,Score.1,KNN_Prediction,Score.2
-1.0,284,19.6,366,25.26,160,11.04
0.0,700,48.31,716,49.41,1175,81.09
1.0,465,32.09,367,25.33,114,7.87


In [23]:
result_output.to_csv('Back_Test_Final_Result.csv', header = True, index = False)