## Back Test Data Generation

In [1]:
import requests
import time
import calendar
import dateutil.parser as parser
from dateutil.relativedelta import relativedelta
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from pycaret.classification import *

In [2]:
pd.get_option("display.max_columns", None)

500

In [3]:
def convert_date(utc_time): 
    parsed_date = parser.parse(utc_time)
    var_date=parsed_date.date()
    var_time=parsed_date.time()
    var_f_time=var_time.hour
    var_julian_date=parsed_date.timetuple().tm_yday
    var_weekday=parsed_date.weekday()
    var_weekday_name=calendar.day_name[parsed_date.weekday()]
    return var_date, var_time, var_f_time, var_julian_date, var_weekday, var_weekday_name

In [4]:
def find_k_similar_candles(candle_id, dataset, k=4):
    indices=[]
    distances = []
    output = []
    model_knn = NearestNeighbors(metric = 'euclidean', algorithm = 'auto') 
    model_knn.fit(dataset)
    
    #metric = 'euclidean' or 'cosine' or 'manhattan' or 'mahalanobis'
    
    distances, indices = model_knn.kneighbors(dataset.iloc[candle_id,:].values.reshape(1,-1),
                                              n_neighbors = k)

    for i in range(0,len(distances.flatten())):
        if i!=0:
            
            output.append ([dataset.index[indices.flatten()[i]],
                            distances.flatten()[i],
                            dataset.iloc[indices.flatten()[i]]['col_1'],
                            dataset.iloc[indices.flatten()[i]]['col_2'],
                            dataset.iloc[indices.flatten()[i]]['col_3'],
                            dataset.iloc[indices.flatten()[i]]['F_SMA_5'],
                            dataset.iloc[indices.flatten()[i]]['F_SMA_10'],
                           ])
    
    output = pd.DataFrame(output)
    output.columns = ['Indice','Distance',
                      'col_1',
                      'col_2',
                      'col_3',
                      'F_SMA_5',
                      'F_SMA_10',
                     ]
   # display (output)
    
    return indices, distances

# <font color='red'>Test Configs</font>

In [5]:
filename = 'EUR_USD_H4.csv'
data = pd.read_csv(filename)

In [6]:
data.columns

Index(['Date', 'Time', 'f_time', 'julian_date', 'Weekday', 'Weekday_Name',
       'UTC_Time', 'Volume', 'Open', 'High', 'Low', 'Close', 'SMA_5', 'SMA_10',
       'F_SMA_5', 'F_SMA_10', 'col_1', 'col_2', 'col_3'],
      dtype='object')

In [7]:
data.head()

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,Low,Close,SMA_5,SMA_10,F_SMA_5,F_SMA_10,col_1,col_2,col_3
0,2015-09-07,01:00:00,1,250,0,Monday,2015-09-07T01:00:00.000000000Z,3223,1.11666,1.11694,1.11412,1.11424,1.1146,1.11371,-0.00036,0.00053,0.00242,0.0027,0.00254
1,2015-09-07,05:00:00,5,250,0,Monday,2015-09-07T05:00:00.000000000Z,6486,1.11421,1.11647,1.11216,1.11625,1.11528,1.11416,0.00097,0.00209,-0.00204,0.00022,0.00205
2,2015-09-07,09:00:00,9,250,0,Monday,2015-09-07T09:00:00.000000000Z,5447,1.11623,1.11777,1.11388,1.1148,1.11536,1.11441,-0.00056,0.00039,0.00143,0.00297,0.00235
3,2015-09-07,13:00:00,13,250,0,Monday,2015-09-07T13:00:00.000000000Z,3012,1.11478,1.11732,1.11474,1.11692,1.11577,1.11482,0.00115,0.0021,-0.00214,0.0004,4e-05
4,2015-09-07,17:00:00,17,250,0,Monday,2015-09-07T17:00:00.000000000Z,1064,1.11692,1.11724,1.11641,1.11704,1.11585,1.11521,0.00119,0.00183,-0.00012,0.0002,0.00051


## Selecting 1000 Random Candles

In [8]:
Test_Candle = np.random.randint(low=1, high=len(data)-40, size=500)
Test_Candle

array([1967, 7422, 3442, 5785, 7042, 1798, 8548, 5578, 8178, 6709, 4947,
       7341, 8040, 8834, 8350, 7981,  825, 7656, 6945, 5863, 6629, 2105,
       9235,  927, 5843, 1346, 6625, 1789, 7603, 1130,  419, 2375,  134,
       6324, 4246, 4445, 7290, 1851, 9135, 2505, 2017, 3389, 3225, 8145,
       1656, 5030, 6095, 7957, 7104, 8260, 6153, 4974, 4501, 5876, 8628,
       8452, 5165, 3722, 2374, 4089, 5947, 6917, 2406, 3258, 8130, 6572,
       1260, 6015, 5191, 3042, 5857,  527, 6432, 1092, 2603, 7968, 8677,
       1917, 2968, 5480, 1538, 4708, 9602,  354,  671, 5413, 2964, 3481,
       8813, 2122, 5555, 7520, 4884, 8018, 1160, 5607, 1088, 2844, 4046,
       5278, 6039, 9052, 8319, 9036,   12, 8240,  375, 5649, 8314, 7976,
       3214, 9547, 4621, 7289, 3033, 3982, 3829, 5967, 8595, 7320, 9365,
        820, 1962, 9179, 2854, 1015,  961, 3586, 4856, 6371, 7423, 5561,
       7601,  934, 5162, 6315, 3376, 2357, 1399, 6991, 4993, 3530, 5829,
       1535, 3447,  371,  792, 9220, 1003, 5812, 61

In [9]:
data.shape

(9649, 19)

In [10]:
data.head(2)

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,Low,Close,SMA_5,SMA_10,F_SMA_5,F_SMA_10,col_1,col_2,col_3
0,2015-09-07,01:00:00,1,250,0,Monday,2015-09-07T01:00:00.000000000Z,3223,1.11666,1.11694,1.11412,1.11424,1.1146,1.11371,-0.00036,0.00053,0.00242,0.0027,0.00254
1,2015-09-07,05:00:00,5,250,0,Monday,2015-09-07T05:00:00.000000000Z,6486,1.11421,1.11647,1.11216,1.11625,1.11528,1.11416,0.00097,0.00209,-0.00204,0.00022,0.00205


# <font color='red'>CANDLE LOOP</font>

In [11]:
%%time
GBC_FINAL_MODEL = load_model('FINAL_MODELS/EURUSD/FINAL_GBC_22Nov2021_EURUSD')
LIGHTGBM_FINAL_MODEL = load_model('FINAL_MODELS/EURUSD/FINAL_LIGHTGBM_22Nov2021_EURUSD')

result_output = pd.DataFrame({'Candle_No':[],
                              'Current_Market_Fit':[],
                              'Current_Market':[],
#                              'Rec1':[],
#                              'Rec1_P':[],
#                              'Rec2':[],
#                              'Rec2_P':[],
#                              'Rec3':[],
#                              'Rec3_P':[],
                              'GBC_Label':[],
                              'GBC_Score':[],
                              'LIGHTGBM_Label':[],
                              'LIGHTGBM_Score':[],
                             })

for candle_no in Test_Candle:
    data = pd.read_csv(filename)
    data = data.iloc[candle_no:candle_no+5]
    data['candleno'] = range (1, len(data) + 1)
    X = data['candleno'].values.reshape(-1, 1)
    Y = data['Close'].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    y_pred = linear_regressor.predict(X) 
    
    Current_Market_Fit = r2_score(Y, y_pred).round(2)*100
    coeficient = (linear_regressor.coef_)

    if coeficient > 0:
        Current_Market= 1

    else:
        Current_Market = 0

    
    data = pd.read_csv(filename)
    data = data[['col_1','col_2','col_3','F_SMA_5','F_SMA_10']]

    indices, distances = find_k_similar_candles (candle_no,data)
    indices = indices[0:1][0]
    
    predicted_output = []
    recs = []
    for indice in indices[1:4]:
             
        Predicted_Market_Fit =0
        Predicted_Trade=''
    
        data = pd.read_csv(filename) 
        data = data.iloc[indice:indice+5]

        data['candleno'] = range (1, len(data) + 1)
        X = data['candleno'].values.reshape(-1, 1)
        Y = data['Close'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)

        Predicted_Market_Fit= r2_score(Y, y_pred).round(2)*100
        coeficient = (linear_regressor.coef_)

        if coeficient > 0:
            Predicted_Trade = 'BUY'
            recs.append((r2_score(Y, y_pred).round(2)*100))
        else:
            Predicted_Trade = 'SELL'
            recs.append((r2_score(Y, y_pred).round(2)*100) * -1)
        
        predicted_output.append([Predicted_Market_Fit,Predicted_Trade])
        
        
    data_unseen = pd.DataFrame ({'Rec1': [recs[0]], 'Rec2': [recs[1]], 'Rec3':[recs[2]]})
    
    gbc_prediction = predict_model(GBC_FINAL_MODEL, data=data_unseen)
    GBC_Label = gbc_prediction['Label']
    GBC_Score = gbc_prediction['Score']
    
    lightgbm_prediction = predict_model(LIGHTGBM_FINAL_MODEL, data=data_unseen)
    LIGHTGBM_Label = lightgbm_prediction['Label']
    LIGHTGBM_Score = lightgbm_prediction['Score']
    
    result = {'Candle_No': candle_no,
              'Current_Market_Fit': Current_Market_Fit,
              'Current_Market': Current_Market,
#              'Rec1': predicted_output[0][0],
#              'Rec1_P': predicted_output[0][1],
#              'Rec2': predicted_output[1][0],
#              'Rec2_P': predicted_output[1][1],
#              'Rec3': predicted_output[2][0],
#              'Rec3_P': predicted_output[2][1],
              'GBC_Label': GBC_Label[0],
              'GBC_Score': GBC_Score[0],
              'LIGHTGBM_Label': LIGHTGBM_Label[0],
              'LIGHTGBM_Score': LIGHTGBM_Score[0],
             }
    
    result_output = result_output.append(result, ignore_index = True)

Transformation Pipeline and Model Successfully Loaded
Transformation Pipeline and Model Successfully Loaded
CPU times: user 27min 6s, sys: 1min 7s, total: 28min 14s
Wall time: 3min 2s


In [12]:
result_output.to_csv('Back_Test_Final_Result.csv', header = True, index = False)
result_output = pd.read_csv('Back_Test_Final_Result.csv')

In [13]:
result_output = result_output[result_output['Current_Market_Fit'] > 20]
result_output = result_output.reset_index()
del result_output['index']

In [14]:
result_output.to_csv('Back_Test_Final_Result.csv', header = True, index = False)
result_output = pd.read_csv('Back_Test_Final_Result.csv')

In [15]:
result_output.head()

Unnamed: 0,Candle_No,Current_Market_Fit,Current_Market,GBC_Label,GBC_Score,LIGHTGBM_Label,LIGHTGBM_Score
0,1967.0,96.0,1.0,0.0,0.8671,0.0,0.7809
1,3442.0,80.0,1.0,0.0,0.9071,1.0,0.6597
2,5785.0,95.0,0.0,0.0,0.9946,0.0,0.9407
3,7042.0,77.0,0.0,0.0,0.7271,0.0,0.7245
4,1798.0,89.0,1.0,1.0,0.9411,0.0,0.688
