## Back Test Data Generation

In [1]:
import requests
import time
import calendar
import dateutil.parser as parser
from dateutil.relativedelta import relativedelta
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import warnings
import yaml
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import pytz
warnings.filterwarnings('ignore')

### Settings

In [2]:
with open ('back_test_pipeline_settings.yaml') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
    k_number = cfg['knn']['k_number']
    metric = cfg['knn']['metric']
    algorithm = cfg['knn']['algorithm']
    feature_1 = cfg['knn']['feature_1']
    feature_2 = cfg['knn']['feature_2']
    feature_3 = cfg['knn']['feature_3']
    feature_7 = cfg['knn']['feature_7']
    feature_8 = cfg['knn']['feature_8']
    feature_15 = cfg['knn']['feature_15']
    volume = cfg['feature']['volume']
    volume_size = cfg['sample']['volume_size']
    sample_count = cfg['sample']['count']
    candles = cfg['recommendation']['candle_count']
    pair = cfg['currency']['pair']
    instrument = cfg['currency']['instrument']

In [3]:
print('K Number:',k_number)
print('Metric:', metric)
print('Algorithm:', algorithm)
print('Candle Volume Size:', volume_size)
print('Random Sample Count:', sample_count)
print('Future Candle Count:', candles)
print('Pair:', pair)
print('Instrument:', instrument)

K Number: 5
Metric: euclidean
Algorithm: brute
Candle Volume Size: 5500
Random Sample Count: 50
Future Candle Count: 7
Pair: EURUSD
Instrument: EUR_USD


In [4]:
def convert_date(utc_time): 
    parsed_date = parser.parse(utc_time)
    var_date=parsed_date.date()
    var_time=parsed_date.time()
    var_f_time=var_time.hour
    var_julian_date=parsed_date.timetuple().tm_yday
    var_weekday=parsed_date.weekday()
    var_weekday_name=calendar.day_name[parsed_date.weekday()]
    return var_date, var_time, var_f_time, var_julian_date, var_weekday, var_weekday_name

In [5]:
def find_k_similar_candles(candle_id, dataset, k = k_number):
    indices=[]
    distances = []
    output = []
    model_knn = NearestNeighbors(metric = metric, algorithm = algorithm) 
    model_knn.fit(dataset)
    
    #metric = 'euclidean' or 'cosine' or 'manhattan' or 'mahalanobis'
    
    distances, indices = model_knn.kneighbors(dataset.iloc[candle_id,:].values.reshape(1,-1),
                                              n_neighbors = k)

    for i in range(0,len(distances.flatten())):
        if i!=0:
            
            output.append ([dataset.index[indices.flatten()[i]],
                            distances.flatten()[i],
                            dataset.iloc[indices.flatten()[i]][feature_1],
                            dataset.iloc[indices.flatten()[i]][feature_2],
                            dataset.iloc[indices.flatten()[i]][feature_3],
                            dataset.iloc[indices.flatten()[i]][feature_7],
                            dataset.iloc[indices.flatten()[i]][feature_8],                            
                           ])
    
    output = pd.DataFrame(output)
    output.columns = ['Indice','Distance',
                      feature_1,
                      feature_2,
                      feature_3,
                      feature_7,
                      feature_8,
                     ]
   # display (output)
    
    return indices, distances

# <font color='red'>Test Configs</font>

In [6]:
filename = '{}_H4.csv'.format(instrument)
data = pd.read_csv(filename)

In [7]:
data.columns

Index(['Date', 'Time', 'f_time', 'julian_date', 'Weekday', 'Weekday_Name',
       'UTC_Time', 'Volume', 'Open', 'High', 'Low', 'Close', 'SMA_5', 'SMA_10',
       'SMA_20', 'F_SMA_5', 'F_SMA_10', 'F_SMA_20', 'O-H', 'O-L', 'O-C', 'H-L',
       'H-C', 'L-C', 'Direction', 'col_1', 'col_2', 'col_3', 'col_4', 'col_5',
       'Trend'],
      dtype='object')

In [8]:
data.shape

(9639, 31)

In [9]:
data.head()

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,...,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5,Trend
0,2015-12-22,22:00:00,22,356,1,Tuesday,2015-12-22T22:00:00.000000000Z,1383,1.09564,1.09573,...,0.00163,0.00145,-0.00018,0,0.00136,0.00145,0.00154,0.00132,-0.00141,0.0
1,2015-12-23,02:00:00,2,357,2,Wednesday,2015-12-23T02:00:00.000000000Z,936,1.0943,1.0948,...,0.00149,0.0008,-0.00069,0,0.0003,0.0008,0.00099,0.00136,0.00132,0.0
2,2015-12-23,06:00:00,6,357,2,Wednesday,2015-12-23T06:00:00.000000000Z,3950,1.09402,1.09424,...,0.00276,0.00198,-0.00078,0,0.00176,0.00198,0.00254,0.0003,0.00136,0.0
3,2015-12-23,10:00:00,10,357,2,Wednesday,2015-12-23T10:00:00.000000000Z,3717,1.09224,1.09346,...,0.0035,0.00337,-0.00013,0,0.00215,0.00337,0.00228,0.00176,0.0003,0.0
4,2015-12-23,14:00:00,14,357,2,Wednesday,2015-12-23T14:00:00.000000000Z,6342,1.09009,1.09136,...,0.00436,0.00318,-0.00118,0,0.00191,0.00318,0.00309,0.00215,0.00176,0.0


In [10]:
data.describe()

Unnamed: 0,f_time,julian_date,Weekday,Volume,Open,High,Low,Close,SMA_5,SMA_10,...,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5,Trend
count,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,...,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0,9639.0
mean,11.36788,179.555971,2.068783,9232.743646,1.14318,1.144627,1.141756,1.143183,1.143183,1.143181,...,0.002871,0.001444,-0.001427,0.50472,-3e-06,0.001444,0.001424,-3e-06,-3e-06,0.347858
std,6.84964,106.665342,1.54484,9772.785796,0.04523,0.045203,0.045227,0.045229,0.045191,0.045156,...,0.001993,0.001515,0.001462,0.500004,0.002075,0.001515,0.00157,0.002075,0.002075,56.304042
min,1.0,1.0,0.0,1.0,1.03695,1.03915,1.03406,1.03698,1.0387,1.0395,...,0.0,0.0,-0.02835,0.0,-0.02738,0.0,0.0,-0.02738,-0.02738,-99.0
25%,5.0,85.0,1.0,2900.5,1.112145,1.113555,1.110615,1.112195,1.1123,1.1122,...,0.00154,0.00049,-0.00189,0.0,-0.00097,0.00049,0.00043,-0.00097,-0.00097,-50.0
50%,13.0,179.0,2.0,5702.0,1.1352,1.13666,1.13373,1.13518,1.135,1.135,...,0.00238,0.001,-0.00101,1.0,-2e-05,0.001,0.00095,-2e-05,-2e-05,0.0
75%,17.0,272.0,3.0,12125.5,1.178715,1.18008,1.17729,1.17871,1.1787,1.1785,...,0.00363,0.0019,-0.00048,1.0,0.00091,0.0019,0.0019,0.000915,0.000915,52.0
max,22.0,366.0,6.0,107306.0,1.25493,1.2556,1.25234,1.25494,1.2514,1.2491,...,0.03852,0.03486,0.0,1.0,0.03281,0.03486,0.03647,0.03281,0.03281,99.0


In [11]:
volume_med = data['Volume'].median()
volume_med

5702.0

## Selecting n random candles where their volume is more than 5500

In [12]:
print('Candle Volume Size:', volume_size)
print('Random Sample Count:', sample_count)

Candle Volume Size: 5500
Random Sample Count: 50


In [13]:
random_samples = data[data[volume] > volume_size].sample(n = sample_count)

In [14]:
#Random_Candles = np.random.randint(low=1, high=len(data)-40, size=1000)
Random_Candles = list(random_samples.index.values)

In [15]:
#show the fisrt 10 random generated candle numbers
Random_Candles[0:10]

[8870, 3250, 3336, 8112, 7080, 2863, 6575, 3243, 3612, 3325]

# <font color='red'>CANDLE LOOP</font>

In [16]:
CST = pytz.timezone('America/Chicago')
datetime_cst = datetime.now(CST)
print("Date & Time in CST : ", 
      datetime_cst.strftime('%Y:%m:%d %H:%M:%S %Z %z'))

Date & Time in CST :  2022:03:04 23:12:55 CST -0600


In [18]:
%%time

result_output = pd.DataFrame({'Candle_No':[],
                              'Current_Market_Fit':[],
                              'Current_Market':[],
                              
                              'Rec1_Close_Score':[],
                              'Rec1_High_Score':[],
                              'Rec1_Low_Score':[],
                              'Rec1_HH':[],
                              'Rec1_LL':[],
                              
                              'Rec2_Close_Score':[],
                              'Rec2_High_Score':[],
                              'Rec2_Low_Score':[],
                              'Rec2_HH':[],
                              'Rec2_LL':[],
                              
                              'Rec3_Close_Score':[],
                              'Rec3_High_Score':[],
                              'Rec3_Low_Score':[],
                              'Rec3_HH':[],
                              'Rec3_LL':[],
                              
                              'Rec4_Close_Score':[],
                              'Rec4_High_Score':[],
                              'Rec4_Low_Score':[],
                              'Rec4_HH':[],
                              'Rec4_LL':[],
                             })

for candle_no in Random_Candles:
    data = pd.read_csv(filename)
    data = data.iloc[candle_no:candle_no+candles]
    data['candleno'] = range (1, len(data) + 1)
    X = data['candleno'].values.reshape(-1, 1)
    Y = data['Close'].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    y_pred = linear_regressor.predict(X) 
    
    Current_Market_Fit = r2_score(Y, y_pred)*100
    #print(Current_Market_Fit)
    coeficient = (linear_regressor.coef_)

    if coeficient > 0:
        Current_Market = 1  ## Bullish / Buy ##
    else:
        Current_Market = 0  ## Bearish / Sell ##
    
    data = pd.read_csv(filename)
    data = data[[feature_1,
                 feature_2,
                 feature_3,
                 feature_7,
                 feature_8,
                ]]

    indices, distances = find_k_similar_candles (candle_no,data)
    indices = indices[0:1][0]
    
    predicted_output_1 = []
    predicted_output_2 = []
    predicted_output_3 = []

    for indice in indices[1:5]:
             
        Predicted_Market_Fit =0
        Predicted_Trade=''
    
        data = pd.read_csv(filename) 
        data = data.iloc[indice:indice+candles]
        
        HH = data.iloc[0]['Close'] - data['High'].max()
        LL = data.iloc[0]['Close'] - data['Low'].min()
#        print("Close:", data.iloc[0]['Close'] , "High: ", data['High'].max(), 'Low: ', data['Low'].min())

        data['candleno'] = range (1, len(data) + 1)
        X = data['candleno'].values.reshape(-1, 1)
        
        Y = data['Close'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)
        Predicted_Market_Fit= r2_score(Y, y_pred)*100
        coeficient = (linear_regressor.coef_)
        if coeficient > 0:
            Predicted_Trade = 1    ## Buy ##
        else:
            Predicted_Trade = -1   ## Sell ##
            
        predicted_output_1.append([Predicted_Market_Fit * Predicted_Trade, HH.round(4), LL.round(4)])
        
        Y = data['High'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)
        Predicted_Market_Fit= r2_score(Y, y_pred)*100
        coeficient = (linear_regressor.coef_)
        if coeficient > 0:
            Predicted_Trade = 1    ## Buy ##
        else:
            Predicted_Trade = -1   ## Sell ##            
        
#        print (int(Predicted_Market_Fit) * Predicted_Trade)
        predicted_output_2.append([Predicted_Market_Fit * Predicted_Trade])
        
        Y = data['Low'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)
        Predicted_Market_Fit= r2_score(Y, y_pred)*100
        coeficient = (linear_regressor.coef_)
        if coeficient > 0:
            Predicted_Trade = 1    ## Buy ##
        else:
            Predicted_Trade = -1   ## Sell ##
        
#        print (int(Predicted_Market_Fit) * Predicted_Trade)
        predicted_output_3.append([Predicted_Market_Fit * Predicted_Trade])
        
    
    result = {'Candle_No': candle_no,
              'Current_Market_Fit': Current_Market_Fit,
              'Current_Market': Current_Market,
              
              'Rec1_Close_Score': predicted_output_1[0][0],
              'Rec1_High_Score': predicted_output_2[0][0],
              'Rec1_Low_Score': predicted_output_3[0][0],
              'Rec1_HH': predicted_output_1[0][1],
              'Rec1_LL': predicted_output_1[0][2],
              
              'Rec2_Close_Score': predicted_output_1[0][0],
              'Rec2_High_Score': predicted_output_2[0][0],
              'Rec2_Low_Score': predicted_output_3[0][0],
              'Rec2_HH': predicted_output_1[1][1],
              'Rec2_LL': predicted_output_1[1][2],
              
              'Rec3_Close_Score': predicted_output_1[0][0],
              'Rec3_High_Score': predicted_output_2[0][0],
              'Rec3_Low_Score': predicted_output_3[0][0],
              'Rec3_HH': predicted_output_1[2][1],
              'Rec3_LL': predicted_output_1[2][2],
              
              'Rec4_Close_Score': predicted_output_1[0][0],
              'Rec4_High_Score': predicted_output_2[0][0],
              'Rec4_Low_Score': predicted_output_3[0][0],
              'Rec4_HH': predicted_output_1[3][1],
              'Rec4_LL': predicted_output_1[3][2],
             }
    
    result_output = result_output.append(result, ignore_index = True)

CPU times: user 43.5 s, sys: 9.37 s, total: 52.9 s
Wall time: 12.5 s


In [19]:
now = datetime.now()
today = now.strftime("%d-%m-%Y_%I-%M_%p")

In [20]:
result_output.to_csv('01_Back_Test_Data.csv', header = True, index = False)
result_output.to_csv(today + "_" + "Back_Test_Data_" + pair + '.csv', header = True, index = False)
result_output = pd.read_csv(today + "_" + "Back_Test_Data_" + pair + '.csv')

In [21]:
result_output.head(10)

Unnamed: 0,Candle_No,Current_Market_Fit,Current_Market,Rec1_Close_Score,Rec1_High_Score,Rec1_Low_Score,Rec1_HH,Rec1_LL,Rec2_Close_Score,Rec2_High_Score,...,Rec3_Close_Score,Rec3_High_Score,Rec3_Low_Score,Rec3_HH,Rec3_LL,Rec4_Close_Score,Rec4_High_Score,Rec4_Low_Score,Rec4_HH,Rec4_LL
0,8870.0,80.719208,0.0,-36.771012,-79.682204,-31.915608,-0.0009,0.0068,-36.771012,-79.682204,...,-36.771012,-79.682204,-31.915608,-0.0012,0.0036,-36.771012,-79.682204,-31.915608,-0.0047,0.0014
1,3250.0,64.866329,0.0,-77.126268,-93.845323,-76.450445,-0.0025,0.0074,-77.126268,-93.845323,...,-77.126268,-93.845323,-76.450445,-0.0062,0.0083,-77.126268,-93.845323,-76.450445,-0.0082,0.0054
2,3336.0,12.713084,0.0,45.017514,18.506163,-27.395539,-0.0056,0.0047,45.017514,18.506163,...,45.017514,18.506163,-27.395539,-0.0088,0.0009,45.017514,18.506163,-27.395539,-0.0032,0.0092
3,8112.0,1.616715,1.0,84.74047,17.544092,48.010906,-0.0053,0.0007,84.74047,17.544092,...,84.74047,17.544092,48.010906,-0.0051,0.0019,84.74047,17.544092,48.010906,-0.0048,0.0026
4,7080.0,72.13194,1.0,-0.966798,0.011288,-7.090035,-0.0008,0.0035,-0.966798,0.011288,...,-0.966798,0.011288,-7.090035,-0.0008,0.0026,-0.966798,0.011288,-7.090035,-0.0008,0.0043
5,2863.0,76.376948,0.0,-72.644422,-85.902056,-72.01024,-0.001,0.0049,-72.644422,-85.902056,...,-72.644422,-85.902056,-72.01024,-0.0023,0.0053,-72.644422,-85.902056,-72.01024,-0.0045,0.0011
6,6575.0,28.167252,0.0,-12.434133,-23.742347,-57.432837,-0.0075,0.0058,-12.434133,-23.742347,...,-12.434133,-23.742347,-57.432837,-0.0055,0.0041,-12.434133,-23.742347,-57.432837,-0.0055,0.005
7,3243.0,10.65787,0.0,0.008648,28.555086,26.383411,-0.0115,0.0046,0.008648,28.555086,...,0.008648,28.555086,26.383411,-0.0018,0.0043,0.008648,28.555086,26.383411,-0.0047,0.0048
8,3612.0,94.38092,0.0,-8.158948,-1.010471,16.020134,-0.0017,0.0042,-8.158948,-1.010471,...,-8.158948,-1.010471,16.020134,-0.001,0.0066,-8.158948,-1.010471,16.020134,-0.0028,0.0039
9,3325.0,30.900649,1.0,90.345856,89.896352,93.811979,-0.0196,0.0004,90.345856,89.896352,...,90.345856,89.896352,93.811979,-0.0017,0.006,90.345856,89.896352,93.811979,-0.0187,0.0004


In [22]:
result_output.shape

(50, 23)

In [23]:
result_output.isnull().sum()

Candle_No             0
Current_Market_Fit    0
Current_Market        0
Rec1_Close_Score      0
Rec1_High_Score       0
Rec1_Low_Score        0
Rec1_HH               0
Rec1_LL               0
Rec2_Close_Score      0
Rec2_High_Score       0
Rec2_Low_Score        0
Rec2_HH               0
Rec2_LL               0
Rec3_Close_Score      0
Rec3_High_Score       0
Rec3_Low_Score        0
Rec3_HH               0
Rec3_LL               0
Rec4_Close_Score      0
Rec4_High_Score       0
Rec4_Low_Score        0
Rec4_HH               0
Rec4_LL               0
dtype: int64

#### Generating Log File

In [24]:
file = open(today + "_" + "data_generation_log_" + pair + '.txt', "w")
file.write ("Date: " + today + "\n" + \
            "Currency Pair: " + pair + "\n" + \
            "K_Number: " + str(k_number) + "\n" + \
            "KNN_Metric: " + metric + "\n" + \
            "KNN_Algorithm: " + algorithm + "\n" + \
            "Feature: " + feature_1 + "\n" + \
            "Feature: " + feature_2 + "\n" + \
            "Feature: " + feature_3 + "\n" + \
            "Feature: " + feature_7 + "\n" + \
            "Feature: " + feature_8 + "\n" + \
            "Volume Size: " + str(volume_size) + "\n" + \
            "Sample Count: " + str(sample_count) + "\n" + \
            "Candle Counts: " + str(candles) + "\n"
           )
file.close()