## Back Test Data Generation

In [1]:
import requests
import time
import calendar
import dateutil.parser as parser
from dateutil.relativedelta import relativedelta
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import warnings
import yaml
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import pytz
warnings.filterwarnings('ignore')

### Settings

In [2]:
with open ('back_test_pipeline_settings.yaml') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
    k_number = cfg['knn']['k_number']
    metric = cfg['knn']['metric']
    algorithm = cfg['knn']['algorithm']
    feature_1 = cfg['knn']['feature_1']
    feature_2 = cfg['knn']['feature_2']
    feature_3 = cfg['knn']['feature_3']
    feature_7 = cfg['knn']['feature_7']
    feature_8 = cfg['knn']['feature_8']
    feature_15 = cfg['knn']['feature_15']
    volume = cfg['feature']['volume']
    volume_size = cfg['sample']['volume_size']
    sample_count = cfg['sample']['count']
    candles = cfg['recommendation']['candle_count']
    pair = cfg['currency']['pair']
    instrument = cfg['currency']['instrument']

In [3]:
print('K Number:',k_number)
print('Metric:', metric)
print('Algorithm:', algorithm)
print('Candle Volume Size:', volume_size)
print('Random Sample Count:', sample_count)
print('Future Candle Count:', candles)
print('Pair:', pair)
print('Instrument:', instrument)

K Number: 5
Metric: euclidean
Algorithm: brute
Candle Volume Size: 5500
Random Sample Count: 10
Future Candle Count: 7
Pair: EURUSD
Instrument: EUR_USD


In [4]:
def convert_date(utc_time): 
    parsed_date = parser.parse(utc_time)
    var_date=parsed_date.date()
    var_time=parsed_date.time()
    var_f_time=var_time.hour
    var_julian_date=parsed_date.timetuple().tm_yday
    var_weekday=parsed_date.weekday()
    var_weekday_name=calendar.day_name[parsed_date.weekday()]
    return var_date, var_time, var_f_time, var_julian_date, var_weekday, var_weekday_name

In [5]:
def find_k_similar_candles(candle_id, dataset, k = k_number):
    indices=[]
    distances = []
    output = []
    model_knn = NearestNeighbors(metric = metric, algorithm = algorithm) 
    model_knn.fit(dataset)
    
    #metric = 'euclidean' or 'cosine' or 'manhattan' or 'mahalanobis'
    
    distances, indices = model_knn.kneighbors(dataset.iloc[candle_id,:].values.reshape(1,-1),
                                              n_neighbors = k)

    for i in range(0,len(distances.flatten())):
        if i!=0:
            
            output.append ([dataset.index[indices.flatten()[i]],
                            distances.flatten()[i],
                            dataset.iloc[indices.flatten()[i]][feature_1],
                            dataset.iloc[indices.flatten()[i]][feature_2],
                            dataset.iloc[indices.flatten()[i]][feature_3],
                            dataset.iloc[indices.flatten()[i]][feature_15],
#                            dataset.iloc[indices.flatten()[i]][feature_8],                            
                           ])
    
    output = pd.DataFrame(output)
    output.columns = ['Indice','Distance',
                      feature_1,
                      feature_2,
                      feature_3,
                      feature_15,
#                      feature_8,
                     ]
   # display (output)
    
    return indices, distances

# <font color='red'>Test Configs</font>

In [6]:
filename = '{}_H4.csv'.format(instrument)
data = pd.read_csv(filename)

In [7]:
data.columns

Index(['Date', 'Time', 'f_time', 'julian_date', 'Weekday', 'Weekday_Name',
       'UTC_Time', 'Volume', 'Open', 'High', 'Low', 'Close', 'SMA_5', 'SMA_10',
       'SMA_20', 'F_SMA_5', 'F_SMA_10', 'F_SMA_20', 'O-H', 'O-L', 'O-C', 'H-L',
       'H-C', 'L-C', 'Direction', 'col_1', 'col_2', 'col_3', 'col_4', 'col_5',
       'Trend'],
      dtype='object')

In [8]:
data.shape

(9651, 31)

In [9]:
data.head()

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,...,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5,Trend
0,2015-12-10,06:00:00,6,344,3,Thursday,2015-12-10T06:00:00.000000000Z,5212,1.10018,1.10028,...,0.00416,0.00346,-0.0007,0,0.00336,0.0001,0.0007,0.00053,0.00176,0.0
1,2015-12-10,10:00:00,10,344,3,Thursday,2015-12-10T10:00:00.000000000Z,8492,1.0968,1.09725,...,0.00391,0.00013,-0.00378,1,-0.00032,0.00045,0.00378,0.00336,0.00053,0.0
2,2015-12-10,14:00:00,14,344,3,Thursday,2015-12-10T14:00:00.000000000Z,10868,1.09708,1.09712,...,0.00459,0.0024,-0.00219,0,0.00236,4e-05,0.00219,-0.00032,0.00336,0.0
3,2015-12-10,18:00:00,18,344,3,Thursday,2015-12-10T18:00:00.000000000Z,3356,1.09469,1.09566,...,0.00264,0.00157,-0.00107,0,0.0006,0.00097,0.00107,0.00236,-0.00032,0.0
4,2015-12-10,22:00:00,22,344,3,Thursday,2015-12-10T22:00:00.000000000Z,1840,1.09407,1.09477,...,0.00211,0.00153,-0.00058,0,0.00083,0.0007,0.00058,0.0006,0.00236,0.0


In [10]:
data.describe()

Unnamed: 0,f_time,julian_date,Weekday,Volume,Open,High,Low,Close,SMA_5,SMA_10,...,H-L,H-C,L-C,Direction,col_1,col_2,col_3,col_4,col_5,Trend
count,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,...,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0,9651.0
mean,11.368666,180.972334,2.067351,9065.3381,1.143028,1.144476,1.141608,1.143034,1.143028,1.143018,...,0.002868,0.001442,-0.001426,0.505129,-6e-06,0.001448,0.001426,-6e-06,-6e-06,0.500259
std,6.849653,107.043578,1.54545,9360.259721,0.045314,0.045285,0.04531,0.045312,0.045278,0.045247,...,0.001987,0.001512,0.00146,0.5,0.002068,0.001539,0.00146,0.002068,0.002068,56.294224
min,1.0,1.0,0.0,1.0,1.03695,1.03915,1.03406,1.03698,1.0387,1.0395,...,0.0,0.0,-0.02835,0.0,-0.02738,0.0,0.0,-0.02738,-0.02738,-99.0
25%,5.0,87.0,1.0,2892.0,1.111625,1.11306,1.110135,1.1117,1.11165,1.1115,...,0.00154,0.00049,-0.00189,0.0,-0.00097,0.00046,0.00048,-0.00097,-0.00097,-50.0
50%,13.0,180.0,2.0,5672.0,1.13516,1.13664,1.13368,1.13513,1.135,1.1349,...,0.00238,0.001,-0.00101,1.0,-2e-05,0.00101,0.00101,-2e-05,-2e-05,0.0
75%,17.0,274.0,3.0,11984.0,1.1787,1.180055,1.17727,1.178685,1.1786,1.1785,...,0.00363,0.0019,-0.00048,1.0,0.00091,0.00192,0.00189,0.00091,0.00091,52.0
max,22.0,366.0,6.0,107306.0,1.25493,1.2556,1.25234,1.25494,1.2514,1.2491,...,0.03852,0.03486,0.0,1.0,0.03281,0.02982,0.02835,0.03281,0.03281,99.0


In [11]:
volume_med = data['Volume'].median()
volume_med

5672.0

## Selecting n random candles where their volume is more than 5500

In [12]:
print('Candle Volume Size:', volume_size)
print('Random Sample Count:', sample_count)

Candle Volume Size: 5500
Random Sample Count: 10


In [13]:
random_samples = data[data[volume] > volume_size].sample(n = sample_count)

In [14]:
#Random_Candles = np.random.randint(low=1, high=len(data)-40, size=1000)
Random_Candles = list(random_samples.index.values)

In [15]:
#show the fisrt 10 random generated candle numbers
Random_Candles[0:10]

[6947, 8687, 5495, 4359, 6932, 6875, 3310, 7277, 3554, 3267]

# <font color='red'>CANDLE LOOP</font>

In [28]:
CST = pytz.timezone('America/Chicago')
datetime_cst = datetime.now(CST)
print("Date & Time in CST : ", 
      datetime_cst.strftime('%Y:%m:%d %H:%M:%S %Z %z'))

Date & Time in CST :  2022:02:23 22:01:13 CST -0600


In [29]:
%%time

result_output = pd.DataFrame({'Candle_No':[],
                              'Current_Market_Fit':[],
                              'Current_Market':[],
                              
                              'Rec1_Close_Score':[],
                              'Rec1_High_Score':[],
                              'Rec1_Low_Score':[],
                              'Rec1_HH':[],
                              'Rec1_LL':[],
                              
                              'Rec2_Close_Score':[],
                              'Rec2_High_Score':[],
                              'Rec2_Low_Score':[],
                              'Rec2_HH':[],
                              'Rec2_LL':[],
                              
                              'Rec3_Close_Score':[],
                              'Rec3_High_Score':[],
                              'Rec3_Low_Score':[],
                              'Rec3_HH':[],
                              'Rec3_LL':[],
                              
                              'Rec4_Close_Score':[],
                              'Rec4_High_Score':[],
                              'Rec4_Low_Score':[],
                              'Rec4_HH':[],
                              'Rec4_LL':[],
                             })

for candle_no in Random_Candles:
    data = pd.read_csv(filename)
    data = data.iloc[candle_no:candle_no+candles]
    data['candleno'] = range (1, len(data) + 1)
    X = data['candleno'].values.reshape(-1, 1)
    Y = data['Close'].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    y_pred = linear_regressor.predict(X) 
    
    Current_Market_Fit = int(r2_score(Y, y_pred)*100)
    #print(Current_Market_Fit)
    coeficient = (linear_regressor.coef_)

    if coeficient > 0:
        Current_Market = 1  ## Bullish / Buy ##
    else:
        Current_Market = 0  ## Bearish / Sell ##
    
    data = pd.read_csv(filename)
    data = data[[feature_1,
                 feature_2,
                 feature_3,
                 feature_15,
#                 feature_8,
                ]]

    indices, distances = find_k_similar_candles (candle_no,data)
    indices = indices[0:1][0]
    
    predicted_output_1 = []
    predicted_output_2 = []
    predicted_output_3 = []

    for indice in indices[1:5]:
             
        Predicted_Market_Fit =0
        Predicted_Trade=''
    
        data = pd.read_csv(filename) 
        data = data.iloc[indice:indice+candles]
        
        HH = data.iloc[0]['Close'] - data['High'].max()
        LL = data.iloc[0]['Close'] - data['Low'].min()
#        print("Close:", data.iloc[0]['Close'] , "High: ", data['High'].max(), 'Low: ', data['Low'].min())

        data['candleno'] = range (1, len(data) + 1)
        X = data['candleno'].values.reshape(-1, 1)
        
        Y = data['Close'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)
        Predicted_Market_Fit= r2_score(Y, y_pred)*100
        coeficient = (linear_regressor.coef_)
        if coeficient > 0:
            Predicted_Trade = 1    ## Buy ##
        else:
            Predicted_Trade = -1   ## Sell ##
            
        predicted_output_1.append([int(Predicted_Market_Fit) * Predicted_Trade, HH.round(4), LL.round(4)])
        
        Y = data['High'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)
        Predicted_Market_Fit= r2_score(Y, y_pred)*100
        coeficient = (linear_regressor.coef_)
        if coeficient > 0:
            Predicted_Trade = 1    ## Buy ##
        else:
            Predicted_Trade = -1   ## Sell ##            
        
#        print (int(Predicted_Market_Fit) * Predicted_Trade)
        predicted_output_2.append([int(Predicted_Market_Fit) * Predicted_Trade])
        
        Y = data['Low'].values.reshape(-1, 1)
        linear_regressor = LinearRegression()
        linear_regressor.fit(X, Y)
        y_pred = linear_regressor.predict(X)
        Predicted_Market_Fit= r2_score(Y, y_pred)*100
        coeficient = (linear_regressor.coef_)
        if coeficient > 0:
            Predicted_Trade = 1    ## Buy ##
        else:
            Predicted_Trade = -1   ## Sell ##
        
#        print (int(Predicted_Market_Fit) * Predicted_Trade)
        predicted_output_3.append([int(Predicted_Market_Fit) * Predicted_Trade])
        
    
    result = {'Candle_No': candle_no,
              'Current_Market_Fit': Current_Market_Fit,
              'Current_Market': Current_Market,
              
              'Rec1_Close_Score': predicted_output_1[0][0],
              'Rec1_High_Score': predicted_output_2[0][0],
              'Rec1_Low_Score': predicted_output_3[0][0],
              'Rec1_HH': predicted_output_1[0][1],
              'Rec1_LL': predicted_output_1[0][2],
              
              'Rec2_Close_Score': predicted_output_1[0][0],
              'Rec2_High_Score': predicted_output_2[0][0],
              'Rec2_Low_Score': predicted_output_3[0][0],
              'Rec2_HH': predicted_output_1[1][1],
              'Rec2_LL': predicted_output_1[1][2],
              
              'Rec3_Close_Score': predicted_output_1[0][0],
              'Rec3_High_Score': predicted_output_2[0][0],
              'Rec3_Low_Score': predicted_output_3[0][0],
              'Rec3_HH': predicted_output_1[2][1],
              'Rec3_LL': predicted_output_1[2][2],
              
              'Rec4_Close_Score': predicted_output_1[0][0],
              'Rec4_High_Score': predicted_output_2[0][0],
              'Rec4_Low_Score': predicted_output_3[0][0],
              'Rec4_HH': predicted_output_1[3][1],
              'Rec4_LL': predicted_output_1[3][2],
             }
    
    result_output = result_output.append(result, ignore_index = True)

CPU times: user 8.69 s, sys: 1.81 s, total: 10.5 s
Wall time: 2.4 s


In [30]:
now = datetime.now()
today = now.strftime("%d-%m-%Y_%I-%M_%p")

In [31]:
result_output.to_csv('01_Back_Test_Data.csv', header = True, index = False)
result_output.to_csv(today + "_" + "Back_Test_Data_" + pair + '.csv', header = True, index = False)
result_output = pd.read_csv(today + "_" + "Back_Test_Data_" + pair + '.csv')

In [32]:
result_output.head(10)

Unnamed: 0,Candle_No,Current_Market_Fit,Current_Market,Rec1_Close_Score,Rec1_High_Score,Rec1_Low_Score,Rec1_HH,Rec1_LL,Rec2_Close_Score,Rec2_High_Score,...,Rec3_Close_Score,Rec3_High_Score,Rec3_Low_Score,Rec3_HH,Rec3_LL,Rec4_Close_Score,Rec4_High_Score,Rec4_Low_Score,Rec4_HH,Rec4_LL
0,6947.0,70.0,1.0,-42.0,-63.0,-40.0,-0.0027,0.0113,-42.0,-63.0,...,-42.0,-63.0,-40.0,-0.0065,0.0035,-42.0,-63.0,-40.0,-0.0029,0.0038
1,8687.0,38.0,0.0,-7.0,7.0,-9.0,-0.0083,0.0038,-7.0,7.0,...,-7.0,7.0,-9.0,-0.0035,0.0023,-7.0,7.0,-9.0,-0.0108,0.0016
2,5495.0,17.0,1.0,46.0,60.0,61.0,-0.0086,0.0019,46.0,60.0,...,46.0,60.0,61.0,-0.0043,0.0021,46.0,60.0,61.0,-0.007,0.009
3,4359.0,70.0,0.0,18.0,44.0,49.0,-0.0038,0.0048,18.0,44.0,...,18.0,44.0,49.0,-0.0024,0.0036,18.0,44.0,49.0,-0.0066,0.003
4,6932.0,83.0,1.0,78.0,-1.0,34.0,-0.0024,0.0008,78.0,-1.0,...,78.0,-1.0,34.0,-0.0035,0.0008,78.0,-1.0,34.0,-0.0063,0.0007
5,6875.0,52.0,1.0,-91.0,-88.0,-93.0,-0.0019,0.0073,-91.0,-88.0,...,-91.0,-88.0,-93.0,-0.0018,0.0142,-91.0,-88.0,-93.0,-0.0041,0.0027
6,3310.0,46.0,1.0,-17.0,-1.0,-1.0,-0.0014,0.0055,-17.0,-1.0,...,-17.0,-1.0,-1.0,-0.0112,0.0028,-17.0,-1.0,-1.0,-0.0064,0.0023
7,7277.0,94.0,1.0,-54.0,-91.0,-52.0,-0.0006,0.0068,-54.0,-91.0,...,-54.0,-91.0,-52.0,-0.0021,0.0058,-54.0,-91.0,-52.0,-0.0013,0.0075
8,3554.0,10.0,0.0,-11.0,-44.0,-35.0,-0.003,0.0043,-11.0,-44.0,...,-11.0,-44.0,-35.0,-0.0027,0.0091,-11.0,-44.0,-35.0,-0.005,0.0027
9,3267.0,25.0,1.0,-19.0,48.0,-14.0,-0.0037,0.0036,-19.0,48.0,...,-19.0,48.0,-14.0,-0.0034,0.0028,-19.0,48.0,-14.0,-0.0029,0.01


In [33]:
result_output.shape

(10, 23)

#### Generating Log File

In [None]:
file = open(today + "_" + "data_generation_log_" + pair + '.txt', "w")
file.write ("Date: " + today + "\n" + \
            "Currency Pair: " + pair + "\n" + \
            "K_Number: " + str(k_number) + "\n" + \
            "KNN_Metric: " + metric + "\n" + \
            "KNN_Algorithm: " + algorithm + "\n" + \
            "Feature: " + feature_1 + "\n" + \
            "Feature: " + feature_2 + "\n" + \
            "Feature: " + feature_3 + "\n" + \
            "Feature: " + feature_7 + "\n" + \
            "Feature: " + feature_8 + "\n" + \
            "Volume Size: " + str(volume_size) + "\n" + \
            "Sample Count: " + str(sample_count) + "\n" + \
            "Candle Counts: " + str(candles) + "\n"
           )
file.close()