In [1]:
import requests
import json
import time
import math
import re
import calendar
import dateutil.parser as parser
from dateutil.relativedelta import relativedelta
from datetime import datetime, timezone
import yaml
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from sklearn.neighbors import NearestNeighbors
from yellowbrick.cluster import KElbowVisualizer, SilhouetteVisualizer
import os, sys, glob
import kaleido
from PIL import Image
from fpdf import FPDF
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

In [2]:
def convert_date(utc_time): 
    parsed_date = parser.parse(utc_time)
    var_date=parsed_date.date()
    var_time=parsed_date.time()
    var_f_time=var_time.hour
    var_julian_date=parsed_date.timetuple().tm_yday
    var_weekday=parsed_date.weekday()
    var_weekday_name=calendar.day_name[parsed_date.weekday()]
    return var_date, var_time, var_f_time, var_julian_date, var_weekday, var_weekday_name

In [3]:
def eda(data):
    output=[]
    for col in data.columns:
        duplicatedvalue = data[col].duplicated().sum()
        duplicatedrows = data.duplicated().sum()
        missingvalue = np.sum(pd.isna(data[col]))
        uniquevalue = data[col].nunique()
        datatype = str(data[col].dtype)
        
        output.append([col, duplicatedvalue, duplicatedrows, missingvalue, uniquevalue, datatype])
        
    output = pd.DataFrame(output) 
    output.columns = ['Features', 'Duplicated Values', 'Duplicated Rows', 'Missing Values', 'Unique Values', 'Data Type']
    display(output)

In [4]:
def get_last_value(value):
    last = value
    return last

In [5]:
def perc_on_bar(plot, feature):
    total = len(feature)
    for p in ax.patches:
        percentage = "{:.1f}%".format(100 * p.get_height() / total)
        x = p.get_x() + p.get_width() / 2 - 0.05
        y = p.get_y() + p.get_height()
        ax.annotate(percentage, (x, y), size=12)
    plt.show()

In [6]:
def viz(data, indice):
    fig = go.Figure(data=[go.Candlestick(x=data['UTC_Time'],
                open=data['Open'],
                high=data['High'],
                low=data['Low'],
                close=data['Close'])])

    fig.update_layout(xaxis_rangeslider_visible=False,
                      title="Neighbor: " + indice + " " + pair + " " + timeframe + " " + now.strftime("%Y-%m-%d %H:%M:%S"),
                      title_font_color="blue",
                      title_font_size = 20)
    
    fig.update_xaxes(rangebreaks=[dict(bounds=["sat", "mon"])])
    
    fig.write_image(path + "/" + indice + "_chart.png")
    
    fig.show()

# <font color='red'>Currency Pair</font>

In [7]:
Load_10K_Records=True


asia = ['EUR_NZD','EUR_AUD','GBP_NZD','GBP_AUD','AUD_USD','AUD_CAD','AUD_CHF','AUD_NZD','NZD_USD','NZD_CHF','NZD_CAD']

currency_pairs = ['EUR_USD','EUR_GBP','EUR_NZD','EUR_AUD','EUR_CHF','EUR_CAD',
                  'GBP_USD','GBP_CHF','GBP_NZD','GBP_AUD','GBP_CAD','AUD_USD',
                  'AUD_CAD','AUD_CHF','AUD_NZD','NZD_USD','NZD_CHF','NZD_CAD',
                  'USD_CAD','USD_CHF','CAD_CHF']

currency_pairs = ["EUR_USD"]


timeframe = "H4"
#D #H1 #H4 M30
# https://developer.oanda.com/rest-live-v20/instrument-df/#CandlestickGranularity
price_char = "M"
#M(midpoint candles) #B(bid candles) #A(ask candles) #BA
price_com = "mid"
#mid #bid #ask


# <font color='red'>Test Configs</font>

In [8]:
Test_Candle = [2108, 8371, 4614, 8207, 2281, 5172, 3342, 121, 8607, 6387, 9303, 3835, 2134, 6930, 6858, 7301, 8099, 985, 5419, 7471, 1457, 8910, 3841, 704, 7308, 8734, 7172, 7385, 1613, 5421, 7061, 1088, 4383, 9123, 166, 9414, 3899, 4915, 988, 5856, 4308, 742, 5213, 6201, 3898, 1346, 9255, 2498, 6305, 4755, 837, 4628, 805, 3896, 5168, 237, 6147, 4220, 3683, 6668, 989, 216, 4455, 3231, 3456, 8141, 629, 8250, 8351, 8648, 2499, 1818, 1253, 22, 3337, 8189, 2423, 5209, 1426, 4332, 3663, 8871, 1620, 8493, 7271, 214, 6443, 97, 2524, 1767, 7839, 3644, 6230, 7605, 4921, 5945, 2162, 3773, 7216, 2936]

### Logging

In [9]:
filename = "BackTest_{}.csv".format(currency_pairs[0])
data = pd.read_csv(filename)

In [10]:
data.shape

(18, 29)

In [11]:
data.head(2)

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,...,Direction,col_1,col_2,col_3,col_4,col_5,SMA_10,SMA_20,F_SMA_10,F_SMA_20
0,2021-10-26,17:00:00,17,299,1,Tuesday,2021-10-26T17:00:00.000000000Z,5432,1.15872,1.1601,...,1,-0.00092,0.00138,0.00096,0.00308,-0.00093,1.160292,1.160797,-0.000652,-0.001157
1,2021-10-26,21:00:00,21,299,1,Tuesday,2021-10-26T21:00:00.000000000Z,2833,1.15925,1.16043,...,1,-0.00099,0.00118,0.00124,-0.00092,0.00308,1.16025,1.160321,-1e-05,-8.1e-05


In [12]:
data.tail(2)

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,...,Direction,col_1,col_2,col_3,col_4,col_5,SMA_10,SMA_20,F_SMA_10,F_SMA_20
16,2021-10-29,09:00:00,9,302,4,Friday,2021-10-29T09:00:00.000000000Z,24663,1.16511,1.16569,...,0,0.00255,0.00058,0.00045,0.00135,0.00202,1.166164,1.164063,-0.003604,-0.001503
17,2021-10-29,13:00:00,13,302,4,Friday,2021-10-29T13:00:00.000000000Z,39245,1.16258,1.1636,...,0,0.00872,0.00102,0.00034,0.00255,0.00135,1.163276,1.163522,-0.009416,-0.009662


## Calculate Average True Range (ATR)

In [13]:
high_low = data['High'] - data['Low']
high_cp = np.abs(data['High'] - data['Close'].shift())
low_cp = np.abs(data['Low'] - data['Close'].shift())
df = pd.concat([high_low, high_cp, low_cp], axis=1)
true_range = np.max(df, axis=1)
data['ATR_14'] = true_range.rolling(14).mean()

In [14]:
data = data.drop_duplicates()
data = data.to_csv(filename, header = True, index = False)
data = pd.read_csv(filename)

In [15]:
data.tail(2)

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,...,Direction,col_1,col_2,col_3,col_4,col_5,SMA_10,SMA_20,F_SMA_10,F_SMA_20
16,2021-10-29,09:00:00,9,302,4,Friday,2021-10-29T09:00:00.000000000Z,24663,1.16511,1.16569,...,0,0.00255,0.00058,0.00045,0.00135,0.00202,1.166164,1.164063,-0.003604,-0.001503
17,2021-10-29,13:00:00,13,302,4,Friday,2021-10-29T13:00:00.000000000Z,39245,1.16258,1.1636,...,0,0.00872,0.00102,0.00034,0.00255,0.00135,1.163276,1.163522,-0.009416,-0.009662


# Feature Engineering

### CandleStick Data

In [16]:
data['O-H'] = data['Open'] - data['High']
data['O-L'] = data['Open'] - data['Low']
data['O-C'] = data['Open'] - data['Close']
data['H-L'] = data['High'] - data['Low']
data['H-C'] = data['High'] - data['Close']
data['L-C'] = data['Low'] - data['Close']

data['Direction'] = data['O-C'].apply(lambda x: 1 if x<0 else 0)

data['col_1'] = data['Open'] - data['Close']

for value in data['col_1']:   
    if value > 0:
        data['col_2'] = data['High'] - data['Open']
        data['col_3'] = data['Close'] - data['Low']
    else:
        data['col_2'] = data['High'] - data['Close']
        data['col_3'] = data['Open'] - data['Low']

#Two Previous Candlesticks 
data['col_4'] = data['col_1'].shift(1)
data['col_5'] = data['col_1'].shift(2)

#Moving Avarage
data['SMA_10'] = data['Close'].rolling(window=5).mean()
data['SMA_20'] = data['Close'].rolling(window=10).mean()

#Feature Engineering Moving Avarage 
data['F_SMA_10'] = data['Close'] - data['SMA_10']
data['F_SMA_20'] = data['Close'] - data['SMA_20']

In [17]:
data = data.dropna()

In [18]:
data = data.to_csv(filename, header = True, index = False)

In [19]:
data = pd.read_csv(filename)

In [20]:
data.tail()

Unnamed: 0,Date,Time,f_time,julian_date,Weekday,Weekday_Name,UTC_Time,Volume,Open,High,...,Direction,col_1,col_2,col_3,col_4,col_5,SMA_10,SMA_20,F_SMA_10,F_SMA_20
0,2021-10-28,21:00:00,21,301,3,Thursday,2021-10-28T21:00:00.000000000Z,3758,1.16831,1.16902,...,1,-0.00013,0.00071,0.0005,-0.00046,-0.00483,1.165324,1.162958,0.003116,0.005482
1,2021-10-29,01:00:00,1,302,4,Friday,2021-10-29T01:00:00.000000000Z,5699,1.16846,1.16861,...,0,0.00202,0.00015,5e-05,-0.00013,-0.00046,1.166808,1.163421,-0.000368,0.003019
2,2021-10-29,05:00:00,5,302,4,Friday,2021-10-29T05:00:00.000000000Z,16354,1.16643,1.16758,...,0,0.00135,0.00115,0.00016,0.00202,-0.00013,1.167224,1.16384,-0.002144,0.00124
3,2021-10-29,09:00:00,9,302,4,Friday,2021-10-29T09:00:00.000000000Z,24663,1.16511,1.16569,...,0,0.00255,0.00058,0.00045,0.00135,0.00202,1.166164,1.164063,-0.003604,-0.001503
4,2021-10-29,13:00:00,13,302,4,Friday,2021-10-29T13:00:00.000000000Z,39245,1.16258,1.1636,...,0,0.00872,0.00102,0.00034,0.00255,0.00135,1.163276,1.163522,-0.009416,-0.009662


# <font color='red'>CANDLE INDEX NUMBER</font>

In [21]:
candle_no = len(data) - 2
candle_no

3

# Calculating Stop Loss

In [22]:
STOP_LOSS = data.iloc[candle_no]['ATR_14']
STOP_LOSS

0.0029842857142857

In [23]:
CLOSED_PRICE = data.iloc[candle_no]['Close']
CLOSED_PRICE

1.16256

In [24]:
BUY_SL = CLOSED_PRICE - STOP_LOSS
BUY_SL = BUY_SL.round(5)

In [25]:
SELL_SL = CLOSED_PRICE + STOP_LOSS
SELL_SL = SELL_SL.round(5)

# Modeling

In [26]:
data = pd.read_csv(filename)

### Feature Selection and Reduction

In [27]:
data = data.drop(columns=['Volume','Weekday','Date','Time',
                          'Weekday_Name','UTC_Time','Direction',
                          'Open', 'High', 'Low', 'Close',
                          'O-H','O-L','O-C','H-L','H-C','L-C',
                          'SMA_10','SMA_20',
                          'f_time','julian_date','ATR_14',
#                          'col_1','col_2','col_3',
                          'col_4','col_5',
#                          'F_SMA_10','F_SMA_20'
                         ])

# Comment out the features that you want to fit into your model.

### Feature Being Fit to the Model

In [28]:
data.head()

Unnamed: 0,col_1,col_2,col_3,F_SMA_10,F_SMA_20
0,-0.00013,0.00071,0.0005,0.003116,0.005482
1,0.00202,0.00015,5e-05,-0.000368,0.003019
2,0.00135,0.00115,0.00016,-0.002144,0.00124
3,0.00255,0.00058,0.00045,-0.003604,-0.001503
4,0.00872,0.00102,0.00034,-0.009416,-0.009662


### Scaling using Standard Scaler

In [29]:
def find_k_similar_candles(candle_id, dataset, k=4):
    indices=[]
    distances = []
    output = []
    model_knn = NearestNeighbors(metric = 'euclidean', algorithm = 'auto') 
    model_knn.fit(dataset)
    
    #metric = 'euclidean' or 'cosine' or 'manhattan' or 'mahalanobis'
    
    distances, indices = model_knn.kneighbors(dataset.iloc[candle_id,:].values.reshape(1,-1),
                                              n_neighbors = k)

    for i in range(0,len(distances.flatten())):
        if i==0:
            display (pd.DataFrame(data.iloc[candle_id]).transpose())
            #print("Recommendation for {0}:\n".format(eurusd_data.index[candle_id]))
        else:
            #print("{0}: {1}, with distance of {2}".format(i,
            #                                               dataset.index[indices.flatten()[i]],
            #                                               distances.flatten()[i]))
            
            output.append ([dataset.index[indices.flatten()[i]],
                            distances.flatten()[i],
#                           dataset.iloc[indices.flatten()[i]]['O-H'],dataset.iloc[indices.flatten()[i]]['O-L'],dataset.iloc[indices.flatten()[i]]['O-C'],dataset.iloc[indices.flatten()[i]]['H-L'],dataset.iloc[indices.flatten()[i]]['H-C'],dataset.iloc[indices.flatten()[i]]['L-C'],
                           dataset.iloc[indices.flatten()[i]]['col_1'],dataset.iloc[indices.flatten()[i]]['col_2'],dataset.iloc[indices.flatten()[i]]['col_3'],
#                           dataset.iloc[indices.flatten()[i]]['col_4'],dataset.iloc[indices.flatten()[i]]['col_5'],
                           dataset.iloc[indices.flatten()[i]]['F_SMA_10'],dataset.iloc[indices.flatten()[i]]['F_SMA_20'],
                           ])
    
    output = pd.DataFrame(output)
    output.columns = ['Indice','Distance',
#                      'O-H','O-L','O-C','H-L','H-C','L-C',
                      'col_1','col_2','col_3',
#                      'col_4','col_5',
                      'F_SMA_10','F_SMA_20'
                     ]
    display (output)
    
    return indices, distances

### Top 5 Similar Candlesticks

In [30]:
indices, distances = find_k_similar_candles (candle_no,data)

Unnamed: 0,col_1,col_2,col_3,F_SMA_10,F_SMA_20
3,0.00255,0.00058,0.00045,-0.003604,-0.001503


Unnamed: 0,Indice,Distance,col_1,col_2,col_3,F_SMA_10,F_SMA_20
0,2,0.003392,0.00135,0.00115,0.00016,-0.002144,0.00124
1,1,0.005617,0.00202,0.00015,5e-05,-0.000368,0.003019
2,0,0.010057,-0.00013,0.00071,0.0005,0.003116,0.005482


In [31]:
indices = indices[0:1][0]

In [32]:
indices

array([3, 2, 1, 0])

### Currnet Market/Candlestick

In [37]:
closed_candle = "currnet_market_data.csv"
data = pd.read_csv(filename)
data = data.iloc[candle_no-30:candle_no+1]
data.to_csv(closed_candle, header = True, index = False)
viz(data, "current_market")

NameError: name 'pair' is not defined

### Recommendations

In [34]:
print(datetime.now())

2021-10-30 01:10:06.918004


In [35]:
for indice in indices[1:10]:
    recommendation_log = "{}_data.csv".format(indice)
    data = pd.read_csv(filename) 
    data = data.iloc[indice:indice+30]
    data.to_csv(path + "/" + recommendation_log, header = True, index = False)
    
    print ('Neighbor:', indice, '|', '10K Records:', Load_10K_Records)
    viz(data, indice.astype(str))
    
    data['candleno'] = range (1, len(data) + 1)
    X = data['candleno'].values.reshape(-1, 1)
    Y = data['Close'].values.reshape(-1, 1)
    linear_regressor = LinearRegression()
    linear_regressor.fit(X, Y)
    y_pred = linear_regressor.predict(X)
    plt.scatter(X, Y)
    plt.plot(X, y_pred, color='red')
    plt.show()
    
    print(r2_score(Y, y_pred).round(2)*100, '% Fit')
    coeficient = (linear_regressor.coef_)
    
    if coeficient > 0:
        print('Action: BUY')
        print('STOP LOSS:', BUY_SL)
    else:
        print('Action: SELL')
        print('STOP LOSS:', SELL_SL)



NameError: name 'path' is not defined

### Save the Recommendations

In [47]:
currentpath = os.path.join(sys.path[0])
pngfiles = []
pngfiles = glob.glob(currentpath+"/"+path+"/*.png")
pdf = FPDF()

for pngfile in pngfiles:
    pdf.add_page()
    pdf.image(pngfile, w=200, h=130)
    
pdf.output(path+"/recommendations.pdf", "F")

''