# FOREX PREDICTION USING KNN


In [13]:
# Importing Libraries..

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from plotly import graph_objects as go

In [2]:
# Import Dataset

data = pd.read_csv("audusd.csv")
data = data[['Open', 'High', 'Low', 'Close']]
data

Unnamed: 0,Open,High,Low,Close
0,0.70238,0.70247,0.70154,0.70154
1,0.70155,0.70201,0.70154,0.70167
2,0.70166,0.70185,0.70146,0.70162
3,0.70161,0.70186,0.70115,0.70115
4,0.70115,0.70115,0.70115,0.70115
...,...,...,...,...
2708,0.63158,0.63201,0.63020,0.63134
2709,0.63135,0.63233,0.63101,0.63183
2710,0.63183,0.63189,0.63085,0.63119
2711,0.63119,0.63188,0.63107,0.63172


In [3]:
# on trading day we have only open price so we create O-H, O-L, O-C all H,L,C is prior day

In [4]:
data['O-H'] = data['Open']-data['High'].shift(1)
data['O-L'] = data['Open']-data['Low'].shift(1)
data['O-C'] = data['Open']-data['Close'].shift(1)
data['Close_return'] = np.log(data['Close']/data['Close'].shift(1))   #one day log return
data['Close return shift 1'] = data['Close_return'].shift(1) #use yesterdays return for today
data

Unnamed: 0,Open,High,Low,Close,O-H,O-L,O-C,Close_return,Close return shift 1
0,0.70238,0.70247,0.70154,0.70154,,,,,
1,0.70155,0.70201,0.70154,0.70167,-0.00092,0.00001,0.00001,0.000185,
2,0.70166,0.70185,0.70146,0.70162,-0.00035,0.00012,-0.00001,-0.000071,0.000185
3,0.70161,0.70186,0.70115,0.70115,-0.00024,0.00015,-0.00001,-0.000670,-0.000071
4,0.70115,0.70115,0.70115,0.70115,-0.00071,0.00000,0.00000,0.000000,-0.000670
...,...,...,...,...,...,...,...,...,...
2708,0.63158,0.63201,0.63020,0.63134,-0.00106,0.00065,0.00005,-0.000301,-0.001614
2709,0.63135,0.63233,0.63101,0.63183,-0.00066,0.00115,0.00001,0.000776,-0.000301
2710,0.63183,0.63189,0.63085,0.63119,-0.00050,0.00082,0.00000,-0.001013,0.000776
2711,0.63119,0.63188,0.63107,0.63172,-0.00070,0.00034,0.00000,0.000839,-0.001013


In [5]:
data['Label'] = np.where(data['Close'] > data['Open'], 1, 0)   #1 for buy 0 for sell

In [6]:
data

Unnamed: 0,Open,High,Low,Close,O-H,O-L,O-C,Close_return,Close return shift 1,Label
0,0.70238,0.70247,0.70154,0.70154,,,,,,0
1,0.70155,0.70201,0.70154,0.70167,-0.00092,0.00001,0.00001,0.000185,,1
2,0.70166,0.70185,0.70146,0.70162,-0.00035,0.00012,-0.00001,-0.000071,0.000185,0
3,0.70161,0.70186,0.70115,0.70115,-0.00024,0.00015,-0.00001,-0.000670,-0.000071,0
4,0.70115,0.70115,0.70115,0.70115,-0.00071,0.00000,0.00000,0.000000,-0.000670,0
...,...,...,...,...,...,...,...,...,...,...
2708,0.63158,0.63201,0.63020,0.63134,-0.00106,0.00065,0.00005,-0.000301,-0.001614,0
2709,0.63135,0.63233,0.63101,0.63183,-0.00066,0.00115,0.00001,0.000776,-0.000301,1
2710,0.63183,0.63189,0.63085,0.63119,-0.00050,0.00082,0.00000,-0.001013,0.000776,0
2711,0.63119,0.63188,0.63107,0.63172,-0.00070,0.00034,0.00000,0.000839,-0.001013,1


In [7]:
# remove nan

data = data.dropna()
data

Unnamed: 0,Open,High,Low,Close,O-H,O-L,O-C,Close_return,Close return shift 1,Label
2,0.70166,0.70185,0.70146,0.70162,-0.00035,0.00012,-0.00001,-0.000071,0.000185,0
3,0.70161,0.70186,0.70115,0.70115,-0.00024,0.00015,-0.00001,-0.000670,-0.000071,0
4,0.70115,0.70115,0.70115,0.70115,-0.00071,0.00000,0.00000,0.000000,-0.000670,0
5,0.70115,0.70115,0.70115,0.70115,0.00000,0.00000,0.00000,0.000000,0.000000,0
6,0.70115,0.70115,0.70115,0.70115,0.00000,0.00000,0.00000,0.000000,0.000000,0
...,...,...,...,...,...,...,...,...,...,...
2708,0.63158,0.63201,0.63020,0.63134,-0.00106,0.00065,0.00005,-0.000301,-0.001614,0
2709,0.63135,0.63233,0.63101,0.63183,-0.00066,0.00115,0.00001,0.000776,-0.000301,1
2710,0.63183,0.63189,0.63085,0.63119,-0.00050,0.00082,0.00000,-0.001013,0.000776,0
2711,0.63119,0.63188,0.63107,0.63172,-0.00070,0.00034,0.00000,0.000839,-0.001013,1


In [8]:
X = data[['O-H', 'O-L', 'O-C', 'Close return shift 1']].values
Y = data['Label'].values

In [9]:
#scaling the values
scaler = MinMaxScaler()
XC = scaler.fit_transform(X)

In [10]:
#data split
xtrain, xtest, ytrain, ytest = train_test_split(XC,Y,train_size=0.8, shuffle = True) 

In [19]:
#logistic regression model
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(xtrain, ytrain)
pred = clf.predict(xtest)

In [20]:
accuracy_score(ytest, pred)   #overall accuracy

0.6408839779005525

In [21]:
ytest[-10:]  #actual values

array([0, 1, 1, 0, 0, 0, 0, 0, 0, 0])

In [22]:
pred[-10:]  #predicted values

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0])

In [23]:
#plot
fig = go.Figure()
fig.add_traces(data = go.Scatter(y= ytest, name = "Actual"))
fig.add_traces(data = go.Scatter(y= pred, name = "Predicted"))
fig.update_layout(title = "Class prediction", title_x = 0.5)
fig.show()