In [30]:
# Find out what model works best with the dataset from XM

In [31]:
import pandas as pd

from datetime import datetime
import MetaTrader5 as mt5

if not mt5.initialize():
	print(f"MT5 Init failed, error code {mt5.last_error()}")
	quit()

pair = "GBPUSD"
rates = mt5.copy_rates_from_pos(pair, mt5.TIMEFRAME_D1, 0, 100)
print(rates)

[(1620777600, 1.41359, 1.41524, 1.40485, 1.40495, 63618, 19, 0)
 (1620864000, 1.40495, 1.40771, 1.40047, 1.40465, 55833, 19, 0)
 (1620950400, 1.40465, 1.41101, 1.40351, 1.40944, 42933, 19, 0)
 (1621209600, 1.41011, 1.4146 , 1.40762, 1.41266, 41995, 19, 0)
 (1621296000, 1.41266, 1.42196, 1.41266, 1.41829, 48245, 19, 0)
 (1621382400, 1.41829, 1.41998, 1.40983, 1.41097, 71628, 19, 0)
 (1621468800, 1.41097, 1.41915, 1.41002, 1.41825, 51751, 19, 0)
 (1621555200, 1.41825, 1.42332, 1.41395, 1.4146 , 51963, 19, 0)
 (1621814400, 1.41531, 1.41708, 1.41102, 1.41481, 44632, 19, 0)
 (1621900800, 1.41481, 1.42101, 1.41147, 1.41451, 51615, 19, 0)
 (1621987200, 1.41451, 1.4175 , 1.41104, 1.41124, 47226, 19, 0)
 (1622073600, 1.41124, 1.42183, 1.40905, 1.41988, 51258, 19, 0)
 (1622160000, 1.41988, 1.42073, 1.41352, 1.41871, 73869, 19, 0)
 (1622419200, 1.41771, 1.42169, 1.41629, 1.42042, 41542, 19, 0)
 (1622505600, 1.42042, 1.42483, 1.41448, 1.41448, 68813, 19, 0)
 (1622592000, 1.41448, 1.41824, 1.41108,

# Convert the `rates` into a Dataframe

In [32]:
data = pd.DataFrame(rates)
data['time'] = pd.to_datetime(data['time'], unit='s')
print(type(data))
print(data.head())
data

<class 'pandas.core.frame.DataFrame'>
        time     open     high      low    close  tick_volume  spread  \
0 2021-05-12  1.41359  1.41524  1.40485  1.40495        63618      19   
1 2021-05-13  1.40495  1.40771  1.40047  1.40465        55833      19   
2 2021-05-14  1.40465  1.41101  1.40351  1.40944        42933      19   
3 2021-05-17  1.41011  1.41460  1.40762  1.41266        41995      19   
4 2021-05-18  1.41266  1.42196  1.41266  1.41829        48245      19   

   real_volume  
0            0  
1            0  
2            0  
3            0  
4            0  


Unnamed: 0,time,open,high,low,close,tick_volume,spread,real_volume
0,2021-05-12,1.41359,1.41524,1.40485,1.40495,63618,19,0
1,2021-05-13,1.40495,1.40771,1.40047,1.40465,55833,19,0
2,2021-05-14,1.40465,1.41101,1.40351,1.40944,42933,19,0
3,2021-05-17,1.41011,1.41460,1.40762,1.41266,41995,19,0
4,2021-05-18,1.41266,1.42196,1.41266,1.41829,48245,19,0
...,...,...,...,...,...,...,...,...
95,2021-09-22,1.36553,1.36886,1.36083,1.36136,44802,19,0
96,2021-09-23,1.36126,1.37499,1.36107,1.37149,44086,19,0
97,2021-09-24,1.37183,1.37352,1.36569,1.36681,34296,19,0
98,2021-09-27,1.36659,1.37279,1.36542,1.36912,36539,19,0


In [33]:
# Understand the dataset
print(data.describe)

<bound method NDFrame.describe of          time     open     high      low    close  tick_volume  spread  \
0  2021-05-12  1.41359  1.41524  1.40485  1.40495        63618      19   
1  2021-05-13  1.40495  1.40771  1.40047  1.40465        55833      19   
2  2021-05-14  1.40465  1.41101  1.40351  1.40944        42933      19   
3  2021-05-17  1.41011  1.41460  1.40762  1.41266        41995      19   
4  2021-05-18  1.41266  1.42196  1.41266  1.41829        48245      19   
..        ...      ...      ...      ...      ...          ...     ...   
95 2021-09-22  1.36553  1.36886  1.36083  1.36136        44802      19   
96 2021-09-23  1.36126  1.37499  1.36107  1.37149        44086      19   
97 2021-09-24  1.37183  1.37352  1.36569  1.36681        34296      19   
98 2021-09-27  1.36659  1.37279  1.36542  1.36912        36539      19   
99 2021-09-28  1.36941  1.37162  1.35221  1.35355        31028      19   

    real_volume  
0             0  
1             0  
2             0  
3    

We'll be using scikit learn's to train_test_split the dataset but we have to define a few things

Need to define the X set and the y set from the data

In [34]:
X = data.drop(columns='close')
print(X.head())
y = data['close']
print(y.head())

        time     open     high      low  tick_volume  spread  real_volume
0 2021-05-12  1.41359  1.41524  1.40485        63618      19            0
1 2021-05-13  1.40495  1.40771  1.40047        55833      19            0
2 2021-05-14  1.40465  1.41101  1.40351        42933      19            0
3 2021-05-17  1.41011  1.41460  1.40762        41995      19            0
4 2021-05-18  1.41266  1.42196  1.41266        48245      19            0
0    1.40495
1    1.40465
2    1.40944
3    1.41266
4    1.41829
Name: close, dtype: float64


In [35]:
# Split the data set into training and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=4)
print(f"X_train: {X_train}")
print(f"X_test: {X_test}")
print(f"y_train: {y_train}")
print(f"y_test: {y_test}")

X_train:          time     open     high      low  tick_volume  spread  real_volume
65 2021-08-11  1.38359  1.38869  1.38017        31395      19            0
62 2021-08-06  1.39272  1.39317  1.38602        32483      19            0
27 2021-06-18  1.39138  1.39435  1.37908        73597      19            0
75 2021-08-25  1.37260  1.37660  1.36955        26478      19            0
61 2021-08-05  1.38859  1.39479  1.38710        40653      19            0
..        ...      ...      ...      ...          ...     ...          ...
87 2021-09-10  1.38329  1.38873  1.38255        28064      19            0
1  2021-05-13  1.40495  1.40771  1.40047        55833      19            0
69 2021-08-17  1.38421  1.38426  1.37249        33236      19            0
55 2021-07-28  1.38725  1.39100  1.38415        51599      19            0
46 2021-07-15  1.38557  1.38980  1.38040        67595      19            0

[70 rows x 7 columns]
X_test:          time     open     high      low  tick_volume  sprea

Using LazyRegressor to find out the models

In [36]:
# from lazypredict.Supervised import LazyRegressor
# reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
# models, predictions = reg.fit(X_train, X_test, y_train, y_test)

# print(models)

# Pick a Model to study

So according to the `LazyRegressor`, there are a bunch of models are unknown to me at this time of writing this. The only model I am familiar with is the LinearRegression. I may start with that and if it doesn't perform as much, I will continue up the models. RidgeCV being the top.

In [37]:
# import torch
# import math

# class LinearRegression:
#     def fit(self, X, y, method, learning_rate=0.01, iterations=500, batch_size=32):
#         X, y = torch.from_numpy(X), torch.from_numpy(y)
#         X = torch.cat([(X), torch.ones_like(y)], dim=1)
#         rows, cols = X.size()
#         if method == 'solve':
#             if rows >= cols == torch.matrix_rank(X):
#                 self.weights = torch.matmul(
#                     torch.matmul(
#                         torch.inverse(
#                             torch.matmul(
#                                 torch.transpose(X, 0, 1),
#                                 X)),
#                         torch.transpose(X, 0, 1)),
#                     y)
#             else:
#                 print('X has not full column rank. method=\'solve\' cannot be used.')
#         elif method == 'sgd':
#             self.weights = torch.normal(mean=0, std=1/cols, size=(cols, 1), dtype=torch.float64)
#             for i in range(iterations):
#                 Xy = torch.cat([X, y], dim=1)
#                 Xy = Xy[torch.randperm(Xy.size()[0])]
#                 X, y = torch.split(Xy, [Xy.size()[1]-1, 1], dim=1)
#                 for j in range(int(math.ceil(rows/batch_size))):
#                     start, end = batch_size*j, min(batch_size*(j+1), rows)
#                     Xb = torch.index_select(X, 0, torch.arange(start, end))
#                     yb = torch.index_select(y, 0, torch.arange(start, end))
                    
#                     self.weights.requires_grad_(True)
#                     diff = torch.matmul(Xb, self.weights) - yb
#                     loss = torch.matmul(torch.transpose(diff, 0, 1), diff)
#                     loss.backward()
                    
#                     self.weights = (self.weights - learning_rate*self.weights.grad).detach()
#         else:
#             print(f'Unknown method: \'{method}\'')
        
#         return self
    
#     def predict(self, X):
#         X = torch.from_numpy(X)
#         if not hasattr(self, 'weights'):
#             print('Cannot predict. You should call the .fit() method first.')
#             return
        
#         X = torch.cat([X, torch.ones((X.size()[0], 1))], dim=1)
        
#         if X.size()[1] != self.weights.size()[0]:
#             print(f'Shapes do not match. {X.size()[1]} != {self.weights.size()[0]}')
#             return
        
#         return torch.matmul(X, self.weights)
    
#     def rmse(self, X, y):
#         y = torch.from_numpy(y)
#         y_hat = self.predict(X)
        
#         if y_hat is None:
#             return
        
#         return torch.sqrt(torch.mean(torch.square(y_hat - y)))

With the Linear Regression model defined with fitting and predicting, let's map and plot the current dataset

In [38]:
# import mplfinance as mpf

# data = data.set_index('time')
# mpf.plot(data, type='line', title=pair)

In [39]:
from sklearn.linear_model import LinearRegression

df_data = pd.DataFrame(rates)
df_x = df_data.drop(columns='close')
df_y = df_data['close']
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=0)
model = LinearRegression().fit(x_train, y_train)
y_pre = model.predict(x_test)

# import matplotlib.pyplot as plt


# plt.figure(num = 3, figsize=(10, 5))
# y_test_array = np.array(y_test)
# plt.plot(y_test_array)
# plt.plot(y_pre)
# plt.show()

The Metatrader 5 seems to not be able to handle order management very well. This includes current order sent.
However, the broker that I'm using (XM) is giving me a high spread that the predicted price just happens to be in the middle of.

In [46]:
# Login
import config
client = mt5.login(config.ACCOUNT, password=config.PASSWORD, server=config.SERVER)
for p in config.PAIRS:
	symbol_info = mt5.symbol_info(p)
	symbol_tick = mt5.symbol_info_tick(p)
	points = symbol_info.point
	ask = symbol_tick.ask
	bid = symbol_tick.bid
	current_rate = mt5.copy_rates_from_pos(p, config.TIMEFRAME, 0, 1)
	current_df = pd.DataFrame(current_rate)
	current_open_price = current_df.iloc[0]["open"]
	to_predict = current_df.drop(columns="close")
	symbol_predict = model.predict(to_predict)
	if current_open_price > symbol_predict[0]:
		signal = "Sell"
		trade_feasible = symbol_predict[0] < bid
		profit=mt5.order_calc_profit(mt5.ORDER_TYPE_SELL, p, config.LOT_SIZE, bid, symbol_predict[0])
		distance = current_open_price - symbol_predict[0]
		stop_loss = bid+distance*points
	else:
		signal = "Buy"
		trade_feasible = symbol_predict[0] > ask
		profit=mt5.order_calc_profit(mt5.ORDER_TYPE_BUY, p, config.LOT_SIZE, ask, symbol_predict[0])
		distance = symbol_predict[0] - current_open_price
		stop_loss = ask-distance*points
	print(f"------------{p}--------------")
	print(f"Points : {points}")
	print(f"Ask Price : {ask}")
	print(f"Bid Price : {bid}")
	print(f"Current Open Price : {current_open_price}")
	print(f"Predicted Price : {symbol_predict[0]}")
	print(f"Stop Loss : {stop_loss}")
	print(f"Signal : {signal}")
	print(f"Distance : {distance}")
	print(f"Can we place a trade? : {trade_feasible}")
	print(f"Calculated Profit : {profit}")
	print("---------------------------------------")


------------EURAUD#--------------
Points : 1e-05
Ask Price : 1.6136300000000001
Bid Price : 1.6133899999999999
Current Open Price : 1.60493
Predicted Price : 1.593567511297318
Stop Loss : 1.613390113624887
Signal : Sell
Distance : 0.011362488702681972
Can we place a trade? : True
Calculated Profit : 14.34
---------------------------------------
------------EURUSD#--------------
Points : 1e-05
Ask Price : 1.1675200000000001
Bid Price : 1.16744
Current Open Price : 1.16932
Predicted Price : 1.1799381248669354
Stop Loss : 1.1675198938187514
Signal : Buy
Distance : 0.010618124866935474
Can we place a trade? : True
Calculated Profit : 12.42
---------------------------------------
------------GBPUSD#--------------
Points : 1e-05
Ask Price : 1.3537
Bid Price : 1.3536000000000001
Current Open Price : 1.36941
Predicted Price : 1.353069479544942
Stop Loss : 1.3536001634052046
Signal : Sell
Distance : 0.01634052045505796
Can we place a trade? : True
Calculated Profit : 0.53
----------------------