In [1]:
# Find out what model works best with the dataset from XM

In [2]:
import pandas as pd

from datetime import datetime
import MetaTrader5 as mt5

if not mt5.initialize():
	print(f"MT5 Init failed, error code {mt5.last_error()}")
	quit()

pair = "EURUSD"
rates = mt5.copy_rates_from_pos(pair, mt5.TIMEFRAME_D1, 0, 100)
print(rates)

[(1622160000, 1.21932, 1.2205 , 1.21327, 1.21896, 60110, 0, 0)
 (1622419200, 1.21993, 1.22314, 1.21833, 1.2225 , 36694, 0, 0)
 (1622505600, 1.22254, 1.22543, 1.22118, 1.22126, 52448, 0, 0)
 (1622592000, 1.22125, 1.22265, 1.21643, 1.221  , 53793, 0, 0)
 (1622678400, 1.22097, 1.22143, 1.21183, 1.21265, 59299, 0, 0)
 (1622764800, 1.21261, 1.21856, 1.21041, 1.21662, 56073, 0, 0)
 (1623024000, 1.21595, 1.22018, 1.21448, 1.21893, 40542, 0, 0)
 (1623110400, 1.21891, 1.21942, 1.21643, 1.21726, 48682, 0, 0)
 (1623196800, 1.21722, 1.22179, 1.21709, 1.21787, 42992, 0, 0)
 (1623283200, 1.21787, 1.2195 , 1.21432, 1.21751, 66547, 0, 0)
 (1623369600, 1.21751, 1.21931, 1.20927, 1.21063, 54807, 0, 0)
 (1623628800, 1.21025, 1.21303, 1.20942, 1.212  , 46944, 0, 0)
 (1623715200, 1.2119 , 1.21473, 1.21014, 1.21264, 50679, 0, 0)
 (1623801600, 1.21256, 1.21348, 1.1994 , 1.19943, 66416, 0, 0)
 (1623888000, 1.19939, 1.20064, 1.18918, 1.19069, 90570, 0, 0)
 (1623974400, 1.19056, 1.19251, 1.18537, 1.18597, 81407

# Convert the `rates` into a Dataframe

In [3]:
data = pd.DataFrame(rates)
data['time'] = pd.to_datetime(data['time'], unit='s')
print(type(data))
print(data.head())
data

<class 'pandas.core.frame.DataFrame'>
        time     open     high      low    close  tick_volume  spread  \
0 2021-05-28  1.21932  1.22050  1.21327  1.21896        60110       0   
1 2021-05-31  1.21993  1.22314  1.21833  1.22250        36694       0   
2 2021-06-01  1.22254  1.22543  1.22118  1.22126        52448       0   
3 2021-06-02  1.22125  1.22265  1.21643  1.22100        53793       0   
4 2021-06-03  1.22097  1.22143  1.21183  1.21265        59299       0   

   real_volume  
0            0  
1            0  
2            0  
3            0  
4            0  


Unnamed: 0,time,open,high,low,close,tick_volume,spread,real_volume
0,2021-05-28,1.21932,1.22050,1.21327,1.21896,60110,0,0
1,2021-05-31,1.21993,1.22314,1.21833,1.22250,36694,0,0
2,2021-06-01,1.22254,1.22543,1.22118,1.22126,52448,0,0
3,2021-06-02,1.22125,1.22265,1.21643,1.22100,53793,0,0
4,2021-06-03,1.22097,1.22143,1.21183,1.21265,59299,0,0
...,...,...,...,...,...,...,...,...
95,2021-10-08,1.15526,1.15861,1.15414,1.15736,59970,0,0
96,2021-10-11,1.15659,1.15867,1.15487,1.15517,46485,0,0
97,2021-10-12,1.15522,1.15702,1.15242,1.15295,57798,0,0
98,2021-10-13,1.15294,1.15973,1.15250,1.15940,67602,0,0


In [4]:
# Understand the dataset
print(data.describe)

<bound method NDFrame.describe of          time     open     high      low    close  tick_volume  spread  \
0  2021-05-28  1.21932  1.22050  1.21327  1.21896        60110       0   
1  2021-05-31  1.21993  1.22314  1.21833  1.22250        36694       0   
2  2021-06-01  1.22254  1.22543  1.22118  1.22126        52448       0   
3  2021-06-02  1.22125  1.22265  1.21643  1.22100        53793       0   
4  2021-06-03  1.22097  1.22143  1.21183  1.21265        59299       0   
..        ...      ...      ...      ...      ...          ...     ...   
95 2021-10-08  1.15526  1.15861  1.15414  1.15736        59970       0   
96 2021-10-11  1.15659  1.15867  1.15487  1.15517        46485       0   
97 2021-10-12  1.15522  1.15702  1.15242  1.15295        57798       0   
98 2021-10-13  1.15294  1.15973  1.15250  1.15940        67602       0   
99 2021-10-14  1.15903  1.16245  1.15876  1.15992        33794       0   

    real_volume  
0             0  
1             0  
2             0  
3    

We'll be using scikit learn's to train_test_split the dataset but we have to define a few things

Need to define the X set and the y set from the data

In [28]:
X = data.drop(columns='close')
print(X.head())
y = data['close']
print(y.head())
print(y.tail())
print(y.shape)
# Shift the close prices by 1
y.drop(0,inplace=True)
y.loc[100] = 0
print(y.shape)
print(y.head())
print(y.tail())

        time     open     high      low  tick_volume  spread  real_volume
0 2021-05-28  1.21932  1.22050  1.21327        60110       0            0
1 2021-05-31  1.21993  1.22314  1.21833        36694       0            0
2 2021-06-01  1.22254  1.22543  1.22118        52448       0            0
3 2021-06-02  1.22125  1.22265  1.21643        53793       0            0
4 2021-06-03  1.22097  1.22143  1.21183        59299       0            0
0    1.22100
1    1.21265
2    1.21662
3    1.21893
4    1.21726
Name: close, dtype: float64
95    1.15940
96    1.15992
97    0.00000
98    0.00000
99    0.00000
Name: close, dtype: float64
(100,)
(100,)
1    1.21265
2    1.21662
3    1.21893
4    1.21726
5    1.21787
Name: close, dtype: float64
96     1.15992
97     0.00000
98     0.00000
99     0.00000
100    0.00000
Name: close, dtype: float64


In [6]:
# Split the data set into training and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=4)
print(f"X_train: {X_train}")
print(f"X_test: {X_test}")
print(f"y_train: {y_train}")
print(f"y_test: {y_test}")

X_train:          time     open     high      low  tick_volume  spread  real_volume
65 2021-08-27  1.17514  1.18026  1.17349        54550       0            0
62 2021-08-24  1.17449  1.17650  1.17273        40186       0            0
27 2021-07-06  1.18619  1.18950  1.18067        79642       0            0
75 2021-09-10  1.18229  1.18510  1.18091        52650       0            0
61 2021-08-23  1.16897  1.17503  1.16897        42138       0            0
..        ...      ...      ...      ...          ...     ...          ...
87 2021-09-28  1.16940  1.17032  1.16682        64793       0            0
1  2021-05-31  1.21993  1.22314  1.21833        36694       0            0
69 2021-09-02  1.18374  1.18759  1.18344        42004       0            0
55 2021-08-13  1.17290  1.18048  1.17281        35375       0            0
46 2021-08-02  1.18641  1.18969  1.18590        47011       0            0

[70 rows x 7 columns]
X_test:          time     open     high      low  tick_volume  sprea

Using LazyRegressor to find out the models

In [7]:
# from lazypredict.Supervised import LazyRegressor
# reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
# models, predictions = reg.fit(X_train, X_test, y_train, y_test)

# print(models)

# Pick a Model to study

So according to the `LazyRegressor`, there are a bunch of models are unknown to me at this time of writing this. The only model I am familiar with is the LinearRegression. I may start with that and if it doesn't perform as much, I will continue up the models. RidgeCV being the top.

In [8]:
# import torch
# import math

# class LinearRegression:
#     def fit(self, X, y, method, learning_rate=0.01, iterations=500, batch_size=32):
#         X, y = torch.from_numpy(X), torch.from_numpy(y)
#         X = torch.cat([(X), torch.ones_like(y)], dim=1)
#         rows, cols = X.size()
#         if method == 'solve':
#             if rows >= cols == torch.matrix_rank(X):
#                 self.weights = torch.matmul(
#                     torch.matmul(
#                         torch.inverse(
#                             torch.matmul(
#                                 torch.transpose(X, 0, 1),
#                                 X)),
#                         torch.transpose(X, 0, 1)),
#                     y)
#             else:
#                 print('X has not full column rank. method=\'solve\' cannot be used.')
#         elif method == 'sgd':
#             self.weights = torch.normal(mean=0, std=1/cols, size=(cols, 1), dtype=torch.float64)
#             for i in range(iterations):
#                 Xy = torch.cat([X, y], dim=1)
#                 Xy = Xy[torch.randperm(Xy.size()[0])]
#                 X, y = torch.split(Xy, [Xy.size()[1]-1, 1], dim=1)
#                 for j in range(int(math.ceil(rows/batch_size))):
#                     start, end = batch_size*j, min(batch_size*(j+1), rows)
#                     Xb = torch.index_select(X, 0, torch.arange(start, end))
#                     yb = torch.index_select(y, 0, torch.arange(start, end))
                    
#                     self.weights.requires_grad_(True)
#                     diff = torch.matmul(Xb, self.weights) - yb
#                     loss = torch.matmul(torch.transpose(diff, 0, 1), diff)
#                     loss.backward()
                    
#                     self.weights = (self.weights - learning_rate*self.weights.grad).detach()
#         else:
#             print(f'Unknown method: \'{method}\'')
        
#         return self
    
#     def predict(self, X):
#         X = torch.from_numpy(X)
#         if not hasattr(self, 'weights'):
#             print('Cannot predict. You should call the .fit() method first.')
#             return
        
#         X = torch.cat([X, torch.ones((X.size()[0], 1))], dim=1)
        
#         if X.size()[1] != self.weights.size()[0]:
#             print(f'Shapes do not match. {X.size()[1]} != {self.weights.size()[0]}')
#             return
        
#         return torch.matmul(X, self.weights)
    
#     def rmse(self, X, y):
#         y = torch.from_numpy(y)
#         y_hat = self.predict(X)
        
#         if y_hat is None:
#             return
        
#         return torch.sqrt(torch.mean(torch.square(y_hat - y)))

With the Linear Regression model defined with fitting and predicting, let's map and plot the current dataset

In [9]:
# import mplfinance as mpf

# data = data.set_index('time')
# mpf.plot(data, type='line', title=pair)

The Metatrader 5 seems to not be able to handle order management very well. This includes current order sent.
However, the broker that I'm using (XM) is giving me a high spread that the predicted price just happens to be in the middle of.

In [11]:
# Login
import config
client = mt5.login(config.ACCOUNT, password=config.PASSWORD, server=config.SERVER)
for p in config.PAIRS:
	symbol_info = mt5.symbol_info(p)
	symbol_tick = mt5.symbol_info_tick(p)
	points = symbol_info.point
	ask = symbol_tick.ask
	bid = symbol_tick.bid
	current_rate = mt5.copy_rates_from_pos(p, config.TIMEFRAME, 0, 1)
	current_df = pd.DataFrame(current_rate)
	current_open_price = current_df.iloc[0]["open"]
	to_predict = current_df.drop(columns="close")
	symbol_predict = model.predict(to_predict)
	if current_open_price > symbol_predict[0]:
		signal = "Sell"
		trade_feasible = symbol_predict[0] < bid
		profit=mt5.order_calc_profit(mt5.ORDER_TYPE_SELL, p, config.LOT_SIZE, bid, symbol_predict[0])
		distance = current_open_price - symbol_predict[0]
		stop_loss = bid+distance*points
	else:
		signal = "Buy"
		trade_feasible = symbol_predict[0] > ask
		profit=mt5.order_calc_profit(mt5.ORDER_TYPE_BUY, p, config.LOT_SIZE, ask, symbol_predict[0])
		distance = symbol_predict[0] - current_open_price
		stop_loss = ask-distance*points
	print(f"------------{p}--------------")
	print(f"Points : {points}")
	print(f"Ask Price : {ask}")
	print(f"Bid Price : {bid}")
	print(f"Current Open Price : {current_open_price}")
	print(f"Predicted Price : {symbol_predict[0]}")
	print(f"Stop Loss : {stop_loss}")
	print(f"Signal : {signal}")
	print(f"Distance : {distance}")
	print(f"Can we place a trade? : {trade_feasible}")
	print(f"Calculated Profit : {profit}")
	print("Current DataFrame before splitting close price")
	print(current_df)
	print("---------------------------------------")


------------USDCAD--------------
Points : 1e-05
Ask Price : 1.23816
Bid Price : 1.23816
Current Open Price : 1.2382900000000001
Predicted Price : 1.237242894242747
Stop Loss : 1.2381600104710575
Signal : Sell
Distance : 0.0010471057572531972
Can we place a trade? : True
Calculated Profit : 7.41
Current DataFrame before splitting close price
         time     open     high      low    close  tick_volume  spread  \
0  1634223600  1.23829  1.23831  1.23725  1.23816         1520       0   

   real_volume  
0            0  
---------------------------------------


Try forecasting 2 ticks ahead. This should solve the "invalid stops" issue.

In [14]:
import config
import MetaTrader5 as mt5
import pandas as pd
if not mt5.initialize():
	print("Something went wrong")
elif mt5.login(login=config.ACCOUNT, password=config.PASSWORD, server=config.SERVER):
	current_rate = mt5.copy_rates_from_pos("USDCAD", mt5.TIMEFRAME_M1, 0, 1)
	print(current_rate)
	current_rate_df = pd.DataFrame(current_rate)
	print(current_rate_df)
	features = current_rate_df.columns
	print(features)
	index = current_rate_df.index
	print(index)

[(1634225580, 1.23825, 1.23863, 1.23818, 1.23863, 86, 0, 0)]
         time     open     high      low    close  tick_volume  spread  \
0  1634225580  1.23825  1.23863  1.23818  1.23863           86       0   

   real_volume  
0            0  
Index(['time', 'open', 'high', 'low', 'close', 'tick_volume', 'spread',
       'real_volume'],
      dtype='object')
RangeIndex(start=0, stop=1, step=1)
