In [None]:
# Find out which model works best with the dataset from XM

In [None]:
import pandas as pd

from datetime import datetime
import MetaTrader5 as mt5

# Connect to the MetaTrader 5 terminal. Raise instead of calling quit(): in a
# notebook quit() shuts the kernel down and the error message is easily lost.
if not mt5.initialize():
	raise RuntimeError(f"MT5 Init failed, error code {mt5.last_error()}")

pair = "GBPUSD"
# Request 100 daily (D1) bars for `pair`, starting at position 0.
rates = mt5.copy_rates_from_pos(pair, mt5.TIMEFRAME_D1, 0, 100)
# The MT5 API signals a failed request by returning None; stop here with the
# error code rather than failing later with a confusing DataFrame error.
if rates is None or len(rates) == 0:
	raise RuntimeError(f"No rates returned for {pair}, error code {mt5.last_error()}")
print(rates)

# Convert `rates` into a DataFrame

In [None]:
# Build the frame from the raw rates and parse the `time` column
# (given in seconds) into pandas timestamps in a single expression.
data = pd.DataFrame(rates).assign(
    time=lambda frame: pd.to_datetime(frame['time'], unit='s')
)
# Quick sanity check: the container type, then the first rows.
for preview in (type(data), data.head()):
    print(preview)
data

In [None]:
# Understand the dataset: summary statistics per column.
# describe must be *called*; printing the bare attribute only shows the bound method.
data.describe()

We'll be using scikit-learn's `train_test_split` to split the dataset, but first we have to define a few things.

We need to define the feature set `X` and the target set `y` from the data.

In [None]:
# Features are every column except `close`; the target is the `close` column itself.
X = data.drop(columns=['close'])
y = data['close']
# Preview the first rows of the features, then of the target.
for preview in (X.head(), y.head()):
    print(preview)

In [None]:
# Split the data set into a training set and a test set (30% held out).
# The fixed random_state keeps the split reproducible between runs.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=4
)
# Print each part of the split under its own label.
for label, part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{label}: {part}")

Use `LazyRegressor` to find out which models perform best on this dataset.

In [None]:
# Fit LazyRegressor on the train/test split and print the table of models it returns.
from lazypredict.Supervised import LazyRegressor
reg = LazyRegressor(
    verbose=0,
    ignore_warnings=False,
    custom_metric=None,
)
fit_output = reg.fit(X_train, X_test, y_train, y_test)
# fit() returns a pair; keep both parts under their original names.
models, predictions = fit_output

print(models)

# Pick a Model to study

According to `LazyRegressor`, there are a bunch of models that are unknown to me at the time of writing. The only model I am familiar with is `LinearRegression`, so I will start with that; if it doesn't perform well enough, I will work my way up the list of models, with `RidgeCV` at the top.

In [None]:
import torch
import math

class LinearRegression:
    """Least-squares linear regression implemented with PyTorch tensors.

    A bias column of ones is appended to the inputs, so after a successful
    ``fit`` the attribute ``weights`` has shape ``(n_features + 1, 1)`` with
    the intercept in the last row.

    Note: a later cell in this notebook imports scikit-learn's
    ``LinearRegression`` under the same name, which shadows this class.
    """

    @staticmethod
    def _as_matrix(values):
        """Return `values` as a 2-D floating-point tensor.

        Integer input is cast to float64 and 1-D input becomes a single
        column, so a flat target vector cannot silently broadcast against an
        ``(n, 1)`` prediction.
        """
        tensor = torch.as_tensor(values)
        if not torch.is_floating_point(tensor):
            tensor = tensor.to(torch.float64)
        if tensor.dim() == 1:
            tensor = tensor.reshape(-1, 1)
        return tensor

    def fit(self, X, y, method, learning_rate=0.01, iterations=500, batch_size=32):
        """Estimate the weights from training data.

        Args:
            X: array-like of shape (n_samples, n_features) (or 1-D for a
                single feature).
            y: array-like of shape (n_samples, 1) or (n_samples,).
            method: 'solve' for the closed-form normal-equation solution or
                'sgd' for mini-batch gradient descent on the summed squared
                error.
            learning_rate: step size used by 'sgd'.
            iterations: number of passes over the data used by 'sgd'.
            batch_size: mini-batch size used by 'sgd'.

        Returns:
            self. If `method` is unknown, or 'solve' is requested for a
            design matrix without full column rank, a message is printed and
            ``weights`` is left untouched.
        """
        X = self._as_matrix(X)
        y = self._as_matrix(y).to(X.dtype)
        # Append the bias column in X's own dtype so no implicit promotion is needed.
        X = torch.cat([X, torch.ones((X.size(0), 1), dtype=X.dtype)], dim=1)
        rows, cols = X.size()
        if method == 'solve':
            # torch.matrix_rank was removed from PyTorch; torch.linalg.matrix_rank replaces it.
            if rows >= cols == int(torch.linalg.matrix_rank(X)):
                Xt = torch.transpose(X, 0, 1)
                # Solve (X^T X) w = X^T y directly instead of forming an explicit inverse.
                self.weights = torch.linalg.solve(torch.matmul(Xt, X), torch.matmul(Xt, y))
            else:
                print('X has not full column rank. method=\'solve\' cannot be used.')
        elif method == 'sgd':
            self.weights = torch.normal(mean=0, std=1/cols, size=(cols, 1), dtype=X.dtype)
            for _ in range(iterations):
                # Reshuffle the samples at the start of every pass.
                order = torch.randperm(rows)
                X, y = X[order], y[order]
                for start in range(0, rows, batch_size):
                    Xb = X[start:start + batch_size]
                    yb = y[start:start + batch_size]

                    weights = self.weights.requires_grad_(True)
                    diff = torch.matmul(Xb, weights) - yb
                    # Summed (not averaged) squared error over the mini-batch.
                    loss = torch.sum(diff * diff)
                    loss.backward()

                    # Detach so the next step starts from a fresh leaf tensor without stale gradients.
                    self.weights = (weights - learning_rate*weights.grad).detach()
        else:
            print(f'Unknown method: \'{method}\'')

        return self

    def predict(self, X):
        """Predict targets for `X`.

        Returns:
            A tensor of shape (n_samples, 1), or None (after printing a
            message) when the model is not fitted or the feature count does
            not match the fitted weights.
        """
        if not hasattr(self, 'weights'):
            print('Cannot predict. You should call the .fit() method first.')
            return

        X = self._as_matrix(X).to(self.weights.dtype)
        X = torch.cat([X, torch.ones((X.size(0), 1), dtype=X.dtype)], dim=1)

        if X.size()[1] != self.weights.size()[0]:
            print(f'Shapes do not match. {X.size()[1]} != {self.weights.size()[0]}')
            return

        return torch.matmul(X, self.weights)

    def rmse(self, X, y):
        """Return the root-mean-squared error of the predictions for `X` against `y`.

        Returns None when ``predict`` cannot produce a prediction.
        """
        y_hat = self.predict(X)

        if y_hat is None:
            return

        # Force y into a column: a 1-D y would otherwise broadcast to (n, n).
        y = self._as_matrix(y).to(y_hat.dtype)
        return torch.sqrt(torch.mean(torch.square(y_hat - y)))

With the linear regression model defined, including fitting and prediction, let's plot the current dataset.

In [None]:
import mplfinance as mpf

# Move `time` into the index before plotting. The guard keeps this cell
# re-runnable: after the first run `time` is no longer a column, and an
# unconditional set_index('time') would raise KeyError.
if 'time' in data.columns:
    data = data.set_index('time')
mpf.plot(data, type='line', title=pair)

In [None]:
from sklearn.linear_model import LinearRegression

# Rebuild the dataset straight from `rates` (without the datetime conversion
# applied to `data`) and separate the `close` target from the other columns.
df_data = pd.DataFrame(rates)
df_x = df_data.drop(columns='close')
df_y = df_data['close']
# Hold out 20% of the rows for testing; random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=0)
model = LinearRegression()
model.fit(x_train, y_train)
y_pre = model.predict(x_test)

import matplotlib.pyplot as plt


# Compare the held-out closing prices with the model's predictions.
plt.figure(num = 3, figsize=(10, 5))
# Series.to_numpy() avoids depending on numpy's `np`, which this notebook never imports.
y_test_array = y_test.to_numpy()
plt.plot(y_test_array, label='actual close')
plt.plot(y_pre, label='predicted close')
plt.title(f'{pair}: actual vs. predicted close (test set)')
plt.xlabel('test sample')
plt.ylabel('close')
plt.legend()
plt.show()

In [None]:
# Score on the held-out test split only; scoring on the full dataset would
# include the rows the model was trained on and overstate the fit.
model.score(x_test, y_test)

In [None]:
# Coefficients learned by `model.fit(x_train, y_train)` — presumably one per column of `df_x`, in column order (confirm).
model.coef_

In [None]:
# Intercept (constant term) of the fitted linear model.
model.intercept_

In [None]:
# Predictions for the test split; the variable assigned from model.predict(x_test) is `y_pre`
# (`y_predict` was never defined and raised NameError).
y_pre