## Get data

In [1]:
from utils.mt5 import initialize_, get_currency_pair_data_

initialize_()

usdcad_df = get_currency_pair_data_('USDCAD')
usdcad_df.tail()

Unnamed: 0_level_0,open,high,low,close,tick_volume,spread
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-09-08,1.35592,1.35701,1.3558,1.35627,1300,15
2024-09-09,1.35627,1.35763,1.35458,1.35635,20864,15
2024-09-10,1.35634,1.36147,1.35611,1.36096,22312,15
2024-09-11,1.36097,1.36221,1.35662,1.35778,27129,15
2024-09-12,1.35778,1.35846,1.35692,1.35696,3196,15


## Preprocess the data

In [2]:
import numpy as np

data = np.array(usdcad_df.copy()['close']).reshape(-1, 1)
data

array([[1.24685],
       [1.24769],
       [1.24424],
       ...,
       [1.36096],
       [1.35778],
       [1.35696]])

In [3]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

data = scaler.fit_transform(data)
data

array([[0.23274365],
       [0.23730193],
       [0.21858042],
       ...,
       [0.8519644 ],
       [0.83470805],
       [0.8302583 ]])

In [4]:
def create_dataset(data, steps=1):
    X, y = [], []
    for i in range(len(data)-steps-1):
        X.append(data[i: i+steps, 0])
        y.append(data[i+steps, 0])
    return np.array(X), np.array(y)

In [5]:
steps = 60

split_idx = int(len(data) * 0.8)
train_data, test_data = data[:split_idx], data[split_idx:len(data)]

X_train, y_train = create_dataset(train_data, steps)
X_test, y_test = create_dataset(test_data, steps)

X_train.shape, X_test.shape

((815, 60), (158, 60))

In [6]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

X_train.shape, X_test.shape

((815, 60, 1), (158, 60, 1))

## Train a model

In [7]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Input, Dense, LSTM
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.metrics import MeanSquaredError

# model = Sequential()

# model.add(Input(shape=(steps, 1)))
# model.add(LSTM(100, return_sequences=True))
# model.add(LSTM(100, return_sequences=True))
# model.add(LSTM(100))
# model.add(Dense(1))

# model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001), metrics=[MeanSquaredError()])
# model.summary()

In [8]:
# model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=32, epochs=50, verbose=1);

In [9]:
import joblib 

# joblib.dump(model, 'models/lstm_USDCAD.pkl') 

In [10]:
model = joblib.load('models/lstm_USDCAD.pkl')  
model.predict(X_test).reshape(-1,)[:5] 

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 171ms/step


array([0.7865837, 0.7948355, 0.7786328, 0.8213531, 0.8165957],
      dtype=float32)

In [11]:
y_test[:5]

array([0.78782288, 0.77349685, 0.81256783, 0.81446712, 0.81718038])

In [12]:
from sklearn.metrics import mean_squared_error
import math 

train_predict = scaler.inverse_transform(model.predict(X_train))
test_predict = scaler.inverse_transform(model.predict(X_test))

train_error = math.sqrt(mean_squared_error(y_train, train_predict))
test_error = math.sqrt(mean_squared_error(y_test, test_predict))

train_error, test_error # (0.7723888049452278, 0.4897638825407242)

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


(0.7717199243276479, 0.48929205858189695)