## Get data

In [1]:
from utils.mt5 import mt5_initialize, get_currency_pair_data

mt5_initialize()

df = get_currency_pair_data('USDKES')
df.tail()

Unnamed: 0_level_0,open,high,low,close,tick_volume,spread
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-09-04,124.68892,127.04924,123.63172,124.26965,257,303579
2024-09-05,124.23585,126.87368,123.10978,126.01266,257,410790
2024-09-06,125.99751,126.93707,125.18213,125.92646,237,411160
2024-09-08,126.39715,126.39715,125.59318,125.59318,20,386925
2024-09-09,125.56655,127.20549,124.11228,127.14406,153,390974


## Preprocess the data

In [2]:
import numpy as np

def preprocess_data(df, days, scaler):
    df = df.copy()
    df['date'] = df.index
    df = df[['close', 'date']]
    df.loc[:, 'close'] = scaler.fit_transform(df[['close']])
    # Create the 'future' column by shifting 'close' by -days
    df['future'] = df['close'].shift(-days)
    df.dropna(inplace=True)
    return df

In [3]:
from collections import deque

def create_features_and_target(df, days, scaler):
    df = preprocess_data(df, days, scaler)
    # Capture the last 'days' number of 'close' values
    last_sequence = np.array(df[['close']].tail(days))
    sequence_data = []
    sequences = deque(maxlen=days)
    
    # Iterate through the DataFrame to create sequences of 'close' values and corresponding targets
    for entry, target in zip(df[['close', 'date']].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == days:
            sequence_data.append([np.array(sequences), target])
    
    # Prepare the last sequence by combining the most recent sequences with the last 'days' number of 'close' values
    last_sequence = list([s[:len(['close'])] for s in sequences]) + list(last_sequence)
    last_sequence = np.array(last_sequence).astype(np.float32)
    
    X, y = [], []
    for seq, target in sequence_data:
        X.append(seq)
        y.append(target)

    features = np.array(X)
    # Ensure features only include the 'close' values
    features = features[:, :, :len(['close'])].astype(np.float32)
    target = np.array(y)

    return features, target, last_sequence


## Train a model

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
days = 7

features, target, last_sequence = create_features_and_target(df, days, scaler)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=3)

In [5]:
import tensorflow

def train_model(features, target, epochs=100):
    model = tensorflow.keras.Sequential([
    tensorflow.keras.layers.LSTM(60, return_sequences=True),
    tensorflow.keras.layers.Dropout(0.3),
    tensorflow.keras.layers.LSTM(120, return_sequences=False),
    tensorflow.keras.layers.Dropout(0.3),
    tensorflow.keras.layers.Dense(20),
    tensorflow.keras.layers.Dense(1),
    ])

    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    model.fit(features, target, epochs=epochs, verbose=False)
    return model

## Predict prices

In [6]:
model = train_model(features, target)

In [7]:
last_sequence = last_sequence[-days:]
predictions = model.predict(last_sequence)
predicted_prices = scaler.inverse_transform(predictions)
"""
open	high	low	close	tick_volume	spread
time						
2024-09-04	124.68892	127.04924	123.63172	124.26965	257	303579
2024-09-05	124.23585	126.87368	123.10978	126.01266	257	410790
2024-09-06	125.99751	126.93707	125.18213	125.92646	237	411160
2024-09-08	126.39715	126.39715	125.59318	125.59318	20	386925
2024-09-09	125.56655	127.20549	124.11228	127.14406	153	390974
"""
predicted_prices

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 684ms/step


array([[124.65404 ],
       [124.873695],
       [124.40623 ],
       [124.914635],
       [125.0846  ],
       [124.90757 ],
       [124.554886]], dtype=float32)