# Training of the 3 most optimal LSTM models

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras_tuner.tuners import RandomSearch

from sklearn.metrics import mean_squared_error, r2_score

from tensorflow.keras.models import save_model, load_model

2025-01-24 12:45:54.044272: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-24 12:45:54.046860: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-24 12:45:54.054047: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-24 12:45:54.065336: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-24 12:45:54.068697: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-24 12:45:54.078377: I tensorflow/core/platform/cpu_feature_gu

In [2]:
data = pd.read_csv("../data/processed/scaled_EURUSD_daily.csv", index_col=0, parse_dates=True)
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,MA20,MA50,Rolling_STD,Daily_Return,Volatility,Upper_Band,...,High_Low_Diff,Volatility_Price_Ratio,Momentum,Rolling_Return_5d,MA20_MA50_Interaction,Close_Volatility_Interaction,EMA20,EMA50,MACD,Signal_Line
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-11-23,2.191889,2.257424,2.252358,2.192606,2.266625,1.613936,-0.412957,0.954561,2.01357,2.215342,...,-0.160033,-0.556635,0.550805,1.092093,2.015158,2.658475,2.219578,2.257584,0.092931,0.093239
2014-11-24,2.308272,2.330381,2.31203,2.302185,2.266625,1.613936,-0.412957,0.954561,2.01357,2.215342,...,0.022916,-0.563156,0.550805,1.092093,2.015158,2.689204,2.2302,2.262066,0.128314,0.100489
2014-11-25,2.389062,2.416758,2.384088,2.391431,2.266625,1.613936,-0.412957,0.775757,2.01357,2.215342,...,0.219317,-0.568423,0.550805,1.092093,2.015158,2.714231,2.248461,2.270022,0.187758,0.11847
2014-11-26,2.439155,2.409953,2.424995,2.439066,2.266625,1.613936,-0.412957,0.417363,2.01357,2.215342,...,-0.46405,-0.571217,0.550805,1.092093,2.015158,2.72759,2.2696,2.279614,0.254458,0.146521
2014-11-27,2.350267,2.350416,2.35913,2.35641,2.266625,1.613936,-0.412957,-0.694317,2.01357,2.215342,...,-0.367195,-0.566361,0.550805,1.092093,2.015158,2.70441,2.280713,2.28545,0.285147,0.17525


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2608 entries, 2014-11-23 to 2024-11-22
Data columns (total 33 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Open                          2608 non-null   float64
 1   High                          2608 non-null   float64
 2   Low                           2608 non-null   float64
 3   Close                         2608 non-null   float64
 4   MA20                          2608 non-null   float64
 5   MA50                          2608 non-null   float64
 6   Rolling_STD                   2608 non-null   float64
 7   Daily_Return                  2608 non-null   float64
 8   Volatility                    2608 non-null   float64
 9   Upper_Band                    2608 non-null   float64
 10  Lower_Band                    2608 non-null   float64
 11  Bollinger_Bandwidth           2608 non-null   float64
 12  Lag1_Close                    2608 non-null 

----
## Split data into Training and Testing sets

In [4]:
target = 'Close'
# Use only the "optimal features" selected from Random Forrest feature selection.
selected_features = ['Open', 'High', 'Low']

train_size = int(len(data) * 0.8)

train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]

X_train, y_train = train_data[selected_features], train_data[target]
X_test, y_test = test_data[selected_features], test_data[target]

print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Testing set: {X_test.shape}, {y_test.shape}")

Training set: (2086, 3), (2086,)
Testing set: (522, 3), (522,)


---
## LSTM Architecture with Hyperparameter Tuning

In [5]:
def create_sequences(data, target, window_size):
    """
    Converts data into sliding window sequences for LSTM.

    Args:
        data (pd.DataFrame): Input features.
        target (pd.Series): Target values (e.g., 'Close').
        window_size (int): Number of past observations in each sequence.

    Returns:
        tuple: (X, y) where X is the feature tensor and y is the target vector.
    """
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data.iloc[i:i + window_size].values)
        y.append(target.iloc[i + window_size])
    return np.array(X), np.array(y)

#Set Window size and create sequences
window_size = 50

X_train_seq, y_train_seq = create_sequences(
    pd.DataFrame(X_train, columns=selected_features),
    y_train,
    window_size
)
X_test_seq, y_test_seq = create_sequences(
    pd.DataFrame(X_test, columns=selected_features),
    y_test,
    window_size
)

In [6]:
def build_model(hp):
    model = Sequential()
    model.add(Input(shape=(X_train_seq.shape[1], X_train_seq.shape[2])))
    
    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(LSTM(
            units=hp.Int(f"units_{i}", min_value=32, max_value=128, step=32),
            return_sequences=(i < hp.Int("num_layers", 1, 3) - 1)
        ))
        model.add(Dropout(hp.Float(f"dropout_{i}", 0.1, 0.5, step=0.1)))

    model.add(Dense(1))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float("lr", 1e-4, 1e-2, sampling="log")
        ),
        loss="mean_squared_error",
        metrics=["mae"]
    )
    return model

In [7]:
#Find best Hyperparameters
tuner = RandomSearch(
    build_model,
    objective="val_loss",
    max_trials=20,
    executions_per_trial=2,
    directory="hyperparam_tuning",
    project_name="lstm_close_price_with_window"
)
tuner.search(
    X_train_seq, y_train_seq,
    validation_data=(X_test_seq, y_test_seq),
    epochs=20,
    batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)],
    verbose=1
)

best_hps = tuner.get_best_hyperparameters(num_trials=3)
for i, hp in enumerate(best_hps):
    print(f"Top {i+1} Hyperparameters: {hp.values}")

Trial 20 Complete [00h 00m 48s]
val_loss: 0.003066711826249957

Best val_loss So Far: 0.002484046504832804
Total elapsed time: 00h 20m 26s
Top 1 Hyperparameters: {'num_layers': 1, 'units_0': 128, 'dropout_0': 0.1, 'lr': 0.0058263756897235135, 'units_1': 64, 'dropout_1': 0.4, 'units_2': 64, 'dropout_2': 0.1}
Top 2 Hyperparameters: {'num_layers': 1, 'units_0': 64, 'dropout_0': 0.2, 'lr': 0.006882586554868881, 'units_1': 128, 'dropout_1': 0.30000000000000004, 'units_2': 96, 'dropout_2': 0.5}
Top 3 Hyperparameters: {'num_layers': 2, 'units_0': 32, 'dropout_0': 0.1, 'lr': 0.004114773318984142, 'units_1': 96, 'dropout_1': 0.1, 'units_2': 64, 'dropout_2': 0.30000000000000004}


---
## Model Evaluation

In [8]:
def build_and_train_model(hp, X_train_seq, y_train_seq, X_test_seq, y_test_seq):
    model = Sequential()

    # Add input layer
    model.add(Input(shape=(X_train_seq.shape[1], X_train_seq.shape[2])))

    # Add LSTM layers based on hyperparameters
    for i in range(hp['num_layers']):
        return_sequences = i < hp['num_layers'] - 1  # True for all but the last layer
        model.add(LSTM(units=hp[f'units_{i}'], return_sequences=return_sequences))
        model.add(Dropout(rate=hp[f'dropout_{i}']))

    # Add output layer
    model.add(Dense(1))

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=hp['lr']),
        loss="mean_squared_error"
    )

    # Train the model
    model.fit(X_train_seq, y_train_seq, epochs=20, batch_size=32, verbose=0)

    # Predictions
    y_train_pred = model.predict(X_train_seq)
    y_test_pred = model.predict(X_test_seq)

    # Metrics
    train_mse = mean_squared_error(y_train_seq, y_train_pred)
    test_mse = mean_squared_error(y_test_seq, y_test_pred)
    train_r2 = r2_score(y_train_seq, y_train_pred)
    test_r2 = r2_score(y_test_seq, y_test_pred)

    return {
        "model": model,
        "train_mse": train_mse,
        "test_mse": test_mse,
        "train_r2": train_r2,
        "test_r2": test_r2
    }

# Top hyperparameters
top_hyperparams = [
    {'num_layers': 3, 'units_0': 64, 'dropout_0': 0.1, 'lr': 0.007527178255464862,
     'units_1': 128, 'dropout_1': 0.1, 'units_2': 32, 'dropout_2': 0.1},
    {'num_layers': 2, 'units_0': 64, 'dropout_0': 0.2, 'lr': 0.006641024661942141,
     'units_1': 32, 'dropout_1': 0.2, 'units_2': 64, 'dropout_2': 0.1},
    {'num_layers': 2, 'units_0': 32, 'dropout_0': 0.3, 'lr': 0.009043644296402382,
     'units_1': 64, 'dropout_1': 0.4, 'units_2': 64, 'dropout_2': 0.2}
]

# Evaluate top 3 models
model_results = []
for i, hp in enumerate(top_hyperparams, start=1):
    print(f"Training and evaluating Model {i}...")
    result = build_and_train_model(hp, X_train_seq, y_train_seq, X_test_seq, y_test_seq)
    model_results.append({
        "Model": f"Model_{i}",
        "Train_MSE": result["train_mse"],
        "Test_MSE": result["test_mse"],
        "Train_R²": result["train_r2"],
        "Test_R²": result["test_r2"]
    })

results_df = pd.DataFrame(model_results)
print(results_df)

Training and evaluating Model 1...
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Training and evaluating Model 2...
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Training and evaluating Model 3...
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
     Model  Train_MSE  Test_MSE  Train_R²   Test_R²
0  Model_1   0.005812  0.003992  0.994324  0.953978
1  Model_2   0.006812  0.004811  0.993347  0.944529
2  Model_3   0.005928  0.002840  0.994211  0.967252


----
## Retrain on Full dataset

This maximizes the predictive power of these models before testing them on the unseen dataset in the `model_comparison` notebook.

In [9]:
# Combine train and test sets into a full dataset
X_full = np.concatenate([X_train_seq, X_test_seq], axis=0)
y_full = np.concatenate([y_train_seq, y_test_seq], axis=0)
print(f"Full dataset shape: {X_full.shape}, {y_full.shape}")

Full dataset shape: (2508, 50, 3), (2508,)


In [10]:
# Retrain and save models
best_params_1 = {'num_layers': 3, 'units_0': 64, 'dropout_0': 0.1, 'lr': 0.007527178255464862, 'units_1': 128, 'dropout_1': 0.1, 'units_2': 32, 'dropout_2': 0.1}
best_params_2 = {'num_layers': 2, 'units_0': 64, 'dropout_0': 0.2, 'lr': 0.006641024661942141, 'units_1': 32, 'dropout_1': 0.2, 'units_2': 64, 'dropout_2': 0.1}
best_params_3 = {'num_layers': 2, 'units_0': 32, 'dropout_0': 0.30000000000000004, 'lr': 0.009043644296402382, 'units_1': 64, 'dropout_1': 0.4, 'units_2': 64, 'dropout_2': 0.2}


model_nums = []
for i, params in enumerate([best_params_1, best_params_2, best_params_3], start=1):
    print(f"Training Model {i} with params: {params}")
    
    # Build the model using the selected hyperparameters
    model = Sequential()
    model.add(Input(shape=(X_full.shape[1], X_full.shape[2])))

    # Add LSTM layers based on the params
    for layer in range(params['num_layers']):
        return_sequences = layer < (params['num_layers'] - 1)
        model.add(LSTM(params[f'units_{layer}'], return_sequences=return_sequences))
        model.add(Dropout(params[f'dropout_{layer}']))
    
    model.add(Dense(1))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=params['lr']),
        loss='mean_squared_error',
        metrics=['mae']
    )
    
    # Train the model on the full dataset
    model.fit(
        X_full, y_full,
        epochs=20,
        batch_size=32,
        verbose=2,
        callbacks=[tf.keras.callbacks.EarlyStopping(monitor="loss", patience=5)]
    )

    # Save the model
    model_path = f"../models/EURUSD_daily/lstm_model_full_{i}.keras"
    model.save(model_path)
    model_nums.append(i)
    print(f"Model {i} saved as '{model_path}'.")

Training Model 1 with params: {'num_layers': 3, 'units_0': 64, 'dropout_0': 0.1, 'lr': 0.007527178255464862, 'units_1': 128, 'dropout_1': 0.1, 'units_2': 32, 'dropout_2': 0.1}
Epoch 1/20
79/79 - 5s - 59ms/step - loss: 0.0854 - mae: 0.2001
Epoch 2/20
79/79 - 3s - 32ms/step - loss: 0.0294 - mae: 0.1283
Epoch 3/20
79/79 - 3s - 32ms/step - loss: 0.0234 - mae: 0.1146
Epoch 4/20
79/79 - 3s - 32ms/step - loss: 0.0182 - mae: 0.0997
Epoch 5/20
79/79 - 3s - 32ms/step - loss: 0.0193 - mae: 0.1026
Epoch 6/20
79/79 - 3s - 33ms/step - loss: 0.0163 - mae: 0.0919
Epoch 7/20
79/79 - 3s - 32ms/step - loss: 0.0132 - mae: 0.0844
Epoch 8/20
79/79 - 3s - 32ms/step - loss: 0.0133 - mae: 0.0841
Epoch 9/20
79/79 - 3s - 32ms/step - loss: 0.0181 - mae: 0.0989
Epoch 10/20
79/79 - 3s - 32ms/step - loss: 0.0142 - mae: 0.0876
Epoch 11/20
79/79 - 3s - 32ms/step - loss: 0.0153 - mae: 0.0873
Epoch 12/20
79/79 - 3s - 32ms/step - loss: 0.0148 - mae: 0.0893
Model 1 saved as '../models/EURUSD_daily/lstm_model_full_1.keras'

In [11]:
# Verify saved models
for i in model_nums:
    model_path = f"../models/EURUSD_daily/lstm_model_full_{i}.h5"
    loaded_model = tf.keras.models.load_model(model_path)
    print(f"Loaded Model {i} successfully from '{model_path}'")



Loaded Model 1 successfully from '../models/EURUSD_daily/lstm_model_full_1.h5'
Loaded Model 2 successfully from '../models/EURUSD_daily/lstm_model_full_2.h5'
Loaded Model 3 successfully from '../models/EURUSD_daily/lstm_model_full_3.h5'
