## Define LSTM Model

In [None]:

# Function to load data for a given time step
def load_data(time_steps, base_dir='/content/drive/MyDrive/NYSE_Dataset'):
    """Load the pre-split arrays saved for one time-step setting.

    Reads ``X_train.npy``, ``X_test.npy``, ``y_train.npy`` and ``y_test.npy``
    from ``<base_dir>/time_steps_<time_steps>``.

    Args:
        time_steps: Value used to pick the ``time_steps_<time_steps>``
            sub-directory.
        base_dir: Directory that contains the ``time_steps_*`` folders.
            Defaults to the dataset folder this notebook was written against,
            so existing ``load_data(time_steps)`` calls are unaffected.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

    Raises:
        FileNotFoundError: If any of the four ``.npy`` files is missing.
    """
    dir_path = os.path.join(base_dir, f'time_steps_{time_steps}')
    # Order matters: callers unpack the tuple positionally.
    return tuple(
        np.load(os.path.join(dir_path, f'{name}.npy'))
        for name in ('X_train', 'X_test', 'y_train', 'y_test')
    )

# Function to build the LSTM model
def build_model(units, learning_rate, input_shape=None):
    """Build and compile a single-layer LSTM regression model.

    Architecture: ``LSTM(units, tanh, return_sequences=False)`` ->
    ``Dropout(0.2)`` -> ``Dense(1)``, compiled with Adam and
    mean-squared-error loss.

    Args:
        units: Number of units in the LSTM layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        input_shape: Optional ``(time_steps, num_features)`` tuple for the
            LSTM input. When omitted, the shape is taken from the notebook
            globals ``time_steps`` and ``num_features`` (the original
            behaviour), which therefore must be defined before the call.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        NameError: If ``input_shape`` is omitted and the globals
            ``time_steps`` / ``num_features`` do not exist yet.
    """
    if input_shape is None:
        # Backward-compatible fallback: rely on variables set by the calling
        # cell. Prefer passing input_shape explicitly to avoid hidden state.
        input_shape = (time_steps, num_features)

    model = Sequential()
    model.add(LSTM(units=units, activation='tanh', return_sequences=False, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model


## Model Training

In [None]:
# Window lengths to train for; one dataset directory and one saved model each.
time_steps_list = [1, 2, 5, 10]

# Hyperparameter grid searched for every window length (3 x 2 combinations).
param_grid = {
    'units': [200, 300, 500],
    'learning_rate': [1e-4, 5e-4]
}

# For each window length: run the grid search and save the best model.
for time_steps in time_steps_list:
    # Load the data
    X_train, X_test, y_train, y_test = load_data(time_steps)

    # build_model(units, learning_rate) picks up `time_steps` and
    # `num_features` from the notebook's global namespace, so both names must
    # be (re)assigned here before any model is built.
    num_features = X_train.shape[2]

    best_val_loss = float('inf')
    best_model = None
    best_params = None

    # Iterate through the parameter grid
    for params in ParameterGrid(param_grid):
        units = params['units']
        learning_rate = params['learning_rate']

        # Build the model
        model = build_model(units, learning_rate)

        # Stop once val_loss has not improved for 2 consecutive epochs and
        # roll back to the best weights seen.
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=2,
            restore_best_weights=True
        )

        # NOTE(review): the test split doubles as the validation set for early
        # stopping and for model selection, so any metric computed on X_test
        # afterwards is optimistically biased. Consider carving a separate
        # validation split out of the training data.
        history = model.fit(
            X_train, y_train,
            epochs=300,
            batch_size=32,
            validation_data=(X_test, y_test),
            callbacks=[early_stopping],
            verbose=1
        )

        # Get the validation loss of the best epoch
        val_loss = min(history.history['val_loss'])

        # Update the best model if the current one is better.
        # A NaN val_loss never compares as smaller, so diverged runs are skipped.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model
            best_params = params

    # Every run diverged (val_loss NaN/inf): there is nothing to save. Report
    # it explicitly instead of failing with AttributeError on None.save().
    if best_model is None:
        print(f"No model with a finite val_loss for time_steps={time_steps}; nothing saved.")
        continue

    # Save the best model
    best_model.save(f'/content/drive/MyDrive/NYSE_Dataset/best_model_time_steps_{time_steps}.h5')

    print(f"Best model for time_steps={time_steps} saved with val_loss={best_val_loss}.")
    print(f"Best hyperparameters for time_steps={time_steps}: {best_params}")


  super().__init__(**kwargs)


Epoch 1/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - loss: 0.0084 - val_loss: 2.3540e-05
Epoch 2/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 6.3109e-04 - val_loss: 1.8739e-05
Epoch 3/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 3.6387e-04 - val_loss: 1.5656e-05
Epoch 4/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 2.4534e-04 - val_loss: 2.4485e-05
Epoch 5/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 2.5183e-04 - val_loss: 1.1650e-05
Epoch 6/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 2.0857e-04 - val_loss: 7.7757e-06
Epoch 7/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 1.5782e-04 - val_loss: 5.4087e-06
Epoch 8/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/ste



Best model for time_steps=1 saved with val_loss=2.050065631920006e-06.
Epoch 1/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.0068 - val_loss: 1.1755e-04
Epoch 2/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 7.4764e-04 - val_loss: 4.1012e-04
Epoch 3/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 3.8547e-04 - val_loss: 2.0147e-04
Epoch 1/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.0063 - val_loss: 1.0824e-04
Epoch 2/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 5.4813e-04 - val_loss: 1.3121e-05
Epoch 3/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 2.8223e-04 - val_loss: 1.1223e-05
Epoch 4/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 2.6224e-04 - val_loss: 3.1703e-05
Epoch 5/300
[1m1524



Best model for time_steps=2 saved with val_loss=4.431676188687561e-06.
Epoch 1/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.0126 - val_loss: 0.0011
Epoch 2/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0011 - val_loss: 4.0325e-04
Epoch 3/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 4.8183e-04 - val_loss: 1.5569e-04
Epoch 4/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 3.3033e-04 - val_loss: 2.4411e-04
Epoch 5/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 2.7187e-04 - val_loss: 4.3242e-05
Epoch 6/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 2.3510e-04 - val_loss: 3.4572e-05
Epoch 7/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 1.7892e-04 - val_loss: 5.5179e-05
Epoch 8/300
[1m1524/152



Best model for time_steps=5 saved with val_loss=4.080453891219804e-06.
Epoch 1/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.0167 - val_loss: 4.4151e-04
Epoch 2/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0012 - val_loss: 3.5757e-05
Epoch 3/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 5.4788e-04 - val_loss: 2.2008e-05
Epoch 4/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 3.5503e-04 - val_loss: 3.5680e-04
Epoch 5/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 3.0381e-04 - val_loss: 1.1446e-05
Epoch 6/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 2.6118e-04 - val_loss: 1.3684e-05
Epoch 7/300
[1m1524/1524[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 2.1238e-04 - val_loss: 5.4828e-06
Epoch 8/300
[1m1524



Best model for time_steps=10 saved with val_loss=2.1969660792819923e-06.


In [None]:
# Read df_sf.csv from the NYSE_Dataset folder into the DataFrame `df_sf`.
# NOTE(review): `df_sf` is not referenced by the training/evaluation code
# visible around this cell — presumably it is used further down; confirm.
df_sf = pd.read_csv('/content/drive/MyDrive/NYSE_Dataset/df_sf.csv')

## Evaluate the four models (time_steps = 1, 2, 5, 10) to select the best one among them

In [None]:

# Function to calculate rRMSE
def root_relative_mean_squared_error(y_true, y_pred):
    """Return the RMSE of the predictions relative to the mean target value.

    Computed as ``sqrt(mean_squared_error(y_true, y_pred)) / mean(y_true)``.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values.

    Returns:
        The RMSE divided by ``np.mean(y_true)``. There is no guard against a
        zero (or negative) mean of ``y_true``.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred)) / np.mean(y_true)

# Function to load data for a given time step
# NOTE(review): load_data is also defined earlier in this notebook; this
# redefinition shadows it. Keep the two in sync or drop one of them.
def load_data(time_steps, base_dir='/content/drive/MyDrive/NYSE_Dataset'):
    """Load the pre-split arrays saved for one time-step setting.

    Reads ``X_train.npy``, ``X_test.npy``, ``y_train.npy`` and ``y_test.npy``
    from ``<base_dir>/time_steps_<time_steps>``.

    Args:
        time_steps: Value used to pick the ``time_steps_<time_steps>``
            sub-directory.
        base_dir: Directory that contains the ``time_steps_*`` folders.
            Defaults to the dataset folder this notebook was written against,
            so existing ``load_data(time_steps)`` calls are unaffected.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

    Raises:
        FileNotFoundError: If any of the four ``.npy`` files is missing.
    """
    dir_path = os.path.join(base_dir, f'time_steps_{time_steps}')
    # Order matters: callers unpack the tuple positionally.
    return tuple(
        np.load(os.path.join(dir_path, f'{name}.npy'))
        for name in ('X_train', 'X_test', 'y_train', 'y_test')
    )

# List of time steps
time_steps_list = [1, 2, 5, 10]

# For each time step: reload its saved best model and report error metrics on
# the test split (no plotting happens in this cell).
for time_steps in time_steps_list:
    # Load the data
    X_train, X_test, y_train, y_test = load_data(time_steps)

    # Load the best model
    model_path = f'/content/drive/MyDrive/NYSE_Dataset/best_model_time_steps_{time_steps}.h5'
    best_model = load_model(model_path)

    # Make predictions
    y_pred = best_model.predict(X_test)

    # The model ends in Dense(1), so predictions come back as (n, 1). If
    # y_test was saved as a 1-D array (its shape is not visible here),
    # `y_pred - y_test` would broadcast to an (n, n) matrix. Flatten both so
    # the element-wise difference is always one value per sample.
    y_true_flat = np.ravel(y_test)
    y_pred_flat = np.ravel(y_pred)

    # Calculate metrics
    me = np.mean(y_pred_flat - y_true_flat)
    mae = mean_absolute_error(y_true_flat, y_pred_flat)
    mse = mean_squared_error(y_true_flat, y_pred_flat)
    rrmse = root_relative_mean_squared_error(y_true_flat, y_pred_flat)

    # Print metrics.
    # ME is a plain mean difference in target units (nothing here scales it to
    # a percentage), so it is printed without a '%' suffix. MSE is on the
    # order of 1e-6 for these models, so use scientific notation instead of a
    # fixed 4-decimal format that would always show 0.0000.
    print(f"Metrics for time_steps={time_steps}:")
    print(f"ME: {me:.6f}")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4e}")
    print(f"rRMSE: {rrmse:.4f}\n")





[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step




Metrics for time_steps=1:
ME: -0.00%
MAE: 0.0012
MSE: 0.0000
rRMSE: 0.0171

[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Metrics for time_steps=2:
ME: 0.00%
MAE: 0.0016
MSE: 0.0000
rRMSE: 0.0251





[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step
Metrics for time_steps=5:
ME: 0.00%
MAE: 0.0016
MSE: 0.0000
rRMSE: 0.0242





[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step
Metrics for time_steps=10:
ME: 0.00%
MAE: 0.0012
MSE: 0.0000
rRMSE: 0.0177

