# Hyper Parameter Tuning using RandomizedSearchCV

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Paths to the prepped data files
base_path = './data/prepped_data/'  # Adjusted base path to where your files are now
X_train_path = base_path + 'X_train.csv'
X_test_path = base_path + 'X_test.csv'
y_train_path = base_path + 'y_train.csv'
y_test_path = base_path + 'y_test.csv'

# Loading the datasets
X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)
y_train = pd.read_csv(y_train_path)
y_test = pd.read_csv(y_test_path)

# Assuming the last columns of X_train and X_test are year, month, day, hour for reconstructing the datetime index
X_train['datetime'] = pd.to_datetime(X_train[['year', 'month', 'day', 'hour']])
X_test['datetime'] = pd.to_datetime(X_test[['year', 'month', 'day', 'hour']])
y_train.index = pd.to_datetime(X_train['datetime'])
y_test.index = pd.to_datetime(X_test['datetime'])

# Exclude non-numerical columns before scaling
columns_to_scale = X_train.columns.difference(['year', 'month', 'day', 'hour', 'datetime'])

# Initialize the scaler for the features
scaler_X = MinMaxScaler()

# Scale 'X' features (excluding non-numerical columns)
X_train_scaled = scaler_X.fit_transform(X_train[columns_to_scale])
X_test_scaled = scaler_X.transform(X_test[columns_to_scale])

# Initialize a separate scaler for the target variable
scaler_y = MinMaxScaler()

# Scale 'y' (the target variable)
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1)).flatten()
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1)).flatten()


## Imports

## Random Search

In [None]:
import numpy as np
import math
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.metrics import mean_squared_error

# Assuming the following variables are defined and properly prepared:
# X_train_scaled, y_train_scaled, X_test_scaled, y_test_scaled, scaler_y

n_input = 168  # Number of time steps to look back for predictions
n_features = X_train_scaled.shape[1]  # Number of features in the dataset

def objective(trial):
    # Hyperparameters to be tuned
    units = trial.suggest_categorical('units', [50, 100, 150])
    activation = trial.suggest_categorical('activation', ['relu', 'tanh'])
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    
    # Model definition
    model = Sequential([
        LSTM(units=units, activation=activation, input_shape=(n_input, n_features)),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    
    # Fit model using TimeseriesGenerator
    train_generator = TimeseriesGenerator(X_train_scaled, y_train_scaled, length=n_input, batch_size=batch_size)
    validation_generator = TimeseriesGenerator(X_test_scaled, y_test_scaled, length=n_input, batch_size=batch_size)
    model.fit(train_generator, epochs=10, validation_data=validation_generator, verbose=0)
    
    # Evaluate the model
    val_loss = model.evaluate(validation_generator, verbose=0)
    return val_loss

# Optimize
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10, show_progress_bar=True)

# After optimization, print best parameters
best_params = study.best_trial.params
print(f"Best parameters: {best_params}")

# Correct approach to use the best parameters to create a new model
model = Sequential([
    LSTM(units=best_params['units'], activation=best_params['activation'], input_shape=(n_input, n_features)),
    Dense(1)
])
model.compile(optimizer=Adam(learning_rate=best_params['learning_rate']), loss='mse')

# It's suggested to retrain your model here with the entire dataset or a combined train-validation set if applicable
# Adjust batch_size based on best_params if it was part of the optimization
train_generator = TimeseriesGenerator(X_train_scaled, y_train_scaled, length=n_input, batch_size=best_params['batch_size'])
model.fit(train_generator, epochs=10, verbose=1)  # Or more epochs as needed

# Final evaluation, potentially on a separate test set if you have one
validation_generator = TimeseriesGenerator(X_test_scaled, y_test_scaled, length=n_input, batch_size=best_params['batch_size'])
y_pred_scaled = model.predict(validation_generator)
y_test_original = y_test.values.flatten()[:len(y_pred)]  # Adjust length if necessary
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Calculate RMSE and MAPE
rmse = math.sqrt(mean_squared_error(y_test_original, y_pred))
mape = np.mean(np.abs((y_test_original - y_pred) / y_test_original)) * 100

print(f"RMSE: {rmse}")
print(f"MAPE: {mape}%")


[I 2024-03-18 22:59:25,536] A new study created in memory with name: no-name-caaa2c31-beeb-4d87-a13e-15a8dae8c805


  0%|          | 0/10 [00:00<?, ?it/s]

  super().__init__(**kwargs)
  self._warn_if_super_not_called()


[I 2024-03-18 23:04:50,981] Trial 0 finished with value: 0.008750918321311474 and parameters: {'units': 50, 'activation': 'relu', 'learning_rate': 0.00018385008016090357, 'batch_size': 32}. Best is trial 0 with value: 0.008750918321311474.
[I 2024-03-18 23:09:06,439] Trial 1 finished with value: 0.00778465811163187 and parameters: {'units': 50, 'activation': 'relu', 'learning_rate': 0.0024869512390378475, 'batch_size': 64}. Best is trial 1 with value: 0.00778465811163187.
[I 2024-03-18 23:13:25,929] Trial 2 finished with value: 0.0033894293010234833 and parameters: {'units': 50, 'activation': 'relu', 'learning_rate': 0.0036323905906861884, 'batch_size': 64}. Best is trial 2 with value: 0.0033894293010234833.
[I 2024-03-18 23:28:11,075] Trial 3 finished with value: 0.005231366027146578 and parameters: {'units': 150, 'activation': 'tanh', 'learning_rate': 0.002739119041734605, 'batch_size': 32}. Best is trial 2 with value: 0.0033894293010234833.


In [None]:
import matplotlib.pyplot as plt
y_test_original = y_test.values.flatten()[:len(y_pred)]  # Adjust length if necessary
# Assuming y_test_original and y_pred are correctly aligned with these dates
test_dates_aligned = X_test['datetime'].values[-len(y_pred):]

plt.figure(figsize=(10, 6))
plt.plot(test_dates_aligned, y_test_original, label='Actual', marker='.', linestyle='-', linewidth=1)
plt.plot(test_dates_aligned, y_pred, label='Predicted', alpha=0.7, marker='.', linestyle='--', linewidth=1)
plt.title('Actual vs Predicted')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()

# Improve readability of the date labels
plt.xticks(rotation=45)
plt.tight_layout()

plt.show()
