# Hyper Parameter Tuning using RandomizedSearchCV

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Paths to the prepped data files
base_path = './data/prepped_data/'  # Adjusted base path to where your files are now
X_train_path = base_path + 'X_train.csv'
X_test_path = base_path + 'X_test.csv'
y_train_path = base_path + 'y_train.csv'
y_test_path = base_path + 'y_test.csv'

# Loading the datasets
X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)
y_train = pd.read_csv(y_train_path)
y_test = pd.read_csv(y_test_path)

# Assuming the last columns of X_train and X_test are year, month, day, hour for reconstructing the datetime index
X_train['datetime'] = pd.to_datetime(X_train[['year', 'month', 'day', 'hour']])
X_test['datetime'] = pd.to_datetime(X_test[['year', 'month', 'day', 'hour']])
y_train.index = pd.to_datetime(X_train['datetime'])
y_test.index = pd.to_datetime(X_test['datetime'])

# Exclude non-numerical columns before scaling
columns_to_scale = X_train.columns.difference(['year', 'month', 'day', 'hour', 'datetime'])

# Initialize the scaler for the features
scaler_X = MinMaxScaler()

# Scale 'X' features (excluding non-numerical columns)
X_train_scaled = scaler_X.fit_transform(X_train[columns_to_scale])
X_test_scaled = scaler_X.transform(X_test[columns_to_scale])

# Initialize a separate scaler for the target variable
scaler_y = MinMaxScaler()

# Scale 'y' (the target variable)
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1)).flatten()
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1)).flatten()


## Imports

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from keras_tuner import RandomSearch




## Get time for one Epoch

In [6]:
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

n_input = X_train_scaled.shape[1]  # Assuming X_train_scaled is a 3D array of shape (samples, time_steps, features)
n_features = X_train_scaled.shape[2]


# Define the model with a typical configuration
def create_test_model():
    model = Sequential()
    model.add(LSTM(units=100, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

model = create_test_model()

# Measure the time for a few epochs
start_time = time.time()

model.fit(
    X_train_scaled, y_train_scaled,
    epochs=2,  # Small number of epochs
    validation_data=(X_test_scaled, y_test_scaled),
    verbose=1
)

elapsed_time = time.time() - start_time
time_per_epoch = elapsed_time / 2  # Since we're running for 2 epochs

print(f"Time per epoch: {time_per_epoch} seconds")


IndexError: tuple index out of range

# The actual run

In [None]:
def build_model(hp):
    model = Sequential()
    model.add(LSTM(
        units=hp.Int('units', min_value=50, max_value=150, step=50),
        activation=hp.Choice('activation', ['relu', 'tanh']),
        input_shape=(n_input, n_features)
    ))
    model.add(Dense(1))
    model.compile(
        optimizer=Adam(hp.Float('learning_rate', min_value=0.0001, max_value=0.01, sampling='log')),
        loss='mse'
    )
    return model

tuner = RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=2,
    directory='keras_tuner_dir',
    project_name='rnn_tuning'
)

callbacks = [
    tf.keras.callbacks.EarlyStopping('val_loss', patience=3),
    tf.keras.callbacks.TensorBoard(log_dir='./logs')  # Add TensorBoard callback
]

tuner.search(
    X_train_scaled, y_train_scaled,
    epochs=20,
    validation_data=(X_test_scaled, y_test_scaled),
    callbacks=callbacks
)


best_model = tuner.get_best_models(num_models=1)[0]
best_hyperparameters = tuner.get_best_hyperparameters()[0]

# Evaluate the best model
loss = best_model.evaluate(X_test_scaled, y_test_scaled)
print('Loss:', loss)

# Print best hyperparameters
print('Best hyperparameters:', best_hyperparameters.values)

