# LSTM model using Optuna

### Optuna is an open-source hyperparameter optimization (HPO) library for Python. It provides a framework for automating the process of finding the best hyperparameters for machine learning models.

#### After the dataset loading and preprocessing comes the model implementation.

In [None]:
# Importing necessary libraries
import tensorflow as tf
import optuna
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import math
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error

In [None]:
# Hold-out split by row position: the leading 80% of `df` becomes the
# training set and the trailing 20% the test set. Row order is preserved
# (no shuffling), so the test rows are always the last ones in the frame.
split_index = int(0.8 * len(df))  # change 0.8 to use a different train fraction

# Slice both partitions around the boundary row.
train_data, test_data = df.iloc[:split_index], df.iloc[split_index:]

In [None]:
# Feature matrices: every column except 'target_column', cast to float.
X_train = train_data.drop(columns='target_column').values.astype(float)
X_test = test_data.drop(columns='target_column').values.astype(float)

# Label vectors: the 'target_column' values, left in their stored dtype.
y_train = train_data['target_column'].values
y_test = test_data['target_column'].values

In [None]:
# Training-setting grid swept inside every Optuna trial: the objective fits
# one model for each (batch size, epoch count) pair taken from these lists.
batch_sizes = [16, 32, 64]      # mini-batch sizes to try
epochs_values = [50, 100, 150]  # maximum number of training epochs to try

# Define the objective function for Optuna
def objective(trial):
    """Return the lowest test-set MSE reached within one Optuna trial.

    The trial samples the LSTM width, the dropout rate and the Adam learning
    rate. With that sample fixed, one model is trained for every pair taken
    from the module-level ``batch_sizes`` and ``epochs_values`` lists, and the
    smallest mean squared error on ``X_test`` / ``y_test`` is returned.

    The batch size and epoch count that produced that error are stored on the
    trial as the user attributes ``best_batch_size`` and ``best_epochs`` so
    they can be read back later via ``study.best_trial.user_attrs``.

    NOTE(review): the score is computed on the test set, so the selected
    hyperparameters are tuned against the same data used for the final
    evaluation. Consider scoring on a separate validation split instead.

    Args:
        trial: The ``optuna.trial.Trial`` that supplies the suggestions.

    Returns:
        The minimum MSE over all batch-size/epoch combinations, as a float
        (``inf`` if no combination produced a comparable score).
    """
    # Hyperparameters sampled by Optuna for this trial.
    units = trial.suggest_int('units', 32, 256)
    dropout = trial.suggest_float('dropout', 0.0, 0.5, step=0.1)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)

    # Keras LSTM layers take 3-D input (samples, timesteps, features); every
    # column is fed as one timestep holding a single value. The reshaped
    # arrays do not depend on the loop variables, so build them once.
    n_steps = X_train.shape[1]
    train_inputs = X_train.reshape(X_train.shape[0], n_steps, 1)
    test_inputs = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    # Best result seen so far within this trial.
    best_mse = float('inf')
    best_batch_size = None
    best_epochs = None

    for batch_size in batch_sizes:
        for epochs in epochs_values:
            # Release the state of the previously built model; otherwise
            # memory keeps growing over the many fits a study performs.
            tf.keras.backend.clear_session()

            # Build the LSTM model with the current hyperparameters. The
            # Dropout layer is what makes the sampled `dropout` rate count.
            model = Sequential()
            model.add(LSTM(units, input_shape=(n_steps, 1)))
            model.add(Dropout(dropout))
            model.add(Dense(1))
            model.compile(loss='mean_squared_error',
                          optimizer=Adam(learning_rate=learning_rate))

            # Train, holding out 20% of the training data so early stopping
            # can halt once val_loss has not improved for 10 epochs.
            early_stop = EarlyStopping(monitor='val_loss', patience=10)
            model.fit(train_inputs,
                      y_train,
                      validation_split=0.2,
                      batch_size=batch_size,
                      epochs=epochs,
                      callbacks=[early_stop],
                      verbose=0)

            # The Dense(1) head predicts shape (n, 1) while y_test is (n,).
            # Subtracting them directly would broadcast to an (n, n) matrix
            # and give a wrong MSE, so flatten the prediction first.
            y_pred = model.predict(test_inputs, verbose=0).reshape(-1)
            mse = float(mean_squared_error(y_test, y_pred))

            # Keep the combination with the lowest error.
            if mse < best_mse:
                best_mse = mse
                best_batch_size = batch_size
                best_epochs = epochs

    # Persist the winning training settings with the trial; they are not
    # Optuna parameters, so they would otherwise be lost after the run.
    trial.set_user_attr('best_batch_size', best_batch_size)
    trial.set_user_attr('best_epochs', best_epochs)
    print('Best Batch Size:', best_batch_size)
    print('Best Epochs:', best_epochs)

    return best_mse

# Set up a study whose goal is the smallest objective value (the MSE),
# then let Optuna evaluate the objective for 100 trials.
study = optuna.create_study(direction='minimize')
study.optimize(func=objective, n_trials=100)

In [None]:
# Report the winning trial: its sampled hyperparameters and the objective
# value (MSE) it achieved.
best_params, best_mse = study.best_params, study.best_value

print('Best Hyperparameters:', best_params)
print('Best MSE:', best_mse)