# Hyperparameter Tuning

In [None]:
import os
import pandas as pd

from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

from src.StockDataProcessor import StockDataProcessor
from src.LongShortTermMemory import LSTMModel

In [None]:
# --- Run configuration ---
# Symbol and base folder of the input data; the run folder is built as
# FOLDER_PREFIX + TICKER + "/".
TICKER: str = "GOOG"
FOLDER_PREFIX: str = "data/day/"

# Passed as the first argument to
# StockDataProcessor.load_csv_transform_to_numpy.
TIME_STEPS: int = 60

# Fraction of the CSV rows, counted from the top, used to locate the
# validation date.
TRAIN_DATA_SPLIT: float = 0.65

In [None]:
# --- Data & Folder Setup ---
# Relative folder for this ticker; also echoed in the run summary.
RUN_FOLDER = f"{FOLDER_PREFIX}{TICKER}/"
# Absolute form of the same folder, resolved against the current working
# directory.
WORK_DIR = os.path.abspath(RUN_FOLDER)
# Input price history, expected inside the run folder.
CSV_FILE = os.path.join(WORK_DIR, 'data.csv')
# Output folder; deliberately the same directory as the input data.
PROJECT_FOLDER = WORK_DIR

# exist_ok=True makes this a no-op when the folder already exists and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(PROJECT_FOLDER, exist_ok=True)

In [None]:
# --- Load the price history and derive the key dates ---
data = pd.read_csv(CSV_FILE, parse_dates=['Datetime'])
datetime_column = data['Datetime']

# First and last timestamps, in file order.
first_date = datetime_column.iloc[0]
last_date = datetime_column.iloc[-1]

# The validation date is the timestamp of the row located TRAIN_DATA_SPLIT of
# the way through the file (index truncated towards zero).
total_rows = data.shape[0]
validation_date_index = int(total_rows * TRAIN_DATA_SPLIT)
validation_date = datetime_column.iloc[validation_date_index]

In [None]:
# Print a summary of the run configuration.
# The dates are shown in their default string form; for a date-only display
# format them with .strftime("%Y-%m-%d") instead.
print(f'Ticker: {TICKER}')
print(f'Start Date: {first_date}')
print(f'Validation Date: {validation_date}')
print(f'End Date: {last_date}')
print(f'Test Run Folder: {RUN_FOLDER}')

In [None]:
# Let the project's data processor read the CSV and split it at the
# validation date. It returns three pairs, unpacked below:
# (train inputs, train targets), (test inputs, test targets) and
# (training data, test data).
_splits = StockDataProcessor.load_csv_transform_to_numpy(
    TIME_STEPS,
    CSV_FILE,
    validation_date,
)
(x_train, y_train), (x_test, y_test), (training_data, test_data) = _splits

In [None]:
# Instantiate the project's LSTM wrapper with the shape of the training
# inputs and the output folder.
# NOTE(review): the meaning of the two positional `1` arguments is defined in
# src.LongShortTermMemory and is not visible here — confirm before changing.
# NOTE(review): `lstm` is not referenced by the search cells in this notebook,
# which pass LSTMModel.create directly to KerasRegressor — confirm whether
# this instance is still needed (e.g. for constructor side effects).
lstm = LSTMModel(x_train.shape, 1, 1, PROJECT_FOLDER)

In [None]:
# Define the parameter grid for the search.
# Keys carry the `model__` prefix, which SciKeras uses to route a value to the
# model-building callable given to KerasRegressor (see the SciKeras docs on
# routed parameters).
param_grid = {
    'model__units': [50, 100, 150, 200],
    # Keep dropout below 0.4: per the original author's note, model creation
    # multiplies it by 2.5 and the result is supposed to stay below 1.0.
    'model__dropout': [0.1, 0.2, 0.3],
    'model__activation': ['relu', 'tanh', 'sigmoid'],
    'model__optimizer': ['adam', 'rmsprop', 'sgd'],
    # Training-loop settings belong to the wrapper, not the model factory, so
    # they take un-prefixed keys when enabled:
    # 'batch_size': [5, 10, 20, 40],
    # 'epochs': [50, 100, 150]
}

# Tuning epochs and batch_size: SciKeras documents `epochs` and `batch_size`
# as parameters of the KerasRegressor wrapper itself, so they are searched
# with the plain keys 'epochs' / 'batch_size' rather than 'model__...' keys.
# NOTE(review): verify against the installed SciKeras version before enabling.

In [None]:
# Wrap the project's model factory (LSTMModel.create) in a KerasRegressor so
# it can serve as the `estimator` of a scikit-learn search.
# NOTE(review): no `epochs` or `batch_size` is passed here, so the wrapper's
# defaults apply. SciKeras documents a default of epochs=1 — confirm that
# comparing candidates after so little training is intended.
model = KerasRegressor(model=LSTMModel.create, verbose=0)

In [None]:
# Build the hyperparameter search object.
# Exhaustive alternative (tries every combination in param_grid):
# grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=1)
#
# Randomized search: evaluates a random sample of the combinations in
# param_grid with 3-fold cross-validation. random_state pins which
# combinations are sampled, so repeated runs compare the same candidates.
# Model training itself is not seeded here, so scores can still vary.
# NOTE(review): an integer cv uses contiguous K-fold splits; if the samples
# are time-ordered, consider sklearn's TimeSeriesSplit so that no fold is
# trained on data that comes after its test portion — confirm with the data
# processor's output ordering.
grid = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    cv=3,
    n_jobs=1,
    random_state=42,
)


In [None]:
# Run the search on the training arrays. `grid` is whichever search object
# was built in the previous cell (currently RandomizedSearchCV, not
# GridSearchCV); the value returned by fit() is kept for the reporting cells.
grid_result = grid.fit(x_train, y_train)

In [None]:
# Report the winning configuration first, then one line per evaluated
# candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

# Each line shows the mean test score, its standard deviation in parentheses,
# and the parameter set that produced it.
for summary in zip(means, stds, params):
    print("%f (%f) with: %r" % summary)

In [None]:
# Use the best parameters to create a model for final evaluation.
# NOTE(review): the sketch below is not runnable as written. `create_model`
# is not defined in the visible cells (the search uses LSTMModel.create), the
# keys in best_params carry the `model__` prefix, and 'epochs' / 'batch_size'
# are not part of the active param_grid, so those lookups would raise KeyError.
# best_params = grid_result.best_params_
# best_model = create_model(**best_params)  # Use the best parameters to create the model
# best_model.fit(x_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'])

In [None]:
# Persist the full cross-validation table as CSV inside the project folder
# (row index omitted).
results_df = pd.DataFrame(grid_result.cv_results_)
results_path = os.path.join(PROJECT_FOLDER, 'hyperparameter_tuning_results.csv')
results_df.to_csv(results_path, index=False)

# Results

**Random first run:**
Best: -0.624649 using {'model__units': 150, 'model__optimizer': 'rmsprop', 'model__dropout': 0.2, 'model__activation': 'tanh'}

**Grid first run:**
Best: 0.417636 using {'model__activation': 'relu', 'model__dropout': 0.2, 'model__optimizer': 'rmsprop', 'model__units': 50}