In [None]:
import numpy as np
import tensorflow as tf
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.callbacks import EarlyStopping
#from tensorflow.keras.callbacks import ModelCheckpoint

# Pre-processing functions
from src.data.data_helper import get_raw_data_as_dataframe
from src.models.preprocessing.preprocessor import SignalPreprocessor
from src.data.data_helper import segement_data

# Model functions
from src.models.LSTM.LSTM import LSTM
from src.models.LSTM_STFT.LSTM_STFT import LSTM_STFT
from src.models.LSTM_STFT_Dense.LSTM_STFT_Dense import LSTM_STFT_Dense

In [None]:
def get_training_data(low_freq=20.0, high_freq=500.0, fs=5000.0,
                      bandpass_order=7, window_length=200 * 5, overlap=50 * 5):
    """Load the raw recordings and convert them into arrays ready for training.

    The raw dataframe is used to calibrate a ``SignalPreprocessor``, is cut into
    windows by ``segement_data``, and every window is then passed through the
    calibrated preprocessor. Labels are one-hot encoded.

    Args:
        low_freq: ``low_freq`` passed to ``SignalPreprocessor`` (presumably the
            lower band-pass cut-off in Hz -- confirm against the preprocessor).
        high_freq: ``high_freq`` passed to ``SignalPreprocessor`` (presumably the
            upper band-pass cut-off in Hz -- confirm against the preprocessor).
        fs: ``fs`` passed to ``SignalPreprocessor`` (presumably the sampling
            rate in Hz -- confirm against the preprocessor).
        bandpass_order: ``order`` passed to ``SignalPreprocessor``.
        window_length: ``window_length`` passed to ``segement_data``.
        overlap: ``overlap`` passed to ``segement_data``.

    Returns:
        A tuple ``(X_data, y_data, num_classes, input_shape)``:
        ``X_data`` is the output of ``batch_pre_process`` on the stacked
        windows, ``y_data`` the one-hot labels, ``num_classes`` the number of
        distinct labels, and ``input_shape`` the size of ``X_data``'s second
        axis.

    Raises:
        ValueError: If segmentation produced no windows.
    """
    raw_data = get_raw_data_as_dataframe()

    # Build the preprocessor and calibrate it on the raw recordings.
    # NOTE(review): calibration sees all data, including what later becomes the
    # validation split -- confirm this is acceptable for the reported scores.
    pre_processor = SignalPreprocessor(
        low_freq=low_freq,
        high_freq=high_freq,
        fs=fs,
        order=bandpass_order,
    )
    pre_processor.calibrate(raw_data)

    segmented_data = segement_data(raw_data, window_length=window_length, overlap=overlap)
    if len(segmented_data) == 0:
        # Fail with a readable message instead of an opaque np.stack error.
        raise ValueError("segement_data returned no windows; cannot build training arrays")

    num_classes = segmented_data['label'].nunique()

    # NOTE(review): to_categorical assumes integer labels in [0, num_classes)
    # -- TODO confirm the label encoding produced upstream.
    labels = segmented_data['label'].to_numpy()
    y_data = tf.keras.utils.to_categorical(labels, num_classes=num_classes)

    # Only the 'window_data' column is needed; stack the per-window arrays
    # into a single array before preprocessing the whole batch.
    X_data = np.stack(segmented_data['window_data'].to_numpy())
    X_data = pre_processor.batch_pre_process(X_data)

    input_shape = X_data.shape[1]

    return X_data, y_data, num_classes, input_shape

In [None]:
# Run the full load -> segment -> preprocess pipeline once and unpack its outputs.
training_bundle = get_training_data()
X_data, y_data, num_classes, input_shape = training_bundle

In [None]:
# Hold out 20% of the windows for validation.
# train_test_split is already imported in the first cell, so it is not re-imported here.
# A fixed random_state keeps the split identical across kernel restarts, so
# validation scores from different tuning runs are comparable.
X_train, X_val, y_train, y_val = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=42,
)

In [None]:
import keras_tuner as kt

In [None]:
class LSTMHyperModel(kt.HyperModel):
    """Keras Tuner hypermodel wrapping the project ``LSTM`` model.

    Tuned hyperparameters: ``learning_rate``, ``optimizer``, ``normalization``
    (all consumed when the model is built) and ``batch_size`` (consumed when
    the model is fitted).

    Args:
        input_shape: Forwarded unchanged as the first argument of ``LSTM``.
        num_classes: Forwarded unchanged as the second argument of ``LSTM``.
    """

    def __init__(self, input_shape, num_classes):
        # Let the base class set up its own state before adding ours.
        super().__init__()
        self.input_shape = input_shape
        self.num_classes = num_classes

    # ----------------- build -----------------------------------------
    def build(self, hp):
        """Declare the search space and return the model for one trial.

        Args:
            hp: The ``HyperParameters`` container supplied by the tuner.

        Returns:
            Whatever ``LSTM(...).get_model()`` returns for the sampled values.
        """
        # Numeric choices are treated as ordered by Keras Tuner, so the values
        # are listed in ascending order to give the oracle a meaningful scale.
        lr = hp.Choice('learning_rate',
                       values=[1e-3, 2e-3, 5e-3, 2e-2, 5e-2, 1e-1])

        opt = hp.Choice('optimizer',
                        values=['adam', 'rmsprop', 'nadam'])

        norm = hp.Choice('normalization',
                         values=['none', 'batch', 'layer'])

        # batch_size is only declared here so it is part of the search space;
        # its value is read back in fit().
        hp.Choice('batch_size', values=[32, 64, 128, 256, 512])

        return LSTM(self.input_shape,
                    self.num_classes,
                    learning_rate=lr,
                    optimizer=opt,
                    normalization=norm).get_model()

    # ----------------- fit -------------------------------------------
    def fit(self, hp, model, X_train, y_train, X_val, y_val, **kwargs):
        """Train ``model`` for one trial using the trial's ``batch_size``.

        Args:
            hp: The trial's ``HyperParameters``; ``batch_size`` is read from it.
            model: The model returned by ``build`` for this trial.
            X_train, y_train: Training inputs and targets.
            X_val, y_val: Validation inputs and targets.
            **kwargs: Remaining arguments given to ``tuner.search`` plus the
                ``callbacks`` list assembled by the tuner. They are forwarded
                to ``model.fit``; ``epochs`` defaults to 10 and ``verbose`` to 2
                when absent.

        Returns:
            The value returned by ``model.fit``.
        """
        # Forward every keyword -- most importantly ``callbacks``, which carries
        # both user callbacks (e.g. early stopping) and the tuner's own ones.
        fit_kwargs = {'epochs': 10, 'verbose': 2, **kwargs}
        # The per-trial batch size and the explicit validation set always win
        # over anything of the same name passed through ``tuner.search``.
        fit_kwargs['batch_size'] = hp.get('batch_size')
        fit_kwargs['validation_data'] = (X_val, y_val)
        return model.fit(X_train, y_train, **fit_kwargs)

In [None]:
# Early-stopping callback: watch the validation F1 score (higher is better),
# allow 5 epochs without improvement, then roll back to the best weights.
# NOTE(review): 'val_f1_score' only exists if the compiled model reports a
# metric named 'f1_score' -- confirm in the LSTM model definition.
stop_early = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    mode='max',
    monitor='val_f1_score',
)

In [None]:
# Hypermodel instance that the tuner will build and fit once per trial.
hypermodel = LSTMHyperModel(input_shape=input_shape, num_classes=num_classes)

In [None]:
from src.utils.path_utils import get_models_dir

# Directory under the project's models folder where the tuner stores its trials.
models_root = get_models_dir()
model_dir = models_root / "LSTM_search"
model_dir

In [None]:
# Bayesian-optimisation tuner: maximise validation F1 over 5 trials.
# overwrite=True discards any earlier results stored under the same project.
f1_objective = kt.Objective("val_f1_score", direction="max")

tuner = kt.BayesianOptimization(
    hypermodel,
    objective=f1_objective,
    max_trials=5,
    overwrite=True,
    project_name="baseline_v2",
    directory=model_dir,
)

In [None]:
# Run the search. Everything given here is passed on to LSTMHyperModel.fit:
# the two positionals as training data, the rest as keyword arguments.
search_options = dict(
    X_val=X_val,
    y_val=y_val,
    callbacks=[stop_early],
    epochs=30,
    verbose=1,
)
tuner.search(X_train, y_train, **search_options)

In [None]:
# Ask the tuner for its single best-scoring hyperparameter set.
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hyperparameters[0]

In [None]:
# Fetch the top-ranked hyperparameter set and print a short summary of it.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

search_report = f"""
The hyperparameter search is complete.

Optimal learning rate : {best_hps.get('learning_rate')}
Optimal optimizer      : {best_hps.get('optimizer')}
Optimal normalization  : {best_hps.get('normalization')}
Optimal batch size     : {best_hps.get('batch_size')}
"""
print(search_report)