# Test of Keras_tuner
https://keras.io/keras_tuner/

## Library Import

In [1]:
import keras_tuner
from tensorflow import keras as K
import sys
sys.path.insert(0, '../../')
from library.notebook_api.data_loader import  ModelDataLoader

2024-12-01 13:37:40.935111: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Demo in Docs

In [2]:
def build_model(hp):
  model = K.Sequential()
  model.add(K.layers.Dense(
      hp.Choice('units', [8, 16, 32]),
      activation='relu'))
  model.add(K.layers.Dense(1, activation='relu'))
  model.compile(loss='mse')
  return model

In [3]:
tuner = keras_tuner.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5)

Reloading Tuner from ./untitled_project/tuner0.json


## Demo using cnn+ lstm
Based on last scenario in CNN_LSTM_MFCC_Classification , define a function for test train data and function for model 

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder

#data function
def get_numerical_vector_smote_v005_data():
    # Initialize ModelDataLoader with the correct directory path
    model_data_loader = ModelDataLoader(version='005')
    # Reload filtered_df from the original dataset
    filtered_df = model_data_loader.df.copy()

    # Ensure the features column doesn't have any None values
    filtered_df = filtered_df[filtered_df['features'].notnull()]

    # Include only relevant genres (ensure 'classical' is included)
    selected_genres = ['rock', 'electronic', 'hiphop', 'classical', 'jazz', 'country']

    # Filter the DataFrame for the selected genres
    filtered_df = filtered_df[filtered_df['label'].isin(selected_genres)].copy()

    # Preprocess features to ensure consistent shape
    max_length = max(filtered_df['features'].apply(lambda x: len(x)))

    # Pad or truncate features
    def pad_or_truncate(array, max_length):
        if len(array) < max_length:
            # Pad with zeros
            return np.pad(array, (0, max_length - len(array)), mode='constant')
        else:
            # Truncate to max_length
            return array[:max_length]

    # Apply padding/truncating to all features
    filtered_df['features_padded'] = filtered_df['features'].apply(lambda x: pad_or_truncate(x, max_length))

    # Convert to 2D NumPy array
    X = np.array(filtered_df['features_padded'].tolist())

    print(f"Shape of X after padding/truncating: {X.shape}")

    # Extract and encode labels
    y = filtered_df['label'].values
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Apply SMOTE for balancing
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y_encoded)

    # Verify the balanced class distribution
    print("Class distribution after SMOTE:")
    unique, counts = np.unique(y_resampled, return_counts=True)
    for label, count in zip(label_encoder.inverse_transform(unique), counts):
        print(f"{label}: {count}")

    # Add a channel dimension for CNN input
    X_resampled = X_resampled[..., np.newaxis]

    print(f"Shape of X_resampled: {X_resampled.shape}")
    print(f"Shape of y_resampled: {y_resampled.shape}")

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)
    return X_train, X_test, y_train, y_test 

In [28]:
#function for returning compiled model, passing hp for hyper paramerter tuning 
def get_cnn_lstm(hp):
    #usedict argument for manually set, otherwise use hp
    if isinstance(hp, dict):
        learning_rate = hp['learning_rate']
    else:
        learning_rate = hp.Choice("learning_rate", values=[0.001, 0.0001])

    # Define the fine-tuned CNN+LSTM model
    model = K.models.Sequential([
        # CNN Layers
        K.layers.Conv1D(128, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
        K.layers.BatchNormalization(),
        K.layers.MaxPooling1D(pool_size=2),

        K.layers.Conv1D(256, kernel_size=3, activation='relu'),
        K.layers.BatchNormalization(),
        K.layers.MaxPooling1D(pool_size=2),

        K.layers.Dropout(0.3),

        # LSTM Layers
        K.layers.LSTM(256, return_sequences=True, activation='relu'),
        K.layers.BatchNormalization(),
        K.layers.Dropout(0.4),

        K.layers.LSTM(128, return_sequences=False, activation='relu'),
        K.layers.Dropout(0.4),

        # Fully Connected Layers
        K.layers.Dense(256, activation='relu'),
        K.layers.Dropout(0.5),

        K.layers.Dense(128, activation='relu'),
        K.layers.Dropout(0.3),

        # Output Layer
        K.layers.Dense(6, activation='softmax')  # Adjust for number of classes
    ])
    # Compile the model with a custom learning rate
 
    optimizer = K.optimizers.Adam(learning_rate=learning_rate)  # Initial learning rate
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

@misc{omalley2019kerastuner,
    title        = {KerasTuner},
    author       = {O'Malley, Tom and Bursztein, Elie and Long, James and Chollet, Fran\c{c}ois and Jin, Haifeng and Invernizzi, Luca and others},
    year         = 2019,
    howpublished = {\url{https://github.com/keras-team/keras-tuner}}
}

In [6]:
X_train, X_test, y_train, y_test = get_numerical_vector_smote_v005_data()

Shape of X after padding/truncating: (16582, 15)
Class distribution after SMOTE:
classical: 7261
country: 7261
electronic: 7261
hiphop: 7261
jazz: 7261
rock: 7261
Shape of X_resampled: (43566, 15, 1)
Shape of y_resampled: (43566,)


In [33]:
# Train 
#another reference: https://keras.io/keras_tuner/guides/distributed_tuning/
# and here as well: https://keras.io/keras_tuner/getting_started/

tuner = keras_tuner.RandomSearch(
    get_cnn_lstm,
    objective='val_accuracy',
    max_trials=2,
    executions_per_trial=1,
    overwrite=True,
    directory="keras_tuner",
    project_name= "initial_test_01",
    
    )

tuner.search(
    X_train,
    y_train,
    epochs = 5,
    #steps_per_epoch=600,
    validation_data=(X_test, y_test),
    #validation_steps=100,
    callbacks=[K.callbacks.EarlyStopping("val_accuracy")],
)

best_model = tuner.get_best_models()[0]



Trial 2 Complete [00h 01m 47s]
val_accuracy: 0.4779664874076843

Best val_accuracy So Far: 0.49839338660240173
Total elapsed time: 00h 03m 52s


In [34]:
type(best_model)

keras.src.engine.sequential.Sequential

In [35]:
# Evaluate the model on the test set
test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=2)
print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

273/273 - 2s - loss: 1.2857 - accuracy: 0.4984 - 2s/epoch - 7ms/step

Test Accuracy: 49.84%
Test Loss: 1.2857


In [None]:
#issue with tuner is that by default it is not keeping history, 
#as workaround can train again to get history of new model 
#history = tuner.get_best_models()[0].history

In [36]:
tuner.results_summary()

Results summary
Results in keras_tuner/initial_test_01
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0 summary
Hyperparameters:
learning_rate: 0.0001
Score: 0.49839338660240173

Trial 1 summary
Hyperparameters:
learning_rate: 0.001
Score: 0.4779664874076843


In [37]:
selected_hp = {'learning_rate':  0.0001}
type(selected_hp)
history = get_cnn_lstm(selected_hp).fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=50,
    callbacks=[
        K.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        #ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1),
        #lr_scheduler  # Learning rate scheduler
    ]
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [32]:
print(history.history)

{'loss': [1.5168441534042358], 'accuracy': [0.37828531861305237], 'val_loss': [1.4481579065322876], 'val_accuracy': [0.3932752013206482]}


In [None]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")