In [30]:
import pickle
import numpy as np
import pandas as pd
from datetime import datetime
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import keras_tuner as kt
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

In [31]:
def _load_pickle(*path_parts):
    """Unpickle and return the object stored at the path joined from path_parts.

    SECURITY: pickle.load can execute arbitrary code embedded in the file,
    so only point this at files that were produced by a trusted pipeline.
    """
    with open(os.path.join(*path_parts), 'rb') as f:
        return pickle.load(f)


# Paths are assembled with os.path.join so the notebook also runs on
# non-Windows machines (a hard-coded '\' is not a separator there).
SPLITS_DIR = os.path.join('.', 'News_Dataset_Splits')

X_train = _load_pickle(SPLITS_DIR, 'X_train.pkl')
X_val = _load_pickle(SPLITS_DIR, 'X_val.pkl')
X_test = _load_pickle(SPLITS_DIR, 'X_test.pkl')

y_train = _load_pickle(SPLITS_DIR, 'y_train.pkl')
y_val = _load_pickle(SPLITS_DIR, 'y_val.pkl')
y_test = _load_pickle(SPLITS_DIR, 'y_test.pkl')

# Integer-encode the labels (the encoder is fitted on the training labels only),
# then one-hot encode them for the categorical cross-entropy loss.
encoder = LabelEncoder()
encoder.fit(np.unique(y_train))
train_labels = encoder.transform(y_train)
val_labels = encoder.transform(y_val)
test_labels = encoder.transform(y_test)
num_classes = len(encoder.classes_)
train_one_hot = keras.utils.to_categorical(train_labels, num_classes=num_classes)
val_one_hot = keras.utils.to_categorical(val_labels, num_classes=num_classes)
test_one_hot = keras.utils.to_categorical(test_labels, num_classes=num_classes)

embedding_matrix = _load_pickle('.', 'embeddingMatrix_News.pkl')

num_tokens = len(embedding_matrix) # total vocabulary +1 or length of embedding matrix
embedding_dim = 300 # dimension of the vector of a single word; presumably equals the width of embedding_matrix - TODO confirm
MAX_NEWS_LEN = 500 # maximum words in a news article

In [32]:
def build_model(hp, max_layers, test_optimizers, test_initializers, test_regularizer, test_activations, use_BatchNormalization, 
                use_Dropout, units_min_value, units_max_value, units_step, same_units=False):
    """Build and compile a tunable feed-forward classifier on top of the embedding matrix.

    The network is Embedding -> GlobalAveragePooling1D -> a tunable number of
    Dense blocks -> softmax output with `num_classes` units. It reads the
    notebook-level names `num_tokens`, `embedding_dim`, `embedding_matrix`,
    `MAX_NEWS_LEN` and `num_classes`.

    Args:
        hp: hyperparameter container providing Int/Choice/Boolean/Float
            (presumably a keras_tuner HyperParameters instance supplied by the tuner).
        max_layers: upper bound for the tuned number of hidden Dense layers (lower bound is 1).
        test_optimizers: if true, tune the optimizer; otherwise use 'adam'.
        test_initializers: if true, tune each layer's kernel initializer;
            otherwise use 'glorot_uniform'.
        test_regularizer: if true, tune each layer's kernel regularizer among
            none / l1 / l2 / l1_l2; otherwise use no regularizer.
        test_activations: if true, tune each layer's activation; otherwise use 'relu'.
        use_BatchNormalization: if true, tune per layer whether a
            BatchNormalization layer follows the Dense layer.
        use_Dropout: if true, tune per layer whether a Dropout layer (rate
            0.1 to 0.5 in steps of 0.1) follows the Dense layer.
        units_min_value: minimum number of units per hidden layer.
        units_max_value: maximum number of units per hidden layer.
        units_step: step of the units search range.
        same_units: if true, one shared 'units' hyperparameter is used for
            every hidden layer; otherwise each layer gets its own 'units_{i}'.

    Returns:
        The compiled keras.Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    embedding_layer = keras.layers.Embedding(
        num_tokens,
        embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_matrix),
        input_length=MAX_NEWS_LEN,
        trainable=True)
    
    model = keras.Sequential()
    model.add(embedding_layer)
    model.add(keras.layers.GlobalAveragePooling1D())

    num_layers = hp.Int('num_layers', 1, max_layers)

    # Determine the number of units for each layer
    if same_units:
        units = hp.Int('units', min_value=units_min_value, max_value=units_max_value, step=units_step)

    # hp.Choice only accepts int/float/str/bool values, so the regularizer is
    # tuned by name and turned into an object here. 'none' stands for "no regularizer".
    regularizer_factories = {
        'none': None,
        'l1': tf.keras.regularizers.l1,
        'l2': tf.keras.regularizers.l2,
        'l1_l2': tf.keras.regularizers.l1_l2,
    }

    # Hyperparameters for the number of layers
    for i in range(num_layers):
        if same_units:
            current_units = units
        else:
            current_units = hp.Int(f'units_{i}', min_value=units_min_value, max_value=units_max_value, step=units_step)

        if test_activations:
            activation = hp.Choice(f'activation_{i}', ['softmax', 'elu', 'selu', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'exponential', 'linear'])
        else:
            activation = 'relu'  # Default activation

        if test_initializers:
            kernel_initializer = hp.Choice(f'kernel_initializer_{i}', ['glorot_uniform', 'he_uniform', 'lecun_uniform'])
        else:
            kernel_initializer = 'glorot_uniform'
        
        if test_regularizer:
            regularizer_name = hp.Choice(f'kernel_regularizer_{i}', list(regularizer_factories))
            make_regularizer = regularizer_factories[regularizer_name]
            # A fresh regularizer instance per layer, built with the library defaults.
            kernel_regularizer = make_regularizer() if make_regularizer is not None else None
        else:
            kernel_regularizer = None

        model.add(layers.Dense(
            units=current_units,
            activation=activation,
            kernel_initializer=kernel_initializer,
            kernel_regularizer=kernel_regularizer
        ))

        # `and` short-circuits: the per-layer switch is only registered as a
        # hyperparameter when the corresponding feature is enabled.
        if use_BatchNormalization and hp.Boolean(f'batch_norm_{i}'):
            model.add(layers.BatchNormalization())

        if use_Dropout and hp.Boolean(f'dropout_{i}'):
            model.add(layers.Dropout(rate=hp.Float(f'dropout_rate_{i}', 0.1, 0.5, step=0.1)))

    # Output Layer
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Optimizer
    if test_optimizers:
        optimizer = hp.Choice('optimizer', ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam'])
    else:
        optimizer = 'adam'

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [42]:
def build_model_s(hp, test_optimizers):
    """Build and compile the fixed-architecture classifier; only the optimizer is tunable.

    Architecture: Embedding (initialised from `embedding_matrix`, trainable)
    -> GlobalAveragePooling1D -> Dense(64) -> Dense(64) -> Dense(96), all
    ReLU with glorot_uniform kernels -> softmax over `num_classes`.

    Args:
        hp: hyperparameter container providing Choice (presumably the
            keras_tuner HyperParameters object supplied by the tuner).
        test_optimizers: if true, the optimizer is a tuned choice; otherwise 'adam'.

    Returns:
        The compiled keras.Sequential model.
    """
    # Layers are instantiated in network order: embedding, pooling, hidden stack, output.
    network = [
        keras.layers.Embedding(
            num_tokens,
            embedding_dim,
            embeddings_initializer=keras.initializers.Constant(embedding_matrix),
            input_length=MAX_NEWS_LEN,
            trainable=True),
        keras.layers.GlobalAveragePooling1D(),
    ]
    for width in (64, 64, 96):
        network.append(layers.Dense(units=width, activation='relu', kernel_initializer='glorot_uniform'))
    network.append(layers.Dense(num_classes, activation='softmax'))

    model = keras.Sequential()
    for layer in network:
        model.add(layer)

    # Either tune the optimizer by name or fall back to Adam.
    chosen_optimizer = (
        hp.Choice('optimizer', ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam'])
        if test_optimizers
        else 'adam'
    )

    model.compile(optimizer=chosen_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [43]:
# Per-trial training budget.
NUM_EPOCHS, BATCH_SIZE = 15, 128

# Timestamp that gives every tuning run its own log folders.
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
directory = f'KerasTuner_Logs/FNN_V2_3Layer_Optimizer_{now}'

# Callbacks: stop once val_loss has not improved for 3 epochs (restoring the
# best weights) and log each run for TensorBoard.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)
tensorboard = TensorBoard(log_dir=f'TensorBoard_Logs/FNN_V2_3Layer_Optimizer_{now}')

# Exhaustive grid over the optimizer choice of build_model_s, minimising val_loss.
tuner = kt.GridSearch(
    hypermodel=lambda hp: build_model_s(hp, test_optimizers=True),
    objective=kt.Objective("val_loss", direction="min"),
    max_trials=None,
    executions_per_trial=1,
    directory=directory,
    project_name='Reviews_Classification',
)

In [33]:
# Tuner setup for the single-hidden-layer experiment built by build_model.
# NOTE(review): this cell re-binds NUM_EPOCHS, BATCH_SIZE, now, directory,
# tensorboard, early_stopping and tuner - the same names the In [43] cell
# assigns. Whichever of the two cells ran last decides what the later
# tuner.search call tunes; confirm the intended execution order.
NUM_EPOCHS = 15
BATCH_SIZE = 128

# Timestamp used to give this run its own KerasTuner / TensorBoard folders.
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
directory = f'KerasTuner_Logs/FNN_V2_1Layer_Optimizer_{now}'

# Callbacks
tensorboard = TensorBoard(log_dir=f'TensorBoard_Logs/FNN_V2_1Layer_Optimizer_{now}')
# The ModelCheckpoint below is disabled: it sits inside a bare string literal, so it never runs.
'''model_checkpoint = ModelCheckpoint(
    filepath=f"SavedModels/{directory}/best_model.h5",
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=1
)'''
# Stop a trial after 3 epochs without val_loss improvement and restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1
)

# Grid search minimising val_loss. With max_layers=1 and a units range of
# 128..128, only the optimizer choice varies between trials.
tuner = kt.GridSearch(
    lambda hp: build_model(hp, max_layers=1, test_optimizers=True, test_initializers=False, test_regularizer=False, test_activations=False,
                           use_BatchNormalization=False, use_Dropout=False, units_min_value=128, units_max_value=128, units_step=128, same_units=True),
    objective=kt.Objective("val_loss", direction="min"),
    max_trials=None,
    executions_per_trial=1,
    directory=directory,
    project_name='Reviews_Classification')

In [44]:
# Run every trial of the configured tuner on the training split and score it
# on the validation split; all options are forwarded as keyword arguments.
search_options = {
    'x': X_train,
    'y': train_one_hot,
    'verbose': 1,
    'epochs': NUM_EPOCHS,
    'batch_size': BATCH_SIZE,
    'callbacks': [tensorboard, early_stopping],
    'validation_data': (X_val, val_one_hot),
}
tuner.search(**search_options)

Trial 7 Complete [00h 03m 00s]
val_loss: 0.5447064638137817

Best val_loss So Far: 0.5410788059234619
Total elapsed time: 00h 22m 17s


In [45]:
# Hyperparameter values of the best-scoring trial.
tuner.get_best_hyperparameters(num_trials=1)[0].values

{'optimizer': 'RMSprop'}

In [46]:
# Print the architecture of the best model found by the search.
tuner.get_best_models(num_models=1)[0].summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 300)          145070400 
                                                                 
 global_average_pooling1d (G  (None, 300)              0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 64)                19264     
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 96)                6240      
                                                                 
 dense_3 (Dense)             (None, 10)                970       
                                                        

In [8]:
# Print the best trials (up to 10) with their hyperparameters and scores.
tuner.results_summary(num_trials=10)

Results summary
Results in dir_2024-07-05_11-38-27\Reviews_Classification
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0001 summary
Hyperparameters:
units: 85
num_layers: 1
optimizer: Adam
Score: 1.103798508644104

Trial 0003 summary
Hyperparameters:
units: 85
num_layers: 2
optimizer: Adam
Score: 1.1182894706726074

Trial 0000 summary
Hyperparameters:
units: 85
num_layers: 1
optimizer: RMSprop
Score: 1.1338261365890503

Trial 0002 summary
Hyperparameters:
units: 85
num_layers: 2
optimizer: RMSprop
Score: 1.1509822607040405


In [10]:
# Disabled snippet: everything below is inside a string literal, so none of it
# executes (no import, no model is loaded). It sketches loading a saved .h5
# model and inspecting its optimizer configuration.
'''from keras.models import load_model

# Specify the path to your .h5 model file
model_path = 'SavedModels/dir_2024-07-04_16-19-08/best_model.h5'

# Load the model
s_model = load_model(model_path)
s_model.optimizer.get_config()'''