In [1]:
import pickle
import numpy as np
import pandas as pd
from datetime import datetime
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import keras_tuner as kt
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

In [2]:
def _load_pickle(*path_parts):
    """Unpickle and return the object stored at os.path.join(*path_parts).

    SECURITY: pickle.load can execute arbitrary code while deserialising.
    Only point this at files you produced yourself.
    """
    with open(os.path.join(*path_parts), 'rb') as handle:
        return pickle.load(handle)


# Paths are assembled with os.path.join so the notebook also runs on
# non-Windows machines (the backslash-separated literals only resolve on Windows).
DATA_DIR = os.path.join('.', 'News_Dataset_Splits')

# Pre-computed train / validation / test splits.
X_train = _load_pickle(DATA_DIR, 'X_train.pkl')
X_val = _load_pickle(DATA_DIR, 'X_val.pkl')
X_test = _load_pickle(DATA_DIR, 'X_test.pkl')

y_train = _load_pickle(DATA_DIR, 'y_train.pkl')
y_val = _load_pickle(DATA_DIR, 'y_val.pkl')
y_test = _load_pickle(DATA_DIR, 'y_test.pkl')

# Map the raw labels to integer ids. The encoder is fitted on the training
# labels only, so a label that appears only in val/test makes transform() raise.
encoder = LabelEncoder()
encoder.fit(np.unique(y_train))
train_labels = encoder.transform(y_train)
val_labels = encoder.transform(y_val)
test_labels = encoder.transform(y_test)

# One-hot targets, as required by the 'categorical_crossentropy' loss used below.
num_classes = len(encoder.classes_)
train_one_hot = keras.utils.to_categorical(train_labels, num_classes=num_classes)
val_one_hot = keras.utils.to_categorical(val_labels, num_classes=num_classes)
test_one_hot = keras.utils.to_categorical(test_labels, num_classes=num_classes)

# Pre-trained embedding matrix used to initialise the Embedding layer.
embedding_matrix = _load_pickle('.', 'embeddingMatrix_News.pkl')

num_tokens = len(embedding_matrix)  # number of rows in the embedding matrix (vocabulary size + 1)
embedding_dim = 300  # dimension of the vector of a single word
MAX_NEWS_LEN = 500  # maximum number of words kept per news article

In [3]:
def build_model(hp, min_layers, max_layers, test_optimizers, test_initializers, test_regularizer, regularizer_choice, test_learning_rate,
                test_activations, use_BatchNormalization, use_Dropout, filters_min_value, filters_max_value, filters_step, same_filters):
    """Build and compile a stacked Conv1D text classifier for Keras Tuner.

    The network is: Embedding (initialised from ``embedding_matrix``, trainable)
    -> ``num_layers`` x [Conv1D(kernel_size=5) -> optional BatchNormalization
    -> Dropout(0.2) -> optional tunable Dropout -> MaxPooling1D(2)]
    -> GlobalAveragePooling1D -> Dense(num_classes, softmax).

    Each ``test_*`` / ``use_*`` flag decides whether the matching setting is
    registered as a hyperparameter on ``hp`` or pinned to a default.

    Args:
        hp: Keras Tuner ``HyperParameters`` object.
        min_layers, max_layers: bounds of the ``num_layers`` hyperparameter.
        test_optimizers: if True, search over optimizer names; otherwise Adam is used.
        test_initializers: if True, search kernel initializers; otherwise 'he_uniform'.
        test_regularizer: if True, search the factor(s) of the regularizer
            selected by ``regularizer_choice``; otherwise no regularizer.
        regularizer_choice: 'l1', 'l2' or 'l1_l2'. Only read when
            ``test_regularizer`` is True.
        test_learning_rate: if True, search Adam's learning rate; otherwise 0.001.
            Ignored when ``test_optimizers`` is True.
        test_activations: if True, search the Conv1D activation; otherwise 'relu'.
        use_BatchNormalization: if True, a per-layer boolean hyperparameter
            decides whether BatchNormalization follows each convolution.
        use_Dropout: if True, add a per-layer Dropout with a tunable rate.
        filters_min_value, filters_max_value, filters_step: range of the
            per-layer ``filters_{i}`` hyperparameter.
        same_filters: if True, every layer uses ``filters_min_value`` filters
            and no ``filters_{i}`` hyperparameter is registered.

    Returns:
        A compiled ``keras.Sequential`` model (categorical cross-entropy loss,
        accuracy metric).

    Raises:
        ValueError: if ``test_regularizer`` is True and ``regularizer_choice``
            is not one of 'l1', 'l2', 'l1_l2'.
    """
    embedding_layer = keras.layers.Embedding(
        num_tokens,
        embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_matrix),
        input_length=MAX_NEWS_LEN,
        trainable=True)

    model = keras.Sequential()
    model.add(embedding_layer)

    num_layers = hp.Int('num_layers', min_layers, max_layers)

    if test_activations:
        activation = hp.Choice('activation', ['softplus', 'softsign', 'relu', 'tanh'])
    else:
        activation = 'relu'  # Default activation

    if test_initializers:
        kernel_initializer = hp.Choice('kernel_initializer', ['glorot_uniform', 'he_uniform', 'random_uniform'])
    else:
        kernel_initializer = 'he_uniform'

    if test_regularizer:
        if regularizer_choice == 'l1':
            kernel_regularizer = keras.regularizers.L1(l1=hp.Choice('l1_factor', [1e-4, 1e-2]))
        elif regularizer_choice == 'l2':
            kernel_regularizer = keras.regularizers.L2(l2=hp.Choice('l2_factor', [1e-4, 1e-2]))
        elif regularizer_choice == 'l1_l2':
            kernel_regularizer = keras.regularizers.L1L2(l1=hp.Choice('l1_l2_l1_factor', [1e-4, 1e-2]),
                                                         l2=hp.Choice('l1_l2_l2_factor', [1e-4, 1e-2]))
        else:
            # Without this branch kernel_regularizer stayed unassigned and the
            # function failed later with an opaque UnboundLocalError.
            raise ValueError(
                f"regularizer_choice must be 'l1', 'l2' or 'l1_l2', got {regularizer_choice!r}")
    else:
        kernel_regularizer = None

    # One convolutional stage per layer.
    for i in range(num_layers):
        if same_filters:
            filters = filters_min_value
        else:
            filters = hp.Int(f'filters_{i}', min_value=filters_min_value, max_value=filters_max_value, step=filters_step)

        model.add(layers.Conv1D(
            filters=filters,
            kernel_size=5,
            activation=activation,
            kernel_initializer=kernel_initializer,
            kernel_regularizer=kernel_regularizer,
            padding='same'
        ))

        # Short-circuit keeps 'batch_norm_{i}' out of the search space when
        # batch normalization is disabled.
        if use_BatchNormalization and hp.Boolean(f'batch_norm_{i}'):
            model.add(layers.BatchNormalization())

        # NOTE(review): this fixed Dropout(0.2) is always added, so with
        # use_Dropout=True two Dropout layers are stacked and the 0.0 choice
        # below never yields a dropout-free stage. Confirm this is intended.
        model.add(layers.Dropout(rate=0.2))

        if use_Dropout:
            model.add(layers.Dropout(rate=hp.Choice(f'dropout_rate_{i}', [0.0, 0.2, 0.4])))

        model.add(layers.MaxPooling1D(pool_size=2))

    model.add(layers.GlobalAveragePooling1D())
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Optimizer
    if test_optimizers:
        # The optimizer is passed to compile() by name, so it is built with its
        # default settings and test_learning_rate has no effect on this path.
        optimizer = hp.Choice('optimizer', ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam'])
    else:
        if test_learning_rate:
            learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5])
        else:
            learning_rate = 0.001
        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [4]:
# Training budget applied to every trial of the search.
NUM_EPOCHS = 12
BATCH_SIZE = 128

# A timestamp in the log paths keeps separate searches from overwriting each other.
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
directory = f'KerasTuner_Logs/CNN/CNN_V2_3Layer_Optimizer_{now}'

# Callbacks
tensorboard = TensorBoard(log_dir=f'TensorBoard_Logs/CNN/CNN_V2_3Layer_Optimizer_{now}')
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Grid search over the optimizer only: the depth is pinned to 3 layers of
# 128 filters and every other switch of build_model is turned off.
tuner = kt.GridSearch(
    hypermodel=lambda hp: build_model(
        hp,
        min_layers=3,
        max_layers=3,
        test_optimizers=True,
        test_initializers=False,
        test_regularizer=False,
        regularizer_choice='l1',
        test_learning_rate=False,
        test_activations=False,
        use_BatchNormalization=False,
        use_Dropout=False,
        filters_min_value=128,
        filters_max_value=128,
        filters_step=128,
        same_filters=True,
    ),
    objective=kt.Objective("val_loss", direction="min"),
    max_trials=None,
    executions_per_trial=1,
    directory=directory,
    project_name='Reviews_Classification',
)

In [21]:
def build_model_pd(hp, test_optimizers, test_initializers, test_regularizer, regularizer_choice, test_learning_rate,
                test_activations, use_BatchNormalization, use_Dropout):
    """Build and compile a single-Conv1D-layer text classifier for Keras Tuner.

    The network is: Embedding (initialised from ``embedding_matrix``, trainable)
    -> Conv1D(128 filters, kernel_size=5) -> optional BatchNormalization
    -> optional tunable Dropout -> MaxPooling1D(2) -> GlobalAveragePooling1D
    -> Dense(num_classes, softmax).

    Each ``test_*`` / ``use_*`` flag decides whether the matching setting is
    registered as a hyperparameter on ``hp`` or pinned to a default.

    Args:
        hp: Keras Tuner ``HyperParameters`` object.
        test_optimizers: if True, search over optimizer names; otherwise Adam is used.
        test_initializers: if True, search kernel initializers; otherwise 'random_uniform'.
        test_regularizer: if True, search the factor(s) of the regularizer
            selected by ``regularizer_choice``; otherwise no regularizer.
        regularizer_choice: 'l1', 'l2' or 'l1_l2'. Only read when
            ``test_regularizer`` is True.
        test_learning_rate: if True, search Adam's learning rate; otherwise 0.001.
            Ignored when ``test_optimizers`` is True.
        test_activations: if True, search the Conv1D activation; otherwise 'softplus'.
        use_BatchNormalization: if True, a boolean hyperparameter decides
            whether BatchNormalization follows the convolution.
        use_Dropout: if True, add a Dropout layer with a tunable rate.

    Returns:
        A compiled ``keras.Sequential`` model (categorical cross-entropy loss,
        accuracy metric).

    Raises:
        ValueError: if ``test_regularizer`` is True and ``regularizer_choice``
            is not one of 'l1', 'l2', 'l1_l2'.
    """
    embedding_layer = keras.layers.Embedding(
        num_tokens,
        embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_matrix),
        input_length=MAX_NEWS_LEN,
        trainable=True)

    model = keras.Sequential()
    model.add(embedding_layer)

    if test_activations:
        activation = hp.Choice('activation', ['softplus', 'softsign', 'relu', 'tanh'])
    else:
        activation = 'softplus'  # Default activation

    if test_initializers:
        kernel_initializer = hp.Choice('kernel_initializer', ['glorot_uniform', 'he_uniform', 'random_uniform'])
    else:
        kernel_initializer = 'random_uniform'

    if test_regularizer:
        if regularizer_choice == 'l1':
            kernel_regularizer = keras.regularizers.L1(l1=hp.Choice('l1_factor', [1e-4, 1e-2]))
        elif regularizer_choice == 'l2':
            kernel_regularizer = keras.regularizers.L2(l2=hp.Choice('l2_factor', [1e-4, 1e-2]))
        elif regularizer_choice == 'l1_l2':
            kernel_regularizer = keras.regularizers.L1L2(l1=hp.Choice('l1_l2_l1_factor', [1e-4, 1e-2]),
                                                         l2=hp.Choice('l1_l2_l2_factor', [1e-4, 1e-2]))
        else:
            # Without this branch kernel_regularizer stayed unassigned and the
            # function failed later with an opaque UnboundLocalError.
            raise ValueError(
                f"regularizer_choice must be 'l1', 'l2' or 'l1_l2', got {regularizer_choice!r}")
    else:
        kernel_regularizer = None

    model.add(layers.Conv1D(filters=128, kernel_size=5, activation=activation,
                            kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer, padding='same'))
    # Short-circuit keeps 'batch_norm' out of the search space when batch
    # normalization is disabled.
    if use_BatchNormalization and hp.Boolean('batch_norm'):
        model.add(layers.BatchNormalization())
    if use_Dropout:
        # The trailing underscore in the hyperparameter name is kept on purpose:
        # renaming it would break comparison with already-recorded trials.
        model.add(layers.Dropout(rate=hp.Choice('dropout_rate_', [0.0, 0.2, 0.4])))
    model.add(layers.MaxPooling1D(pool_size=2))

    model.add(layers.GlobalAveragePooling1D())
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Optimizer
    if test_optimizers:
        # The optimizer is passed to compile() by name, so it is built with its
        # default settings and test_learning_rate has no effect on this path.
        optimizer = hp.Choice('optimizer', ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam'])
    else:
        if test_learning_rate:
            learning_rate = hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4, 1e-5])
        else:
            learning_rate = 0.001
        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [22]:
# Training budget applied to every trial of the search.
NUM_EPOCHS = 12
BATCH_SIZE = 128

# A timestamp in the log paths keeps separate searches from overwriting each other.
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
directory = f'KerasTuner_Logs/CNN/CNN_V7_1Layer_Dropout_{now}'

# Callbacks
tensorboard = TensorBoard(log_dir=f'TensorBoard_Logs/CNN/CNN_V7_1Layer_Dropout_{now}')
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Grid search over the dropout rate only: every other switch of
# build_model_pd is turned off. This rebinds `tuner`, replacing any tuner
# created by an earlier cell.
tuner = kt.GridSearch(
    hypermodel=lambda hp: build_model_pd(
        hp,
        test_optimizers=False,
        test_initializers=False,
        test_regularizer=False,
        regularizer_choice='l1_l2',
        test_learning_rate=False,
        test_activations=False,
        use_BatchNormalization=False,
        use_Dropout=True,
    ),
    objective=kt.Objective("val_loss", direction="min"),
    max_trials=None,
    executions_per_trial=1,
    directory=directory,
    project_name='Reviews_Classification',
)

In [23]:
# Run the search with the most recently created `tuner`: each trial trains on
# the training split and is validated on (X_val, val_one_hot).
tuner.search(
    x=X_train,
    y=train_one_hot,
    validation_data=(X_val, val_one_hot),
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[tensorboard, early_stopping],
    verbose=1,
)

Trial 3 Complete [00h 04m 51s]
val_loss: 0.6041333079338074

Best val_loss So Far: 0.5988964438438416
Total elapsed time: 00h 14m 29s


In [24]:
# Hyperparameter values of the best trial found by the search.
tuner.get_best_hyperparameters(num_trials=1)[0].values

{'dropout_rate_': 0.2}

In [7]:
# Print the architecture of the best model found by the search.
tuner.get_best_models(num_models=1)[0].summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 250, 300)          16058400  
                                                                 
 global_average_pooling1d (G  (None, 300)              0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 128)               38528     
                                                                 
 dense_1 (Dense)             (None, 5)                 645       
                                                                 
Total params: 16,097,573
Trainable params: 16,097,573
Non-trainable params: 0
_________________________________________________________________


In [8]:
# List the best trials (up to 10) with their hyperparameters and scores.
tuner.results_summary(num_trials=10)

Results summary
Results in dir_2024-07-05_11-38-27\Reviews_Classification
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0001 summary
Hyperparameters:
units: 85
num_layers: 1
optimizer: Adam
Score: 1.103798508644104

Trial 0003 summary
Hyperparameters:
units: 85
num_layers: 2
optimizer: Adam
Score: 1.1182894706726074

Trial 0000 summary
Hyperparameters:
units: 85
num_layers: 1
optimizer: RMSprop
Score: 1.1338261365890503

Trial 0002 summary
Hyperparameters:
units: 85
num_layers: 2
optimizer: RMSprop
Score: 1.1509822607040405
