In [1]:
import pickle
import numpy as np
import pandas as pd
from datetime import datetime
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import keras_tuner as kt
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

In [2]:
# --- Load the pre-computed dataset splits ------------------------------------
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only point these paths at files produced by this project's own pipeline.
def _load_pickle(*path_parts):
    """Unpickle and return the object stored at ``os.path.join(*path_parts)``."""
    with open(os.path.join(*path_parts), 'rb') as f:
        return pickle.load(f)


# Paths are built with os.path.join instead of hard-coded backslashes so the
# notebook also runs on non-Windows machines (the resulting string is the same
# on Windows).
SPLITS_DIR = os.path.join('.', 'News_Dataset_Splits')

X_train = _load_pickle(SPLITS_DIR, 'X_train.pkl')
X_val = _load_pickle(SPLITS_DIR, 'X_val.pkl')
X_test = _load_pickle(SPLITS_DIR, 'X_test.pkl')

y_train = _load_pickle(SPLITS_DIR, 'y_train.pkl')
y_val = _load_pickle(SPLITS_DIR, 'y_val.pkl')
y_test = _load_pickle(SPLITS_DIR, 'y_test.pkl')

# --- Encode labels -----------------------------------------------------------
# The encoder is fitted on the training labels only; validation/test labels
# are transformed with that same mapping.
encoder = LabelEncoder()
encoder.fit(np.unique(y_train))
train_labels = encoder.transform(y_train)
val_labels = encoder.transform(y_val)
test_labels = encoder.transform(y_test)
num_classes = len(encoder.classes_)

# One-hot targets, as required by the 'categorical_crossentropy' loss.
train_one_hot = keras.utils.to_categorical(train_labels, num_classes=num_classes)
val_one_hot = keras.utils.to_categorical(val_labels, num_classes=num_classes)
test_one_hot = keras.utils.to_categorical(test_labels, num_classes=num_classes)

# --- Pre-trained embedding matrix --------------------------------------------
embedding_matrix = _load_pickle('.', 'embeddingMatrix_News.pkl')

num_tokens = len(embedding_matrix)  # number of rows in the embedding matrix (vocabulary size + 1)
embedding_dim = 300  # dimension of the vector of a single word
MAX_NEWS_LEN = 500  # maximum number of words kept per news article

In [15]:
def build_model(hp, max_layers, test_optimizers, test_activations, use_Dropout,
                filters_min_value, filters_max_value, filters_step):
    """Build and compile a 1-D CNN text classifier for one Keras Tuner trial.

    Architecture: a trainable Embedding layer initialised from
    ``embedding_matrix``, followed by ``num_layers`` repetitions of
    Conv1D -> (optional Dropout) -> MaxPooling1D, then GlobalAveragePooling1D
    and a softmax Dense layer with ``num_classes`` outputs.

    This function reads the notebook-level globals ``num_tokens``,
    ``embedding_dim``, ``embedding_matrix``, ``MAX_NEWS_LEN`` and
    ``num_classes``; they must be defined before it is called.

    Args:
        hp: Keras Tuner hyperparameter container used to register and sample
            the search space.
        max_layers: Upper bound of the ``num_layers`` hyperparameter (the
            lower bound is fixed at 2).
        test_optimizers: If true, the optimizer is a tunable choice;
            otherwise ``'adam'`` is used.
        test_activations: If true, the Conv1D activation is a tunable choice;
            otherwise ``'relu'`` is used.
        use_Dropout: If true, a Dropout layer with a tunable rate is inserted
            after every Conv1D layer.
        filters_min_value: Minimum number of Conv1D filters per layer.
        filters_max_value: Maximum number of Conv1D filters per layer.
        filters_step: Step between candidate filter counts.

    Returns:
        A compiled ``keras.Sequential`` model using the
        ``'categorical_crossentropy'`` loss and the ``'accuracy'`` metric.
    """
    embedding_layer = keras.layers.Embedding(
        num_tokens,
        embedding_dim,
        embeddings_initializer=keras.initializers.Constant(embedding_matrix),
        input_length=MAX_NEWS_LEN,
        trainable=True)

    model = keras.Sequential()
    model.add(embedding_layer)

    num_layers = hp.Int('num_layers', 2, max_layers)

    # A single activation is shared by every convolutional layer.
    if test_activations:
        activation = hp.Choice('activation', ['softplus', 'softsign', 'relu', 'tanh'])
    else:
        activation = 'relu'  # Default activation

    for i in range(num_layers):
        # Each layer gets its own independently tuned filter count.
        filters = hp.Int(f'filters_{i}', min_value=filters_min_value,
                         max_value=filters_max_value, step=filters_step)

        model.add(layers.Conv1D(
            filters=filters,
            kernel_size=5,
            activation=activation,
            padding='same'  # keeps the sequence length; only pooling shortens it
        ))

        if use_Dropout:
            model.add(layers.Dropout(rate=hp.Choice(f'dropout_rate_{i}', [0.0, 0.2])))

        # Halve the sequence length after every convolutional stage.
        model.add(layers.MaxPooling1D(pool_size=2))

    model.add(layers.GlobalAveragePooling1D())
    model.add(layers.Dense(num_classes, activation='softmax'))

    # Optimizer
    if test_optimizers:
        optimizer = hp.Choice('optimizer', ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam'])
    else:
        optimizer = 'adam'

    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [16]:
# Training budget applied to every trial.
NUM_EPOCHS = 15
BATCH_SIZE = 128

# Timestamp suffix so each run writes into its own log directories.
now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
directory = f'KerasTuner_Logs/CNN/CNN_V1_2Layer_Density_Optimization_{now}'

# Callbacks shared by all trials.
tensorboard = TensorBoard(log_dir=f'TensorBoard_Logs/CNN/CNN_V1_2Layer_Density_Optimization_{now}')
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)


def _build_two_layer_cnn(hp):
    """Hypermodel for this search: 2 conv layers, only the filter counts vary."""
    return build_model(
        hp,
        max_layers=2,
        test_optimizers=False,
        test_activations=False,
        use_Dropout=False,
        filters_min_value=64,
        filters_max_value=128,
        filters_step=32,
    )


# Grid search over the filter counts, ranked by validation loss.
tuner = kt.GridSearch(
    _build_two_layer_cnn,
    objective=kt.Objective("val_loss", direction="min"),
    max_trials=None,
    executions_per_trial=1,
    directory=directory,
    project_name='Reviews_Classification',
)

In [17]:
# Run the hyperparameter search; every trial trains on the training split and
# is scored on the validation split.
tuner.search(
    # data
    x=X_train,
    y=train_one_hot,
    validation_data=(X_val, val_one_hot),
    # training budget
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    # logging / early termination
    callbacks=[tensorboard, early_stopping],
    verbose=1,
)

Trial 9 Complete [00h 04m 01s]
val_loss: 0.6305643320083618

Best val_loss So Far: 0.6104480028152466
Total elapsed time: 00h 38m 45s


In [18]:
# Display the hyperparameter values of the first entry returned by
# get_best_hyperparameters() as a plain dict.
tuner.get_best_hyperparameters()[0].values

{'num_layers': 2, 'filters_0': 128, 'filters_1': 96}

In [14]:
# Print the layer-by-layer summary of the first model returned by
# get_best_models().
# NOTE(review): the recorded output for this cell lists three Conv1D layers,
# whereas the search configured in this notebook fixes num_layers to 2, and the
# cell's execution count is lower than the search cell's. The output looks
# stale -- re-run with Restart & Run All to confirm.
tuner.get_best_models()[0].summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 300)          145070400 
                                                                 
 conv1d (Conv1D)             (None, 500, 128)          192128    
                                                                 
 max_pooling1d (MaxPooling1D  (None, 250, 128)         0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 250, 128)          82048     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 125, 128)         0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 125, 96)           6

In [8]:
# Print the tuner's summary of its trials.
# NOTE(review): the recorded output for this cell reports 'units' and
# 'optimizer' hyperparameters, which the search configured in this notebook
# does not register, and the cell's execution count is out of order. The
# output presumably belongs to a different tuner run -- re-run to confirm.
tuner.results_summary()

Results summary
Results in dir_2024-07-05_11-38-27\Reviews_Classification
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0001 summary
Hyperparameters:
units: 85
num_layers: 1
optimizer: Adam
Score: 1.103798508644104

Trial 0003 summary
Hyperparameters:
units: 85
num_layers: 2
optimizer: Adam
Score: 1.1182894706726074

Trial 0000 summary
Hyperparameters:
units: 85
num_layers: 1
optimizer: RMSprop
Score: 1.1338261365890503

Trial 0002 summary
Hyperparameters:
units: 85
num_layers: 2
optimizer: RMSprop
Score: 1.1509822607040405
