In [1]:

from sklearn.model_selection import ParameterGrid
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from gensim.models import Word2Vec, KeyedVectors

import pandas as pd
import numpy as np
import logging
import os
import json
from tensorflow.keras.preprocessing.text import tokenizer_from_json

2023-11-10 18:49:57.024750: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Data Preprocessing

1. Load the embedding matrix and tokenizer. Tokenise the data using the tokeniser

2. Convert text data from three different data frames (train_df, dev_df, and test_df) into sequences of integers. Each word in the text is replaced by its corresponding index from the word index created earlier.

3. Set a maximum sequence length (max_seq_length) and then pad the sequences to ensure that they all have the same length. \
    a. Padding is done with zeros (`0.`)

4. Create a label encoder by mapping unique labels in the 'label-coarse' column of the training data frame to integers.

5. Encode the labels for the training, development, and test data sets using this label encoder

In [6]:
# Load the reduced embedding matrix
embedding_matrix = np.load('models/embedding_matrix.npy')
vocab_size, embedding_size = embedding_matrix.shape

# Load saved tokenizer
with open('models/tokenizer.json') as f:
    tokenizer_data = json.load(f)
    tokenizer = tokenizer_from_json(tokenizer_data)

# Load dataset
train_df = pd.read_csv('TREC_dataset/train.csv')
dev_df = pd.read_csv('TREC_dataset/dev.csv')
test_df = pd.read_csv('TREC_dataset/test.csv')

# Convert text data to sequences
train_sequences = tokenizer.texts_to_sequences(train_df['text'])
dev_sequences = tokenizer.texts_to_sequences(dev_df['text'])
test_sequences = tokenizer.texts_to_sequences(test_df['text'])

# Pad sequences to ensure they have the same length
max_seq_length = 40  # Adjust as needed

train_data = pad_sequences(
    train_sequences, maxlen=max_seq_length, padding='post')
dev_data = pad_sequences(dev_sequences, maxlen=max_seq_length, padding='post')
test_data = pad_sequences(
    test_sequences, maxlen=max_seq_length, padding='post')

# Encode labels
label_encoder = {label: i for i, label in enumerate(train_df['label-coarse'].unique())}

train_labels = np.array([label_encoder[label]for label in train_df['label-coarse']])
dev_labels = np.array([label_encoder[label]for label in dev_df['label-coarse']])
test_labels = np.array([label_encoder[label]for label in test_df['label-coarse']])

## Bi-LSTM + CNN [FINALISED]

### Step 1: Define Hyperparameter Grid

1. Define a grid of hyperparameters to search, including:
   - `embedding_size`: Different embedding sizes.
   - `lstm_units`: Different LSTM units.
   - `batch_size`: Different batch sizes.

### Step 2: Logging and Model Directory Setup

2. Create a directory structure to store model logs.
3. Configure logging to track and save results to a log file.

### Step 3: Model Training Loop

In this step, we systematically train multiple models, each with different hyperparameter settings, to identify the best-performing configuration. The process is as follows:

#### Substep 1: Hyperparameter Iteration

4.1. Iterate through the predefined hyperparameter combinations.
   - For each combination, we explore various settings for:
     - `embedding_size`: The size of word embeddings.
     - `lstm_units`: The number of LSTM units in the bidirectional LSTM layer.
     - `batch_size`: The batch size used during training.

#### Substep 2: Model Architecture

4.2. Build the model architecture for the current hyperparameter combination. The architecture includes:
   - Embedding Layer: Converts input sequences into dense vector representations.
   - Bidirectional LSTM: A recurrent layer that captures contextual information bidirectionally.
   - Convolutional Layer: Applies convolutional operations to capture local patterns.
   - Global Max Pooling: Extracts the most relevant information from convolutional outputs.
   - Dropout: Regularization technique to prevent overfitting.
   - Dense Layers: Fully connected layers for classification.
   
#### Substep 3: Model Compilation

4.3. Compile the model with the following configurations:
   - Learning Rate Schedule: Uses an exponential decay schedule to adjust the learning rate.
   - Optimizer: Utilizes the Adam optimizer for gradient descent.
   
#### Substep 4: Early Stopping

4.4. Implement early stopping as a precautionary measure to prevent overfitting during training. Early stopping monitors the loss on the training set and stops training if the loss on the training set does not improve for a specified number of epochs.

#### Substep 5: Class Weights

4.5. Define class weights to address data imbalance issues. Class weights assign higher importance to underrepresented classes during training, helping the model better learn from imbalanced data.

#### Substep 6: Training and Validation

4.6. Train the model on the training dataset with the specified hyperparameters. During training, the model learns to make predictions based on input sequences. Validation is performed on a separate development dataset to assess the model's performance during training.

By systematically exploring different hyperparameter combinations and training models with varying configurations, we aim to identify the best-performing model with the most suitable hyperparameters for the text classification task.


### Step 4: Model Saving

5. After training, evaluate the model on the test set.
6. Save the model in a directory named based on its test accuracy (rounded to four decimal places).
7. Record additional information in a JSON file, including the model summary.
8. Log the saved model path and version.

Summary

This code demonstrates a systematic approach to hyperparameter tuning and model saving for text classification tasks, ensuring reproducibility and easy tracking of model performance.


In [10]:
import time
import json
import os
import io
import tensorflow as tf
from sklearn.model_selection import ParameterGrid
from tensorflow.keras.callbacks import EarlyStopping
import logging
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Bidirectional, GlobalMaxPooling1D, Dropout, Dense, Embedding, Conv1D, LSTM, BatchNormalization

# Create a directory for model logs
if not os.path.exists('model_logs'):
    os.mkdir('model_logs')

model_folder = 'BiLSTM'

# Create a directory for the current model
model_logs_dir = os.path.join('model_logs', model_folder)
if not os.path.exists(model_logs_dir):
    os.mkdir(model_logs_dir)

# Configure logging to save results to a single log file
log_filepath = os.path.join(model_logs_dir, 'model_log.txt')
logging.basicConfig(filename=log_filepath,
                    level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')


class TimeHistory(Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, epoch, logs={}):
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs={}):
        self.times.append(time.time() - self.epoch_start_time)


# Define a grid of hyperparameters to search
param_grid = {
    'embedding_size': [300],    # Different embedding sizes
    'lstm_units': [32],         # Different LSTM units
    'batch_size': [128],        # Different batch sizes
}

param_combinations = list(ParameterGrid(param_grid))


# Iterate through the parameter combinations
for params in param_combinations:
    embedding_size = params['embedding_size']
    lstm_units = params['lstm_units']
    batch_size = params['batch_size']

    print(
        f"Testing hyperparameters: Embedding Size={embedding_size}, LSTM Units={lstm_units}, Batch Size={batch_size}")
    
    
    bi_lstm = tf.keras.Sequential([
        Embedding(input_dim=vocab_size, output_dim=embedding_size, input_length=max_seq_length, trainable=False),
        Bidirectional(LSTM(lstm_units, return_sequences=True)),
        Conv1D(128, 5, activation='relu', padding='same'),              # Convolutional layer
        GlobalMaxPooling1D(),                                           # Global Max Pooling
        Dropout(0.2),                                                   # Dropout for regularization
        Dense(128, activation='relu', kernel_regularizer=l2(0.01)),     # Adding L2 regularization
        BatchNormalization(),                                           # Batch normalization layer 
        Dense(5, activation='softmax')
    ])
    
    # Compile the model with a lower initial learning rate and learning rate scheduler
    initial_learning_rate = 0.01
    
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

    bi_lstm.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Define EarlyStopping callback to prevent overfitting
    early_stopping = EarlyStopping(
        monitor='loss', patience=10, restore_best_weights=True)
    time_callback = TimeHistory()

    # Train the model
    history = bi_lstm.fit(train_data, train_labels, epochs=50,
                        batch_size=batch_size,
                        validation_data=(dev_data, dev_labels),
                          callbacks=[early_stopping, time_callback],
                        )

    # Evaluate the model on the test set
    test_loss, test_accuracy = bi_lstm.evaluate(
        test_data, test_labels, verbose=2)
    
    
    model_path = os.path.join(model_logs_dir, f'model_{round(test_accuracy,4)}')
    
    # Save the entire model
    tf.keras.models.save_model(bi_lstm, model_path)
    
    model_info = {}

    for layer in bi_lstm.layers:
        # Layer name as the key
        layer_name = layer.name
        layer_info = {
            'class_name': layer.__class__.__name__,
            'config': layer.get_config(),  # Gets detailed configuration of the layer
            'number_of_parameters': layer.count_params()
        }
        model_info[layer_name] = layer_info

    # Now add this model_info to your 'info' dictionary
    info = {
        'Model': model_info,  # Detailed model information
        'Hyperparameters': params,
        'Test Loss': test_loss,
        'Test Accuracy': test_accuracy,
        'Epoch Data': epoch_data
    }


    info_path = os.path.join(
        model_path, f'model_info.json')
    with open(info_path, 'w') as info_file:
        json.dump(info, info_file)

    # Logging: You can log the saved model path and version
    print(f"Saved model: {model_path}")
    # Log the results in the same log file
    model_summary = []
    bi_lstm.summary(print_fn=lambda x: model_summary.append(x))
    model_architecture = "\n".join(model_summary)
    logging.info("Model Architecture:\n" + model_architecture)
    logging.info(
        f"Testing hyperparameters: Embedding Size={embedding_size}, LSTM Units={lstm_units}, Batch Size={batch_size}")
    logging.info(f"Test Loss: {test_loss}")
    logging.info(f"Test Accuracy: {test_accuracy}")

Testing hyperparameters: Embedding Size=300, LSTM Units=32, Batch Size=128
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
16/16 - 0s - loss: 1.0724 - accuracy: 0.8580 - 140ms/epoch - 9ms/step
INFO:tensorflow:Assets written to: model_logs/BiLSTM/model_0.858/assets
Saved model: model_logs/BiLSTM/model_0.858


## LSTM + GRU + Attention

In [None]:
# class Attention(tf.keras.layers.Layer):
#     def __init__(self):
#         super(Attention, self).__init__()

#     def build(self, input_shape):
#         self.W_q = self.add_weight("W_q", shape=(
#             input_shape[-1], input_shape[-1]))
#         self.W_k = self.add_weight("W_k", shape=(
#             input_shape[-1], input_shape[-1]))
#         self.W_v = self.add_weight("W_v", shape=(
#             input_shape[-1], input_shape[-1]))

#     def call(self, inputs):
#         Q = tf.matmul(inputs, self.W_q)
#         K = tf.matmul(inputs, self.W_k)
#         V = tf.matmul(inputs, self.W_v)

#         attention_scores = tf.matmul(Q, K, transpose_b=True)
#         attention_scores = tf.nn.softmax(attention_scores, axis=-1)

#         output = tf.matmul(attention_scores, V)
#         return output

# # Define a grid of hyperparameters to search
# param_grid = {
#     'embedding_size': [150],  # Different embedding sizes
#     'lstm_units': [64],      # Different LSTM units
#     'gru_units': [32],        # Different GRU units
#     'batch_size': [64],      # Different batch sizes
# }

# param_combinations = list(ParameterGrid(param_grid))
# print('Param Combinations: ',param_combinations)
# model_folder = 'Ensemble_Model'

# # Create a directory for the current model
# model_logs_dir = os.path.join('model_logs', model_folder)
# if not os.path.exists(model_logs_dir):
#     os.mkdir(model_logs_dir)

# # Configure logging to save results to a single log file
# log_filepath = os.path.join(model_logs_dir, 'model_log.txt')
# logging.basicConfig(filename=log_filepath,
#                     level=logging.INFO,
#                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# # Iterate through the parameter combinations
# for params in param_combinations:
#     embedding_size = params['embedding_size']
#     lstm_units = params['lstm_units']
#     gru_units = params['gru_units']
#     batch_size = params['batch_size']

    
#     print(
#         f"Testing hyperparameters: Embedding Size={embedding_size}, LSTM Units={lstm_units}, GRU Units={gru_units}, Batch Size={batch_size}")

#     # Define your model with attention
#     # Define your simpler model architecture
#     attention_LGRB = tf.keras.Sequential([
#         tf.keras.layers.Embedding(
#             input_dim=vocab_size,
#             output_dim=embedding_size,
#             input_length=max_seq_length,
#             trainable=False
#         ),
#         # Dropout for the embedding layer
#         tf.keras.layers.SpatialDropout1D(0.2),
#         tf.keras.layers.LSTM(lstm_units, return_sequences=True,
#                              kernel_regularizer=tf.keras.regularizers.l2(0.001)),  # L2 regularization
#         tf.keras.layers.Dropout(0.3),  # Dropout to prevent overfitting
#         tf.keras.layers.GRU(gru_units, return_sequences=True),
#         tf.keras.layers.Dropout(0.3),  # Dropout to prevent overfitting
#         Attention(),
#         tf.keras.layers.GlobalAveragePooling1D(),
#         tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(
#             0.001)),  # L2 regularization
#         tf.keras.layers.Dropout(0.5),
#         tf.keras.layers.Dense(5, activation='softmax')
#     ])

#     # Compile the model with a lower initial learning rate and learning rate scheduler
#     initial_learning_rate = 0.1
#     lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#         initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True)
#     optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

#     attention_LGRB.compile(optimizer=optimizer,
#                   loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#     # Define EarlyStopping callback to prevent overfitting
#     early_stopping = EarlyStopping(
#         monitor='val_loss', patience=6, restore_best_weights=True)

#     # Train the model
#     history = attention_LGRB.fit(train_data, train_labels, epochs=50,
#                         batch_size=batch_size,
#                         validation_data=(dev_data, dev_labels),)

#     # Evaluate the model on the test se
#     test_loss, test_accuracy = attention_LGRB.evaluate(
#         test_data, test_labels, verbose=2)

#     # Log the results in the same log file
#     model_summary = []
#     attention_LGRB.summary(print_fn=lambda x: model_summary.append(x))
#     model_architecture = "\n".join(model_summary)
#     logging.info("Model Architecture:\n" + model_architecture)
#     logging.info(
#         f"Testing hyperparameters: Embedding Size={embedding_size}, LSTM Units={lstm_units}, GRU Units={gru_units}, Batch Size={batch_size}")
#     logging.info(f"Test Loss: {test_loss}")
#     logging.info(f"Test Accuracy: {test_accuracy}")

## CNN + Attention

In [None]:
# import json
# from tensorflow.keras.layers import Input, Conv1D,BatchNormalization, GlobalMaxPooling1D, Dropout, Dense, Embedding, Concatenate


# class Attention(tf.keras.layers.Layer):
#     def __init__(self, units):
#         super(Attention, self).__init__()
#         self.W1 = tf.keras.layers.Dense(units)
#         self.W2 = tf.keras.layers.Dense(units)
#         self.V = tf.keras.layers.Dense(1)

#     def call(self, features):
#         query_value_attention_score = self.V(tf.nn.tanh(
#             self.W1(features) + self.W2(features)))
#         attention_weights = tf.nn.softmax(query_value_attention_score, axis=1)
#         context_vector = attention_weights * features
#         context_vector = tf.reduce_sum(context_vector, axis=1)

#         return context_vector, attention_weights

# # Updated hyperparameter grid to include depth and width
# param_grid = {
#     'embedding_size': [100],     # Embedding sizes
#     'conv_depth': [1],          # Number of Conv1D layers (Depth)
#     'conv_width': [128],    # Number of filters in each Conv1D layer (Width)
#     'kernel_size': [5],         # Kernel sizes
#     'batch_size': [64],      # Batch sizes
#     'attention_units': [64]
# }


# param_combinations = list(ParameterGrid(param_grid))
# print('Param Combinations:')
# for p in param_combinations:
#     print(p)

# model_folder = 'CNN'

# # Create a directory for the current model
# model_logs_dir = os.path.join('model_logs', model_folder)
# if not os.path.exists(model_logs_dir):
#     os.mkdir(model_logs_dir)

# # Configure logging to save results to a single log file
# log_filepath = os.path.join(model_logs_dir, 'model_log.txt')
# logging.basicConfig(filename=log_filepath,
#                     level=logging.INFO,
#                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# # In the model architecture:
# for params in param_combinations:
#     embedding_size = params['embedding_size']
#     conv_depth = params['conv_depth']
#     conv_width = params['conv_width']
#     kernel_size = params['kernel_size']
#     batch_size = params['batch_size']
#     attention_units = params['attention_units']
#     # Log the current hyperparameters
#     print(
#         f"Testing hyperparameters: Embedding Size={embedding_size}, Conv Depth={conv_depth}, Conv Width={conv_width}, Kernel Size={kernel_size}, Batch Size={batch_size}")

#     # -------------------------------------------------------------------------------------
#     # input_layer = tf.keras.layers.Input(shape=(max_seq_length,))
#     # embedding_layer = tf.keras.layers.Embedding(input_dim=vocab_size,
#     #                             output_dim=embedding_size,
#     #                             input_length=max_seq_length,
#     #                             trainable=False)(input_layer)
#     # conv_layer = tf.keras.layers.Conv1D(conv_width, 5, activation='relu')(embedding_layer)
#     # context_vector, attention_weights = Attention(attention_units)(conv_layer)
#     # dense_layer = tf.keras.layers.Dense(64, activation='relu')(context_vector)
#     # dropout_layer = tf.keras.layers.Dropout(0.6)(dense_layer)
#     # output_layer = tf.keras.layers.Dense(5, activation='softmax')(dropout_layer)

#     # conv_model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
#     # -------------------------------------------------------------------------------------
#     input_layer = Input(shape=(max_seq_length,))
#     embedding_layer = Embedding(input_dim=vocab_size,
#                     output_dim=embedding_size,
#                     input_length=max_seq_length,
#                     trainable=False)(input_layer)
#     conv_layer_1 = Conv1D(filters=conv_width, kernel_size=3, activation='relu', padding='same')(embedding_layer)
#     conv_layer_2 = Conv1D(filters=conv_width, kernel_size=4, activation='relu', padding='same')(conv_layer_1)
#     conv_layer_3 = Conv1D(filters=conv_width, kernel_size=5, activation='relu', padding='same')(conv_layer_2)
#     bn_layer_1 = BatchNormalization()(conv_layer_1)
#     bn_layer_2 = BatchNormalization()(conv_layer_2)
#     bn_layer_3 = BatchNormalization()(conv_layer_3)
#     concat_layer = Concatenate()([bn_layer_1, bn_layer_2, bn_layer_3])
#     pooling_layer = GlobalMaxPooling1D()(concat_layer)
#     context_vector, attention_weights = Attention(attention_units)(tf.expand_dims(pooling_layer, 1))
#     dense_layer = Dense(128, activation='relu')(context_vector)
#     dropout_layer = Dropout(0.5)(dense_layer)
#     output_layer = Dense(5, activation='softmax')(dropout_layer)
    
#     conv_model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    
#     # Compile the model with a lower initial learning rate and learning rate scheduler
#     initial_learning_rate = 0.01
#     lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#         initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True)
#     optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
   
#     conv_model.compile(optimizer=optimizer,
#                   loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#     # Define EarlyStopping callback to prevent overfitting
#     early_stopping = EarlyStopping(
#         monitor='accuracy', patience=10, restore_best_weights=True)

#     # Train the model
#     history = conv_model.fit(train_data, train_labels, epochs=50,
#                         batch_size=batch_size,
#                         validation_data=(dev_data, dev_labels),
#                         callbacks=[early_stopping],
#                         workers=8,
#                         )

#     # Evaluate the model on the test se
#     test_loss, test_accuracy = conv_model.evaluate(
#         test_data, test_labels, verbose=2)
#     logging.info(
#         f"Testing hyperparameters: Embedding Size={embedding_size}, Conv Depth={conv_depth}, Conv Width={conv_width}, Kernel Size={kernel_size}, Batch Size={batch_size}")
    
#     model_path = os.path.join(model_logs_dir, f'model_{round(test_accuracy,4)}')

#     # Save the entire model
#     tf.keras.models.save_model(conv_model, model_path)

#     # Record additional information in a JSON file
#     info = {
#         'Model':str(conv_model.summary()),
#         'Hyperparameters': params,
#         'Test Loss': test_loss,
#         'Test Accuracy': test_accuracy
#     }

#     info_path = os.path.join(
#         model_path, f'model__info.json')
#     with open(info_path, 'w') as info_file:
#         json.dump(info, info_file)

#     # Logging: You can log the saved model path and version
#     print(f"Saved model: {model_path}")
    
#     # Log the results in the same log file
#     model_summary = []
#     conv_model.summary(print_fn=lambda x: model_summary.append(x))
#     model_architecture = "\n".join(model_summary)
#     logging.info("Model Architecture:\n" + model_architecture)
#     logging.info(f"Test Loss: {test_loss}")
#     logging.info(f"Test Accuracy: {test_accuracy}\n#################################################################################")

## XAI for chosen model explainaibility

In [None]:
import lime
from lime import lime_text
from lime.lime_text import LimeTextExplainer
import tensorflow as tf

def preprocess_text(texts):
    sequences = tokenizer.texts_to_sequences(texts)
    padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
        sequences, maxlen=max_seq_length, padding='post')
    return padded_sequences


def predict_fn(texts):
    preprocessed_texts = preprocess_text(texts)
    return bi_lstm.predict(preprocessed_texts)


# Create a LimeTextExplainer
explainer = LimeTextExplainer(class_names=list(label_encoder.keys()))

# Choose an instance from your dataset
# Replace with an actual text from your dataset
instance = "What American composer wrote the music for `` West Side Story '' ?"

# Generate an explanation
explanation = explainer.explain_instance(
    instance, predict_fn, num_features=10, top_labels=3)

# Show the explanation for the top class
explanation.show_in_notebook(text=True)

## CNN + LSTM

In [None]:
# # from keras_self_attention import SeqSelfAttention

# # Define a grid of hyperparameters to search
# param_grid = {
#     'embedding_size': [100, 300],
#     'conv_width': [128],
#     'lstm_units': [64],
#     'batch_size': [64],
#     'dense_units': [64, 256],
#     'kernel_size': [5, 7]
# }

# param_combinations = list(ParameterGrid(param_grid))

# print('Param Combinations: ')
# for p in param_combinations:
#     print(p)

# model_folder = 'CNN+LSTM'

# # Create a directory for the current model
# model_logs_dir = os.path.join('model_logs', model_folder)
# if not os.path.exists(model_logs_dir):
#     os.mkdir(model_logs_dir)

# # Configure logging to save results to a single log file
# log_filepath = os.path.join(model_logs_dir, 'model_log.txt')
# logging.basicConfig(filename=log_filepath,
#                     level=logging.INFO,
#                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# logged_model = False
# # In the model architecture:
# for params in param_combinations:
#     embedding_size = params['embedding_size']
#     conv_width = params['conv_width']
#     lstm_units = params['lstm_units']
#     dense_units = params['dense_units']
#     batch_size = params['batch_size']
#     kernel_size = params['kernel_size']

#     model = tf.keras.Sequential([
#         tf.keras.layers.Embedding(input_dim=vocab_size,
#                                   output_dim=embedding_size,
#                                   input_length=max_seq_length,
#                                   trainable=False),
#         tf.keras.layers.Conv1D(conv_width, kernel_size, activation='relu'),
#         tf.keras.layers.MaxPooling1D(pool_size=2),
#         tf.keras.layers.LSTM(lstm_units, return_sequences=True),
#         tf.keras.layers.GlobalAveragePooling1D(),
#         tf.keras.layers.Dense(dense_units, activation='relu',
#                               kernel_regularizer=tf.keras.regularizers.l2(0.01)),
#         tf.keras.layers.Dropout(0.5),
#         tf.keras.layers.BatchNormalization(),
#         tf.keras.layers.Dense(5, activation='softmax')
#     ])
#     # Log the results in the same log file
#     if not logged_model:
#         model_summary = []
#         model.summary(print_fn=lambda x: model_summary.append(x))
#         model_architecture = "\n".join(model_summary)
#         logging.info("Model Architecture:\n" + model_architecture)
#         logged_model = True
#     # Compile the model with a lower initial learning rate and learning rate scheduler
#     initial_learning_rate = 0.01
#     lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
#         initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True)
#     optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

#     model.compile(optimizer=optimizer,
#                   loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#     # Define EarlyStopping callback to prevent overfitting
#     early_stopping = EarlyStopping(
#         monitor='val_loss', patience=6, restore_best_weights=True)

#     # Train the model
#     history = model.fit(train_data, train_labels, epochs=50,
#                         batch_size=batch_size,
#                         validation_data=(dev_data, dev_labels),
#                         callbacks=[early_stopping])

#     # Evaluate the model on the test se
#     test_loss, test_accuracy = model.evaluate(
#         test_data, test_labels, verbose=2)

   
#     logging.info(
#         f"Testing hyperparameters: Embedding Size={embedding_size}, Conv Width={conv_width}, LSTM Units={lstm_units}, Dense Units={dense_units}, Batch Size={batch_size}")
#     logging.info(f"Test Loss: {test_loss}")
#     logging.info(f"Test Accuracy: {test_accuracy}\n#####################################################################################################################\n")