<a href="https://colab.research.google.com/github/Abeszz/SC4002-NLP-Assignment/blob/main/SC4002.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installation & Requirements

In [1]:
pip install datasets



In [2]:
pip install nltk



In [3]:
pip install numpy



In [4]:
pip install npm



In [5]:
pip install torch



In [6]:
# Fetch the GloVe vectors file from Google Drive with gdown and store it
# under the local name in `glove_file`.
glove_file_id = '17CUd7jxuh6ptIljKaz_9gJQ8-40JXJ-F'  # Google Drive file id handed to gdown
glove_file = 'glove.6B.100d.txt'  # local output filename (gdown -O)
!gdown {glove_file_id} -O {glove_file}

Downloading...
From (original): https://drive.google.com/uc?id=17CUd7jxuh6ptIljKaz_9gJQ8-40JXJ-F
From (redirected): https://drive.google.com/uc?id=17CUd7jxuh6ptIljKaz_9gJQ8-40JXJ-F&confirm=t&uuid=046e18c6-43ca-429d-831f-2048deec21dc
To: /content/glove.6B.100d.txt
100% 347M/347M [00:07<00:00, 45.2MB/s]


In [7]:
import os
import nltk
import sys

# Keep the NLTK data inside the active Python environment: <sys.prefix>/nltk_data.
env_base_path = sys.prefix
nltk_path = os.path.join(env_base_path, 'nltk_data')
# Download the 'punkt' and 'punkt_tab' packages into that directory; the
# notebook later tokenizes text with nltk's word_tokenize.
nltk.download('punkt', nltk_path)
nltk.download('punkt_tab', nltk_path)

[nltk_data] Downloading package punkt to /usr/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /usr/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [8]:
from datasets import load_dataset
# Load the 'rotten_tomatoes' dataset through the Hugging Face `datasets` library.
dataset = load_dataset('rotten_tomatoes')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
# Pull the three named splits out of the loaded dataset; they are passed to
# run_experiments_from_csv at the end of the notebook.
train_dataset = dataset['train']
validation_dataset = dataset['validation']
test_dataset = dataset['test']

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
import numpy as np
from nltk.tokenize import word_tokenize
from collections import Counter
import csv

In [11]:
# Global variables
# Placeholder vocabulary entry that stands in for out-of-vocabulary words when
# the 'unknown_token' OOV handling method is selected.
UNKNOWN_TOKEN = '<UNKNOWN>'

In [12]:
# Functions to build vocabulary and create embedding matrix
def build_vocabulary(dataset, oov_handling_method='unknown_token'):
    """Collect the distinct lower-cased tokens of a dataset, in first-seen order.

    Args:
        dataset: Iterable of samples; each sample is indexed with ``'text'``.
        oov_handling_method: When equal to ``'unknown_token'``, UNKNOWN_TOKEN
            is appended to the result if it is not already a token.

    Returns:
        list: The vocabulary; a word's position is used as its index by the
        rest of the notebook.
    """
    # A dict keeps insertion order, so the first occurrence fixes a token's slot.
    first_seen = {}
    for sample in dataset:
        for token in word_tokenize(sample['text'].lower()):
            first_seen.setdefault(token, None)

    vocabulary = list(first_seen)
    needs_unknown = oov_handling_method == 'unknown_token' and UNKNOWN_TOKEN not in first_seen
    if needs_unknown:
        vocabulary.append(UNKNOWN_TOKEN)
    return vocabulary

def create_embedding_matrix(embedding_dim, vocabulary, glove_embeddings, oov_handling_method='unknown_token'):
    """Build a (len(vocabulary), embedding_dim) matrix of word vectors.

    Row ``i`` belongs to ``vocabulary[i]``. Words present in
    ``glove_embeddings`` receive their pre-trained vector; the remaining rows
    depend on ``oov_handling_method``:

    * ``'unknown_token'`` - every missing word shares one randomly drawn
      vector, stored in the UNKNOWN_TOKEN row.
    * ``'random'`` - every missing word gets its own random vector.
    * ``'none'`` - missing words stay all-zero.

    Any other value leaves the rows of missing words at zero.

    Args:
        embedding_dim: Length of each embedding vector.
        vocabulary: List of words; list position is the row index.
        glove_embeddings: Mapping from word to vector.
        oov_handling_method: One of the strategies listed above.

    Returns:
        numpy.ndarray: The filled embedding matrix.

    Raises:
        KeyError: In ``'unknown_token'`` mode when UNKNOWN_TOKEN is not in
            ``vocabulary``.
    """
    word_to_index = {}
    for position, word in enumerate(vocabulary):
        word_to_index[word] = position

    embedding_matrix = np.zeros((len(vocabulary), embedding_dim))

    share_unknown = oov_handling_method == 'unknown_token'
    if share_unknown:
        # Draw the shared vector first, then park it in the UNKNOWN_TOKEN row.
        shared_vector = np.random.normal(scale=0.6, size=(embedding_dim,))
        unknown_index = word_to_index[UNKNOWN_TOKEN]
        embedding_matrix[unknown_index] = shared_vector

    for word, idx in word_to_index.items():
        if word in glove_embeddings:
            embedding_matrix[idx] = glove_embeddings[word]
            continue

        # Word has no pre-trained vector: apply the selected fallback.
        if share_unknown:
            fallback = embedding_matrix[unknown_index]
        elif oov_handling_method == 'random':
            fallback = np.random.normal(scale=0.6, size=(embedding_dim,))
        elif oov_handling_method == 'none':
            fallback = np.zeros(embedding_dim)
        else:
            continue
        embedding_matrix[idx] = fallback

    return embedding_matrix



In [13]:
# Function to load GloVe embeddings
def load_glove_embeddings(glove_file_path):
    """Parse a GloVe-style text file into a word -> vector dictionary.

    Each non-blank line is split on whitespace; the first field is the word
    and the remaining fields are parsed as float32 components.

    Args:
        glove_file_path: Path to a UTF-8 text file with one entry per line.

    Returns:
        dict: Maps each word to a 1-D ``numpy.ndarray`` of dtype float32.

    Raises:
        ValueError: If a component cannot be converted to a float.
    """
    embeddings_index = {}
    with open(glove_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no entry; skipping them avoids an IndexError on values[0].
            if not values:
                continue
            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')
    return embeddings_index

In [14]:
# TextDataset class for loading data
class TextDataset(Dataset):
    """Map-style dataset that turns raw text samples into index tensors.

    Each underlying sample is indexed with ``'text'`` and ``'label'``. The
    text is lower-cased, tokenized with ``word_tokenize`` and every token is
    translated through ``word_to_index``.

    Tokens missing from ``word_to_index`` are handled according to
    ``oov_handling_method``:

    * ``'unknown_token'`` - replaced by the index of UNKNOWN_TOKEN.
    * anything else (``'random'``, ``'none'``, ...) - dropped.

    The mapping is never modified. The embedding matrix built elsewhere in
    this notebook has exactly one row per vocabulary word, so handing out new
    indices for unseen words (as the 'random' branch used to do) produced
    indices with no embedding row and also changed the dictionary shared by
    the train/validation/test datasets.
    """

    def __init__(self, dataset, vocabulary, word_to_index, oov_handling_method='unknown_token'):
        """Store the data source and lookup tables.

        Args:
            dataset: Indexable collection of samples with 'text' and 'label'.
            vocabulary: List of known words (kept for reference).
            word_to_index: Mapping from word to embedding row index.
            oov_handling_method: Strategy for tokens absent from the mapping.
        """
        self.dataset = dataset
        self.vocabulary = vocabulary
        self.word_to_index = word_to_index
        self.oov_handling_method = oov_handling_method

    def __len__(self):
        """Return the number of samples in the underlying dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(indices, label)`` for sample ``idx`` as int64 tensors.

        ``indices`` is 1-D and may be empty when every token was dropped.
        """
        # Fetch the row once instead of indexing the dataset per field.
        sample = self.dataset[idx]
        label = sample['label']
        tokens = word_tokenize(sample['text'].lower())

        indices = []
        for token in tokens:
            index = self.word_to_index.get(token)
            if index is None:
                if self.oov_handling_method == 'unknown_token':
                    index = self.word_to_index[UNKNOWN_TOKEN]
                else:
                    # No embedding row exists for this token: leave it out.
                    continue
            indices.append(index)
        return torch.tensor(indices, dtype=torch.long), torch.tensor(label, dtype=torch.long)


In [15]:
# Custom collate functions
def collate_fn(batch):
    """Collate variable-length samples into one right-padded batch.

    Args:
        batch: Sequence of ``(indices, label)`` pairs. ``indices`` may be a
            1-D tensor or any sequence ``torch.as_tensor`` accepts; ``label``
            must be a tensor because the labels are stacked.

    Returns:
        tuple: ``(padded_inputs, labels)`` where ``padded_inputs`` has shape
        (batch, longest sequence) and ``labels`` has one entry per sample.
    """
    inputs, labels = zip(*batch)
    # as_tensor passes existing tensors through unchanged; torch.tensor(x) on
    # a tensor makes a copy and emits a UserWarning for every sample.
    inputs = [torch.as_tensor(x) for x in inputs]
    # NOTE(review): 0 is used as the padding value, but index 0 is also a
    # regular row of the vocabulary built in this notebook - confirm whether a
    # dedicated padding index is wanted.
    padded_inputs = pad_sequence(inputs, batch_first=True, padding_value=0)  # Pad sequences
    labels = torch.stack(labels)
    return padded_inputs, labels

def collate_fn_cnn(batch, max_length=100):
    """Collate samples into a batch of fixed-length sequences.

    Sequences of at least ``max_length`` items are cut to ``max_length``;
    shorter ones are extended on the right with int64 zeros.

    Args:
        batch: Sequence of ``(indices, label)`` pairs with 1-D tensor indices.
        max_length: Length every sequence is brought to.

    Returns:
        tuple: ``(inputs, labels)`` with ``inputs`` of shape
        (batch, max_length) and ``labels`` built with ``torch.tensor``.
    """
    sequences, targets = zip(*batch)

    fixed_length = []
    for sequence in sequences:
        if len(sequence) < max_length:
            filler = torch.zeros(max_length - len(sequence), dtype=torch.long)
            sequence = torch.cat([sequence, filler])
        else:
            sequence = sequence[:max_length]
        fixed_length.append(sequence)

    inputs = torch.stack(fixed_length)
    labels = torch.tensor(targets)
    return inputs, labels


In [16]:
# SentimentRNN class
class SentimentRNN(nn.Module):
    """Recurrent text classifier on top of a pre-trained embedding matrix.

    Token indices are embedded, run through an RNN/LSTM/GRU, reduced to one
    feature vector per sample and projected to ``output_size`` scores.

    Args:
        embedding_matrix: 2-D array (vocab_size, embedding_dim) of initial
            embedding weights.
        hidden_size: Hidden state size of the recurrent layer.
        output_size: Number of output scores.
        rnn_type: ``"LSTM"`` or ``"GRU"``; any other value selects ``nn.RNN``.
        num_layers: Number of stacked recurrent layers.
        use_bidirectional: Run the recurrent layer in both directions.
        use_dropout: Apply ``nn.Dropout(0.3)`` to the aggregated features.
        use_batch_norm: Apply ``nn.BatchNorm1d`` to the aggregated features.
        use_layer_norm: Apply ``nn.LayerNorm`` to the aggregated features.
        aggregation_method: ``'last_hidden'``, ``'mean_pooling'`` or
            ``'max_pooling'``; checked when ``forward`` runs.
        freeze_embeddings: When True the embedding weights get no gradient.
    """

    def __init__(self, embedding_matrix, hidden_size, output_size,
                 rnn_type="RNN", num_layers=1, use_bidirectional=False,
                 use_dropout=False, use_batch_norm=False, use_layer_norm=False,
                 aggregation_method='last_hidden', freeze_embeddings=True):
        super().__init__()

        vocab_size, embedding_dim = embedding_matrix.shape

        # Embedding layer whose weights are replaced by the supplied matrix.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        pretrained_weights = torch.tensor(embedding_matrix, dtype=torch.float32)
        self.embedding.weight = nn.Parameter(pretrained_weights)
        self.embedding.weight.requires_grad = not freeze_embeddings

        # Look the recurrent class up by name; unknown names fall back to nn.RNN.
        recurrent_cls = {"LSTM": nn.LSTM, "GRU": nn.GRU}.get(rnn_type, nn.RNN)
        self.rnn = recurrent_cls(embedding_dim, hidden_size, num_layers=num_layers,
                                 batch_first=True, bidirectional=use_bidirectional)

        self.aggregation_method = aggregation_method

        # Both directions are concatenated, doubling the feature width, for
        # every aggregation method.
        final_hidden_size = hidden_size * 2 if use_bidirectional else hidden_size

        # Classification head.
        self.fc = nn.Linear(final_hidden_size, output_size)

        # Optional layers applied to the aggregated features.
        self.use_dropout = use_dropout
        self.use_batch_norm = use_batch_norm
        self.use_layer_norm = use_layer_norm

        if self.use_dropout:
            self.dropout = nn.Dropout(0.3)
        if self.use_batch_norm:
            self.batch_norm = nn.BatchNorm1d(final_hidden_size)
        if self.use_layer_norm:
            self.layer_norm = nn.LayerNorm(final_hidden_size)

    def forward(self, x):
        """Score a batch of index sequences.

        Args:
            x: Integer tensor of token indices, batch dimension first.

        Returns:
            Tensor of shape (batch, output_size).

        Raises:
            ValueError: If ``aggregation_method`` is not a supported name.
        """
        output, hidden = self.rnn(self.embedding(x))

        # LSTM returns (h_n, c_n); only h_n is used.
        if isinstance(hidden, tuple):
            hidden = hidden[0]

        method = self.aggregation_method
        if method == 'last_hidden':
            if self.rnn.bidirectional:
                # The last two entries of h_n are the two directions of the top layer.
                features = torch.cat((hidden[-2], hidden[-1]), dim=1)
            else:
                features = hidden[-1]
        elif method == 'mean_pooling':
            features = output.mean(dim=1)
        elif method == 'max_pooling':
            features = output.max(dim=1).values
        else:
            raise ValueError(f"Unknown aggregation method: {self.aggregation_method}")

        # Order matters: batch norm, then layer norm, then dropout.
        if self.use_batch_norm:
            features = self.batch_norm(features)
        if self.use_layer_norm:
            features = self.layer_norm(features)
        if self.use_dropout:
            features = self.dropout(features)

        return self.fc(features)

In [17]:
# SentimentCNN class
class SentimentCNN(nn.Module):
    """Convolutional text classifier on top of a pre-trained embedding matrix.

    One ``nn.Conv2d`` per filter size slides over the embedded sequence with
    a kernel spanning the full embedding width; each feature map is
    max-pooled over the sequence axis, the results are concatenated, passed
    through dropout and projected to ``output_size`` scores.

    Args:
        embedding_matrix: 2-D array (vocab_size, embedding_dim) of initial
            embedding weights.
        output_size: Number of output scores.
        freeze_embeddings: When True the embedding weights get no gradient.
        num_filters: Output channels of every convolution.
        filter_sizes: Kernel heights (number of tokens covered), one
            convolution per entry.
        dropout_rate: Dropout probability applied before the final layer.
    """

    def __init__(self, embedding_matrix, output_size, freeze_embeddings=True,
                 num_filters=100, filter_sizes=[3,4,5], dropout_rate=0.5):
        super().__init__()

        vocab_size, embedding_dim = embedding_matrix.shape

        # Embedding layer whose weights are replaced by the supplied matrix.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        pretrained_weights = torch.tensor(embedding_matrix, dtype=torch.float32)
        self.embedding.weight = nn.Parameter(pretrained_weights)
        self.embedding.weight.requires_grad = not freeze_embeddings

        # One convolution per filter size, each covering whole embedding rows.
        conv_layers = []
        for height in filter_sizes:
            conv_layers.append(
                nn.Conv2d(in_channels=1, out_channels=num_filters,
                          kernel_size=(height, embedding_dim))
            )
        self.convs = nn.ModuleList(conv_layers)

        # Classification head over the concatenated pooled features.
        self.fc = nn.Linear(len(filter_sizes) * num_filters, output_size)

        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Score a batch of index sequences.

        Args:
            x: Integer tensor of token indices, shape (batch, seq_length).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # (batch, seq_length, embedding_dim) -> (batch, 1, seq_length, embedding_dim)
        embedded = self.embedding(x).unsqueeze(1)

        pooled_features = []
        for conv in self.convs:
            # The kernel spans the embedding width, so the last axis has size 1.
            feature_map = torch.relu(conv(embedded)).squeeze(3)
            # Max over the sequence axis keeps one value per filter.
            pooled_features.append(feature_map.max(dim=2).values)

        combined = torch.cat(pooled_features, dim=1)
        return self.fc(self.dropout(combined))


In [18]:
# Function to get optimizer
def get_optimizer(params, model):
    """Create the optimizer described by ``params`` for ``model``.

    Args:
        params: Mapping with the required keys ``"optimizer_type"``
            (``"SGD"`` or ``"Adam"``) and ``"learning_rate"``, and the
            optional keys ``"weight_decay"`` and ``"momentum"`` (both
            default to 0; momentum is only used by SGD).
        model: Module whose parameters are optimized.

    Returns:
        torch.optim.Optimizer: An ``optim.SGD`` or ``optim.Adam`` instance.

    Raises:
        KeyError: If a required key is missing from ``params``.
        ValueError: If the optimizer type is not supported.
    """
    optimizer_type = params["optimizer_type"]
    lr = params["learning_rate"]
    weight_decay = params.get("weight_decay", 0)

    # Reject unsupported names up front, after the required keys were read.
    if optimizer_type not in ("SGD", "Adam"):
        raise ValueError(f"Unknown optimizer type: {optimizer_type}")

    if optimizer_type == "Adam":
        return optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    return optim.SGD(model.parameters(), lr=lr,
                     momentum=params.get("momentum", 0), weight_decay=weight_decay)

In [19]:
# Function to train the model
def train_model(model, train_loader, valid_loader, test_loader, optimizer, epochs, patience, scheduler_step_size, scheduler_gamma, device):
    """Train ``model`` with early stopping and report its test accuracy.

    Every epoch runs one pass over ``train_loader`` with cross-entropy loss,
    then measures validation accuracy. Whenever the validation accuracy
    strictly improves, the weights are saved to ``best_model_<epoch>.pt`` in
    the working directory. Training stops after ``patience`` consecutive
    epochs without improvement. The best saved weights (if any were saved)
    are reloaded before the test set is evaluated.

    Args:
        model: Module to train; expected to already live on ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        valid_loader: Iterable of validation batches.
        test_loader: Iterable of test batches.
        optimizer: Optimizer bound to the model's parameters.
        epochs: Maximum number of epochs.
        patience: Non-improving epochs tolerated before stopping.
        scheduler_step_size: ``step_size`` of the StepLR scheduler.
        scheduler_gamma: ``gamma`` of the StepLR scheduler.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(train_losses, val_accuracies, test_accuracies)``, three
        lists with one entry per completed epoch. ``test_accuracies`` repeats
        the single final test accuracy so all lists have the same length.
    """
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step_size, gamma=scheduler_gamma)

    train_losses = []
    val_accuracies = []
    best_val_accuracy = 0
    # Path of the most recent improving checkpoint; stays None when no epoch
    # ran or none beat the initial accuracy of 0.
    best_checkpoint = None
    epochs_no_improve = 0

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        avg_train_loss = running_loss / len(train_loader)
        train_losses.append(avg_train_loss)

        # Validation
        val_accuracy = evaluate_accuracy(model, valid_loader, device)
        val_accuracies.append(val_accuracy)

        print(f'Epoch {epoch+1}, Loss: {avg_train_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # Early stopping logic
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            epochs_no_improve = 0
            # Save the best model
            best_checkpoint = f'best_model_{epoch+1}.pt'
            torch.save(model.state_dict(), best_checkpoint)
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch + 1}")
            break

        scheduler.step()

    # Restore the best weights before testing. Remembering the path avoids
    # looking the best epoch up in val_accuracies, which failed when no epoch
    # ran and pointed at a file that was never written when nothing improved.
    if best_checkpoint is not None:
        # weights_only=True: the file holds only a state dict of tensors.
        best_state = torch.load(best_checkpoint, map_location=device, weights_only=True)
        model.load_state_dict(best_state)

    # Test Accuracy
    test_accuracy = evaluate_accuracy(model, test_loader, device)
    test_accuracies = [test_accuracy] * len(train_losses)
    print(f'Test Accuracy: {test_accuracy:.2f}%')

    return train_losses, val_accuracies, test_accuracies

In [20]:
# Function to evaluate accuracy
def evaluate_accuracy(model, data_loader, device):
    """Compute the classification accuracy of ``model`` in percent.

    The model is switched to evaluation mode and gradients are disabled. The
    predicted class of a sample is the index of its highest score.

    Args:
        model: Module returning one row of class scores per sample.
        data_loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        float: ``100 * correct / total``, or ``0.0`` when ``data_loader``
        yields no samples (instead of dividing by zero).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader has no defined accuracy; report 0.0 rather than crash.
    if total == 0:
        return 0.0
    return 100 * correct / total


In [21]:
# Function to run experiments from CSV
def run_experiments_from_csv(params_csv, train_dataset, validation_dataset, test_dataset):
    """Train and evaluate one model per row of a parameter CSV.

    Each row of ``params_csv`` describes one experiment (model type,
    architecture switches, optimizer settings, OOV strategy, GloVe file).
    For every row the vocabulary and embedding matrix are built from
    ``train_dataset``, the requested model is trained with ``train_model``
    and one result line per completed epoch is written to
    ``training_results.csv`` in the working directory (the file is
    overwritten on every call).

    Columns that may hold the literal ``'N/A'`` (parsed to ``None``):
    ``num_layers``, ``use_bidirectional``, ``hidden_size``, ``num_filters``,
    ``filter_sizes`` and ``dropout_rate``. Boolean columns are true only when
    the cell is exactly ``'True'``. ``filter_sizes`` is a comma-separated
    list of integers inside a single cell.

    Args:
        params_csv: Path to the CSV file with one experiment per row.
        train_dataset: Training samples (also the source of the vocabulary).
        validation_dataset: Samples used for early stopping.
        test_dataset: Samples used for the final accuracy.

    Raises:
        ValueError: If a row has an unknown ``model_type`` (or an unsupported
            optimizer type, raised by ``get_optimizer``).
        KeyError: If a required column is missing.
    """
    csv_filename = 'training_results.csv'
    fieldnames = ['Epoch', 'Train Loss', 'Validation Accuracy', 'Test Accuracy',
                  'model_type', 'rnn_type', 'num_layers', 'bidirectional', 'dropout', 'batch_norm', 'layer_norm',
                  'aggregation_method', 'optimizer_type', 'learning_rate', 'momentum', 'weight_decay',
                  'batch_size', 'epochs', 'patience', 'hidden_size', 'output_size',
                  'freeze_embeddings', 'num_filters', 'filter_sizes', 'dropout_rate', 'oov_handling_method',
                  'embedding_dim', 'glove_file_path']

    # Parsed GloVe files keyed by path: rows that share an embedding file
    # parse it once instead of once per experiment.
    glove_cache = {}

    # Read the configurations and write the results side by side.
    with open(params_csv, mode='r', newline='') as params_file, \
            open(csv_filename, mode='w', newline='') as result_file:
        reader = csv.DictReader(params_file)
        writer = csv.DictWriter(result_file, fieldnames=fieldnames)
        writer.writeheader()

        for config in reader:
            # Convert necessary parameters to appropriate types
            config['num_layers'] = int(config['num_layers']) if config['num_layers'] != 'N/A' else None
            config['use_bidirectional'] = config['use_bidirectional'] == 'True' if config['use_bidirectional'] != 'N/A' else None
            config['use_dropout'] = config['use_dropout'] == 'True'
            config['use_batch_norm'] = config['use_batch_norm'] == 'True'
            config['use_layer_norm'] = config['use_layer_norm'] == 'True'
            config['batch_size'] = int(config['batch_size'])
            config['epochs'] = int(config['epochs'])
            config['patience'] = int(config['patience'])
            config['hidden_size'] = int(config['hidden_size']) if config['hidden_size'] != 'N/A' else None
            config['output_size'] = int(config['output_size'])
            config['learning_rate'] = float(config['learning_rate'])
            config['momentum'] = float(config['momentum'])
            config['weight_decay'] = float(config['weight_decay'])
            config['freeze_embeddings'] = config['freeze_embeddings'] == 'True'
            config['num_filters'] = int(config['num_filters']) if config['num_filters'] != 'N/A' else None
            config['filter_sizes'] = [int(fs) for fs in config['filter_sizes'].split(',')] if config['filter_sizes'] != 'N/A' else None
            config['dropout_rate'] = float(config['dropout_rate']) if config['dropout_rate'] != 'N/A' else None
            config['embedding_dim'] = int(config['embedding_dim'])

            # Load GloVe embeddings (reusing an earlier parse of the same file)
            glove_path = config['glove_file_path']
            if glove_path not in glove_cache:
                glove_cache[glove_path] = load_glove_embeddings(glove_path)
            glove_embeddings = glove_cache[glove_path]

            # Build vocabulary and create embedding matrix
            vocabulary = build_vocabulary(train_dataset, config['oov_handling_method'])
            embedding_matrix = create_embedding_matrix(config['embedding_dim'], vocabulary, glove_embeddings, config['oov_handling_method'])

            # Build word_to_index mapping
            word_to_index = {word: idx for idx, word in enumerate(vocabulary)}

            # Initialize the model based on 'model_type'
            if config['model_type'] == 'RNN':
                model = SentimentRNN(
                    embedding_matrix=embedding_matrix,
                    hidden_size=config['hidden_size'],
                    output_size=config['output_size'],
                    rnn_type=config['rnn_type'],
                    num_layers=config['num_layers'],
                    use_bidirectional=config['use_bidirectional'],
                    use_dropout=config['use_dropout'],
                    use_batch_norm=config['use_batch_norm'],
                    use_layer_norm=config['use_layer_norm'],
                    aggregation_method=config['aggregation_method'],
                    freeze_embeddings=config['freeze_embeddings']
                )
                collate_function = collate_fn
            elif config['model_type'] == 'CNN':
                model = SentimentCNN(
                    embedding_matrix=embedding_matrix,
                    output_size=config['output_size'],
                    freeze_embeddings=config['freeze_embeddings'],
                    num_filters=config['num_filters'],
                    filter_sizes=config['filter_sizes'],
                    dropout_rate=config['dropout_rate']
                )
                collate_function = collate_fn_cnn
            else:
                raise ValueError(f"Unknown model type: {config['model_type']}")

            # Get the optimizer dynamically based on the config
            optimizer_params = {
                'optimizer_type': config['optimizer_type'],
                'learning_rate': config['learning_rate'],
                'momentum': config['momentum'],
                'weight_decay': config['weight_decay']
            }
            optimizer = get_optimizer(optimizer_params, model)

            # Create DataLoaders using the appropriate collate function
            train_loader = DataLoader(TextDataset(train_dataset, vocabulary, word_to_index, config['oov_handling_method']), batch_size=config['batch_size'], shuffle=True, collate_fn=collate_function)
            valid_loader = DataLoader(TextDataset(validation_dataset, vocabulary, word_to_index, config['oov_handling_method']), batch_size=config['batch_size'], collate_fn=collate_function)
            test_loader = DataLoader(TextDataset(test_dataset, vocabulary, word_to_index, config['oov_handling_method']), batch_size=config['batch_size'], collate_fn=collate_function)

            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            model.to(device)

            # Train the model
            train_loss, val_accuracy, test_accuracy = train_model(
                model=model,
                train_loader=train_loader,
                valid_loader=valid_loader,
                test_loader=test_loader,
                optimizer=optimizer,
                epochs=config['epochs'],
                patience=config['patience'],
                scheduler_step_size=3,
                scheduler_gamma=0.1,
                device=device
            )

            # Columns that are identical for every epoch of this experiment.
            run_columns = {
                'model_type': config['model_type'],
                'rnn_type': config['rnn_type'],
                'num_layers': config['num_layers'],
                'bidirectional': config['use_bidirectional'],
                'dropout': config['use_dropout'],
                'batch_norm': config['use_batch_norm'],
                'layer_norm': config['use_layer_norm'],
                'aggregation_method': config['aggregation_method'],
                'optimizer_type': config['optimizer_type'],
                'learning_rate': config['learning_rate'],
                'momentum': config['momentum'],
                'weight_decay': config['weight_decay'],
                'batch_size': config['batch_size'],
                'epochs': config['epochs'],
                'patience': config['patience'],
                'hidden_size': config['hidden_size'],
                'output_size': config['output_size'],
                'freeze_embeddings': config['freeze_embeddings'],
                'num_filters': config['num_filters'],
                'filter_sizes': config['filter_sizes'],
                'dropout_rate': config['dropout_rate'],
                'oov_handling_method': config['oov_handling_method'],
                'embedding_dim': config['embedding_dim'],
                'glove_file_path': config['glove_file_path']
            }

            # Log one row per completed epoch after training finishes.
            per_epoch = zip(train_loss, val_accuracy, test_accuracy)
            for epoch_number, (loss, val_acc, test_acc) in enumerate(per_epoch, start=1):
                writer.writerow({
                    'Epoch': epoch_number,
                    'Train Loss': loss,
                    'Validation Accuracy': val_acc,
                    'Test Accuracy': test_acc,
                    **run_columns
                })
            # Push finished experiments to disk so they survive a later failure.
            result_file.flush()

    print(f"Experiments completed. Results saved to {csv_filename}.")

In [24]:
# # To download experiment params from gdrive (not used now, use the create csv below for colab env)
# experiment_param_file_id = '1NsO-vQyC_CJjykdd3gbea1CdzHT5pKyY'
# experiment_param_file = 'experiment_params.csv'
# !gdown {experiment_param_file_id} -O {experiment_param_file}

Downloading...
From: https://drive.google.com/uc?id=1NsO-vQyC_CJjykdd3gbea1CdzHT5pKyY
To: /content/experiment_params.csv
  0% 0.00/449 [00:00<?, ?B/s]100% 449/449 [00:00<00:00, 1.48MB/s]


In [32]:
# Function to create CSV in colab environment

import csv

# String data in CSV format
# Column names of the experiment parameter CSV, in file order.
# run_experiments_from_csv reads every cell as text and converts it itself:
#   * boolean columns are true only when the cell is exactly 'True';
#   * num_layers, use_bidirectional, hidden_size, num_filters, filter_sizes and
#     dropout_rate may hold the literal 'N/A', which is parsed to None.
header = [
    "model_type",          # Possible Values: 'RNN', 'CNN'                                  | Meaning: Specifies the type of model to use.                    | Type: str
    "rnn_type",            # Possible Values: 'RNN', 'LSTM', 'GRU'                          | Meaning: The type of recurrent layer to use in the RNN.         | Type: str
    "num_layers",          # Possible Values: Positive integers (e.g., 1, 2) or 'N/A'       | Meaning: The number of layers in the RNN.                       | Type: int
    "use_bidirectional",   # Possible Values: True, False or 'N/A'                          | Meaning: Whether to use a bidirectional RNN.                    | Type: bool
    "use_dropout",         # Possible Values: True, False                                   | Meaning: Whether to apply dropout regularization in the model.  | Type: bool
    "use_batch_norm",      # Possible Values: True, False                                   | Meaning: Whether to apply batch normalization.                  | Type: bool
    "use_layer_norm",      # Possible Values: True, False                                   | Meaning: Whether to apply layer normalization.                  | Type: bool
    "aggregation_method",  # Possible Values: 'last_hidden', 'mean_pooling', 'max_pooling'  | Meaning: Method to aggregate the sequence of hidden states.     | Type: str
    "optimizer_type",      # Possible Values: 'SGD', 'Adam'                                 | Meaning: The optimizer to use.                                  | Type: str
    "learning_rate",       # Possible Values: Positive floats (e.g., 0.01)                  | Meaning: Learning rate for the optimizer.                       | Type: float
    "momentum",            # Possible Values: Floats between 0 and 1 (e.g., 0.9)            | Meaning: Momentum factor (only for 'SGD').                      | Type: float
    "weight_decay",        # Possible Values: Non-negative floats (e.g., 0.0001)            | Meaning: Weight decay (L2 regularization).                      | Type: float
    "batch_size",          # Possible Values: Positive integers (e.g., 32)                  | Meaning: Batch size during training.                            | Type: int
    "epochs",              # Possible Values: Positive integers (e.g., 20)                  | Meaning: Maximum number of training epochs.                     | Type: int
    "patience",            # Possible Values: Positive integers (e.g., 3)                   | Meaning: Number of epochs with no improvement before stopping.  | Type: int
    "hidden_size",         # Possible Values: Positive integers (e.g., 128) or 'N/A'        | Meaning: Number of features in the hidden state.                | Type: int
    "output_size",         # Possible Values: Positive integers (e.g., 2)                   | Meaning: Number of output classes.                              | Type: int
    "freeze_embeddings",   # Possible Values: True, False                                   | Meaning: Whether to freeze the embedding layer weights.         | Type: bool
    "num_filters",         # Possible Values: Positive integers (e.g., 100) or 'N/A'        | Meaning: Number of filters in the CNN.                          | Type: int
    "filter_sizes",        # Possible Values: Comma-separated ints in ONE cell ("3,4,5"), or 'N/A'; a Python list would be written as "[3, 4, 5]" and fail to parse | Meaning: Sizes of convolutional filters. | Type: str, parsed to list of int
    "dropout_rate",        # Possible Values: Floats between 0 and 1 (e.g., 0.5) or 'N/A'   | Meaning: Dropout rate after convolutional layers.               | Type: float
    "oov_handling_method", # Possible Values: 'unknown_token', 'random', 'none'             | Meaning: Strategy to handle OOV words.                          | Type: str
    "embedding_dim",       # Possible Values: Positive integers (e.g., 50, 100, 200, 300)   | Meaning: Dimension of word embeddings.                          | Type: int
    "glove_file_path"      # Possible Values: String (file path)                            | Meaning: Path to the GloVe embedding file.                      | Type: str
]


# One experiment per inner list, values in the same order as `header`.
# The row below configures a single-layer simple RNN; its CNN-only columns
# (num_filters, filter_sizes, dropout_rate) are set to 0 and are only passed
# on when model_type is 'CNN'.
data = [
    [
        "RNN", "RNN", 1, False, False, False, False, "last_hidden", "Adam", 0.01, 0, 0.0001, 32, 100, 3, 128, 2, True, 0, 0, 0, "none", 100, "glove.6B.100d.txt"
    ]
]

# Define the filename
filename = "experiment_params.csv"

# Write the data to a CSV file
# newline='' lets the csv module control line endings itself.
with open(filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    writer.writerows(data)

In [33]:
# Run every experiment listed in the parameter CSV written by the previous
# cell. `filename` is used because `experiment_param_file` is only assigned in
# the commented-out download cell and is therefore undefined on a fresh kernel.
run_experiments_from_csv(filename, train_dataset, validation_dataset, test_dataset)

  inputs = [torch.tensor(x) for x in inputs]


Epoch 1, Loss: 0.7485, Validation Accuracy: 50.00%
Epoch 2, Loss: 0.7225, Validation Accuracy: 50.00%
Epoch 3, Loss: 0.7304, Validation Accuracy: 50.00%
Epoch 4, Loss: 0.6958, Validation Accuracy: 50.09%
Epoch 5, Loss: 0.6977, Validation Accuracy: 50.00%
Epoch 6, Loss: 0.6975, Validation Accuracy: 50.00%
Epoch 7, Loss: 0.6937, Validation Accuracy: 50.09%
Early stopping at epoch 7


  model.load_state_dict(torch.load(f'best_model_{best_epoch}.pt'))


Test Accuracy: 50.00%
Experiments completed. Results saved to training_results.csv.
