# Model Training and Evaluation

## Loading Tokenized Data

### Import Libraries

In [1]:
import torch
from torch.utils.data import Dataset
from transformers import BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import pickle
import pandas as pd

### Load Encodings and Labels

In [2]:
# Load training data
with open('../data/train_encodings.pkl', 'rb') as f:
    train_encodings = pickle.load(f)

with open('../data/train_labels.pkl', 'rb') as f:
    train_labels = pickle.load(f)

# Load testing data
with open('../data/test_encodings.pkl', 'rb') as f:
    test_encodings = pickle.load(f)

with open('../data/test_labels.pkl', 'rb') as f:
    test_labels = pickle.load(f)

### Check for GPU

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')

Using device: cpu


## Data Preparation for Modeling

### Define Custom Dataset Class

In [4]:
class IMDBDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

### Split Training and Validation Sets

In [5]:
from sklearn.model_selection import train_test_split

indices = list(range(len(train_labels)))
train_indices, val_indices = train_test_split(indices, test_size=0.1, random_state=42)

# Create subsets for training and validation
train_subset_encodings = {key: val[train_indices] for key, val in train_encodings.items()}
val_subset_encodings = {key: val[val_indices] for key, val in train_encodings.items()}
train_subset_labels = [train_labels[i] for i in train_indices]
val_subset_labels = [train_labels[i] for i in val_indices]

### Create Dataset Objects

In [6]:
train_dataset = IMDBDataset(train_subset_encodings, train_subset_labels)
val_dataset = IMDBDataset(val_subset_encodings, val_subset_labels)
test_dataset = IMDBDataset(test_encodings, test_labels)

## Model Selection and Hyperparameter Tuning

### Initialize the Model

In [7]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

### Define Evaluation Metrics

In [8]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

### Set Up Hyperparameter Search

In [9]:
from sklearn.model_selection import ParameterGrid

param_grid = {
    'learning_rate': [2e-5, 3e-5, 5e-5],
    'per_device_train_batch_size': [8, 16],
    'num_train_epochs': [2, 3, 4]
}

best_accuracy = 0
best_params = None

## Training the Model with Early Stopping

### Iterate Over Hyperparameters

In [10]:
for params in ParameterGrid(param_grid):
    print(f"Training with params: {params}")

    training_args = TrainingArguments(
        output_dir='./results',
        learning_rate=params['learning_rate'],
        per_device_train_batch_size=params['per_device_train_batch_size'],
        per_device_eval_batch_size=64,
        num_train_epochs=params['num_train_epochs'],
        evaluation_strategy='epoch',
        save_strategy='epoch',
        load_best_model_at_end=True,
        metric_for_best_model='accuracy',
        logging_dir='./logs',
        logging_steps=10,
        weight_decay=0.01,
        fp16=True if torch.cuda.is_available() else False
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
    )

    trainer.train()

    eval_results = trainer.evaluate()
    accuracy = eval_results['eval_accuracy']
    print(f"Validation Accuracy: {accuracy}")

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params
        # Save the best model
        trainer.save_model('../models/best_model')
        tokenizer.save_pretrained('../models/best_model')

print(f"Best Hyperparameters: {best_params}")
print(f"Best Validation Accuracy: {best_accuracy}")

Training with params: {'learning_rate': 2e-05, 'num_train_epochs': 2, 'per_device_train_batch_size': 8}




  0%|          | 0/5626 [00:00<?, ?it/s]

{'loss': 0.7041, 'grad_norm': 2.6324100494384766, 'learning_rate': 1.996445076430857e-05, 'epoch': 0.0}
{'loss': 0.7291, 'grad_norm': 3.727536678314209, 'learning_rate': 1.9928901528617136e-05, 'epoch': 0.01}
{'loss': 0.7017, 'grad_norm': 5.75235652923584, 'learning_rate': 1.98933522929257e-05, 'epoch': 0.01}
{'loss': 0.7344, 'grad_norm': 9.054277420043945, 'learning_rate': 1.985780305723427e-05, 'epoch': 0.01}
{'loss': 0.6686, 'grad_norm': 6.259936809539795, 'learning_rate': 1.982225382154284e-05, 'epoch': 0.02}
{'loss': 0.675, 'grad_norm': 4.813767433166504, 'learning_rate': 1.9786704585851408e-05, 'epoch': 0.02}
{'loss': 0.6687, 'grad_norm': 3.306215524673462, 'learning_rate': 1.9751155350159973e-05, 'epoch': 0.02}


KeyboardInterrupt: 