In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter

from transformers import AutoTokenizer, AutoModel

# 1. Dataset Preparation (10%)

In [17]:
# Define correct column names
column_names = ["sentence", "label"]

def load_dataset1(train_url, test_url):
    train_df = pd.read_csv(train_url, sep='\t', names=column_names, header=None)
    test_df = pd.read_csv(test_url, sep='\t', names=column_names, header=None)
    return train_df, test_df

# URLs for SST2 dataset
train_url = "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/master/data/SST2/train.tsv"
test_url = "https://raw.githubusercontent.com/clairett/pytorch-sentiment-classification/master/data/SST2/test.tsv"

# Load datasets
train_df, test_df = load_dataset1(train_url, test_url)

# Display first few rows to confirm correct loading
print(train_df.head())


# Use the 'sentence' column since SST2 uses it instead of 'text'
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_df['sentence'], train_df['label'], test_size=0.2, random_state=42
)
print("\n")
print(f"Number of training samples: {len(train_texts)}")
print(f"Number of validation samples: {len(val_texts)}")
print(f"Number of testing samples: {len(test_df)}")

                                            sentence  label
0  a stirring , funny and finally transporting re...      1
1  apparently reassembled from the cutting room f...      0
2  they presume their audience wo n't sit still f...      0
3  this is a visually stunning rumination on love...      1
4  jonathan parker 's bartleby should have been t...      1


Number of training samples: 5536
Number of validation samples: 1384
Number of testing samples: 1821


# 2. Construct a Multi-Layer Perceptron (MLP) model. (20%)


In [18]:
import torch
import torch.nn as nn

In [19]:
class MLPClassifier(nn.Module):
    def __init__(self, input_size):
        super(MLPClassifier, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2)  #output with 2 labels as speciied
        )

    def forward(self, x):
        return self.model(x)

# Example input size (to be updated based on feature extraction method)
input_size = 10000  #based on the provided model architecture. 

# Initialize model
mlp_model_ = MLPClassifier(input_size)
print(mlp_model_)

# Count trainable parameters
print("\n\n############### Parameters ############### ")
total_params = sum(p.numel() for p in mlp_model_.parameters() if p.requires_grad)
print(f"Total Trainable Parameters: {total_params}")


MLPClassifier(
  (model): Sequential(
    (0): Linear(in_features=10000, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): ReLU()
    (8): Linear(in_features=64, out_features=2, bias=True)
  )
)


############### Parameters ############### 
Total Trainable Parameters: 5293122


# 3. Case 1: Implement Bag-of-Words (BoW)

BoW is a text representation technique where a document is converted into a vector based on word frequency, ignoring word order and semantics. Each unique word in the vocabulary becomes a feature, and its value represents the number of times it appears in the document.

In [20]:
from sklearn.feature_extraction.text import CountVectorizer

In [21]:

# Create Bag-of-Words vectorizer
vectorizer = CountVectorizer(max_features=10000)
#text to vector operator, limits vocab to keep the 10k most frequent words 

# Fit and transform the text data
X_train_bow = vectorizer.fit_transform(train_texts).toarray()
X_val_bow = vectorizer.transform(val_texts).toarray()
#vocabulary is learnt 

# Convert to PyTorch tensors
X_train_bow = torch.tensor(X_train_bow, dtype=torch.float32)
X_val_bow = torch.tensor(X_val_bow, dtype=torch.float32)
y_train = torch.tensor(train_labels.values, dtype=torch.long)
y_val = torch.tensor(val_labels.values, dtype=torch.long)
#converting np array to torch tensor 

print(f"BoW Feature Shape: {X_train_bow.shape}")
# (num_samples, vocab_size)

BoW Feature Shape: torch.Size([5536, 10000])


#  Case 2: Implement LLaMA-3.1 Embeddings

In [22]:
# !pip install -U "huggingface_hub[cli]"

In [23]:
# !huggingface-cli login

In [24]:
# Define model name
model_name = "google-bert/bert-base-uncased"

# Load tokenizer and model on CPU
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.pad_token if tokenizer.pad_token else "[PAD]"
bert_model = AutoModel.from_pretrained(model_name).to("cpu")

def get_bert_embeddings(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to("cpu")

    with torch.no_grad():
        outputs = bert_model(**inputs)

    # Mean pooling over the sequence dimension
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy().flatten()

# # Function to get embeddings
# def get_bert_embeddings(texts, batch_size=8):
#     all_embeddings = []
    
#     for i in range(0, len(texts), batch_size):
#         batch_texts = texts[i : i + batch_size]
#         inputs = tokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True).to("cpu")
        
#         with torch.no_grad():
#             outputs = bert_model(**inputs)
        
#         batch_embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
#         all_embeddings.append(batch_embeddings)

#     return np.vstack(all_embeddings)

# Example usage
text = "Implement case 2: Construct a function to use LLaMa-3.1 embeddings."
embedding = get_bert_embeddings(text)
print("Embedding shape:", embedding.shape)  # (1, hidden_size)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Embedding shape: (768,)


# 5. Train the model with 10 epochs and create the best-performing model (checkpoint.pt) on the Dataset 1. (10%)

In [25]:
from torch.utils.data import DataLoader, TensorDataset

## BoW training

In [26]:
# Define DataLoaders
batch_size = 32
train_dataset = TensorDataset(X_train_bow, y_train)
val_dataset = TensorDataset(X_val_bow, y_val)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [27]:
import os

In [28]:
# Create a directory to save models
model_dir = "saved_models_bow"
os.makedirs(model_dir, exist_ok=True)

In [29]:
input_size = 10000
mlp_model_ = MLPClassifier(input_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp_model_.parameters(), lr=0.001)

# Training loop
num_epochs = 10
best_acc = 0.0  # Track the best validation accuracy

for epoch in range(num_epochs):
    mlp_model_.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = mlp_model_(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation phase
    mlp_model_.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = mlp_model_(X_batch)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())

    val_acc = accuracy_score(all_labels, all_preds)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}, Val Accuracy: {val_acc:.4f}")

    # Save best model
    # if val_acc > best_acc:
    #     best_acc = val_acc
    #     torch.save(model.state_dict(), f"checkpoint_BoW_{best_acc}.pt")

    model_path = os.path.join(model_dir, f"MLP_BoW_E{epoch+1}_Acc{val_acc:.4f}.pt")

    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(mlp_model_.state_dict(), model_path)
        print(f"Best model saved: {model_path}")

print(f"\nBest Validation Accuracy: {best_acc:.4f}")

Epoch 1/10, Loss: 98.7630, Val Accuracy: 0.8013
Best model saved: saved_models_bow\MLP_BoW_E1_Acc0.8013.pt
Epoch 2/10, Loss: 36.6062, Val Accuracy: 0.8064
Best model saved: saved_models_bow\MLP_BoW_E2_Acc0.8064.pt
Epoch 3/10, Loss: 7.1429, Val Accuracy: 0.7897
Epoch 4/10, Loss: 1.2432, Val Accuracy: 0.7941
Epoch 5/10, Loss: 0.2780, Val Accuracy: 0.7897
Epoch 6/10, Loss: 0.2761, Val Accuracy: 0.7948
Epoch 7/10, Loss: 0.0547, Val Accuracy: 0.7984
Epoch 8/10, Loss: 0.0069, Val Accuracy: 0.7905
Epoch 9/10, Loss: 0.0014, Val Accuracy: 0.7948
Epoch 10/10, Loss: 0.0006, Val Accuracy: 0.7948

Best Validation Accuracy: 0.8064


## Bert-Base-Uncased Training 

In [30]:
model_dir_two = "saved_models_bert"
os.makedirs(model_dir_two, exist_ok=True)

In [31]:
from tqdm import tqdm

### Making Embeddings 

We are generating text embeddings using a pre-trained BERT model and preparing them for training a machine learning model. First, We load the tokenizer and model, ensuring that a valid padding token is set. Then, We define a function to convert input text into numerical embeddings by tokenizing the text, passing it through the model, and averaging the hidden states. Using `tqdm`, We apply this function to Wer training and validation text datasets while displaying a progress bar. Finally, We convert the generated embeddings and labels into PyTorch tensors and create `DataLoader` objects, which allow efficient batch processing during model training.

In [32]:
# Convert training data to embeddings with progress bar
train_embeddings = [get_bert_embeddings(text) for text in tqdm(train_texts, desc="Processing Train Embeddings")]
val_embeddings = [get_bert_embeddings(text) for text in tqdm(val_texts, desc="Processing Validation Embeddings")]

# Convert lists to NumPy arrays
train_embeddings = np.array(train_embeddings)  # Shape: (num_train_samples, 768)
val_embeddings = np.array(val_embeddings)      # Shape: (num_val_samples, 768)

# Convert labels to tensors
y_train = torch.tensor(train_labels.values, dtype=torch.long)
y_val = torch.tensor(val_labels.values, dtype=torch.long)

# Convert embeddings to PyTorch tensors
X_train = torch.tensor(train_embeddings, dtype=torch.float32)
X_val = torch.tensor(val_embeddings, dtype=torch.float32)

# Create DataLoaders
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

Processing Train Embeddings: 100%|██████████| 5536/5536 [04:25<00:00, 20.86it/s]
Processing Validation Embeddings: 100%|██████████| 1384/1384 [01:06<00:00, 20.80it/s]


In [33]:
# Define the folder path
save_dir = "saved_embeddings_bert"
os.makedirs(save_dir, exist_ok=True)  # Create the folder if it doesn't exist

# Save embeddings
torch.save(X_train, os.path.join(save_dir, 'X_train.pt'))
torch.save(X_val, os.path.join(save_dir, 'X_val.pt'))

In [34]:
print(train_embeddings.shape)  # Should be (num_samples, feature_dim)

(5536, 768)


In [35]:
import time 

In [36]:
# Move model to device
device = "cpu"

# Initialize model
input_size = X_train.shape[-1]  # Ensures correct shape
mlp_model_bert = MLPClassifier(input_size).to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mlp_model_bert.parameters(), lr=0.001)

# Training loop
best_val_acc = 0.0
num_epochs = 10

for epoch in range(num_epochs):
    start_time = time.time()
    mlp_model_bert.train()
    total_loss, correct, total = 0, 0, 0
    
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)  # Move data to device
        optimizer.zero_grad()

        outputs = mlp_model_bert(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == y_batch).sum().item()
        total += y_batch.size(0)

    train_acc = correct / total
    epoch_time = time.time() - start_time
    print(f"Epoch {epoch+1}/{num_epochs}, Time: {epoch_time:.2f}s, Loss: {total_loss:.4f}, Train Acc: {train_acc:.4f}")

    # Validation step
    mlp_model_bert.eval()
    correct, total = 0, 0
    
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)  # Move data to device
            outputs = mlp_model_bert(X_batch)
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)

    val_acc = correct / total
    print(f"Validation Accuracy: {val_acc:.4f}")

    model_path = os.path.join(model_dir_two, f"MLP_BERT_E{epoch+1}_Acc{val_acc:.4f}.pt")

    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(mlp_model_.state_dict(), model_path)
        print(f"Best model saved: {model_path}")


print(f"Best Validation Accuracy: {best_val_acc:.4f}")

Epoch 1/10, Time: 0.73s, Loss: 74.1087, Train Acc: 0.8056
Validation Accuracy: 0.8548
Best model saved: saved_models_bert\MLP_BERT_E1_Acc0.8548.pt
Epoch 2/10, Time: 0.78s, Loss: 58.9040, Train Acc: 0.8542
Validation Accuracy: 0.8598
Best model saved: saved_models_bert\MLP_BERT_E2_Acc0.8598.pt
Epoch 3/10, Time: 0.73s, Loss: 53.5123, Train Acc: 0.8640
Validation Accuracy: 0.8656
Best model saved: saved_models_bert\MLP_BERT_E3_Acc0.8656.pt
Epoch 4/10, Time: 0.93s, Loss: 50.0567, Train Acc: 0.8721
Validation Accuracy: 0.8447
Epoch 5/10, Time: 1.42s, Loss: 47.7584, Train Acc: 0.8855
Validation Accuracy: 0.8483
Epoch 6/10, Time: 1.38s, Loss: 43.0502, Train Acc: 0.8963
Validation Accuracy: 0.8627
Epoch 7/10, Time: 1.34s, Loss: 36.9073, Train Acc: 0.9068
Validation Accuracy: 0.8613
Epoch 8/10, Time: 1.33s, Loss: 32.6718, Train Acc: 0.9209
Validation Accuracy: 0.8663
Best model saved: saved_models_bert\MLP_BERT_E8_Acc0.8663.pt
Epoch 9/10, Time: 1.40s, Loss: 29.5570, Train Acc: 0.9265
Validation

# Have not touched code below this 
Save and Load Checkpoints

# Bag of Words for IMDB

## Load and Preprocess IMDB Dataset (Bag of Words)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

# Load the IMDB dataset
imdb_url = "https://raw.githubusercontent.com/Ankit152/IMDB-sentiment-analysis/master/IMDB-Dataset.csv"
imdb_df = pd.read_csv(imdb_url)

# Convert sentiment to numerical labels ('positive' -> 1, 'negative' -> 0)
imdb_df['label'] = imdb_df['sentiment'].apply(lambda x: 1 if x.lower() == 'positive' else 0)

# Split dataset (80% training, 20% validation)
imdb_train_df, imdb_val_df = train_test_split(imdb_df, test_size=0.2, random_state=42)

# Extract text and labels
imdb_train_texts = imdb_train_df['review']
imdb_train_labels = imdb_train_df['label']
imdb_val_texts = imdb_val_df['review']
imdb_val_labels = imdb_val_df['label']

# Initialize CountVectorizer for Bag-of-Words representation
vectorizer = CountVectorizer(max_features=10000)  # Limit vocabulary size to 10,000
X_train_imdb = vectorizer.fit_transform(imdb_train_texts).toarray()
X_val_imdb = vectorizer.transform(imdb_val_texts).toarray()

# Convert features and labels to PyTorch tensors
X_train_imdb = torch.tensor(X_train_imdb, dtype=torch.float32)
X_val_imdb = torch.tensor(X_val_imdb, dtype=torch.float32)
y_train_imdb = torch.tensor(imdb_train_labels.values, dtype=torch.long)
y_val_imdb = torch.tensor(imdb_val_labels.values, dtype=torch.long)

print(f"IMDB BoW Training Features Shape: {X_train_imdb.shape}")
print(f"IMDB BoW Validation Features Shape: {X_val_imdb.shape}")


IMDB training samples: 40000
IMDB validation samples: 10000


##  Define the MLP Model

In [48]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model
input_size = 10000  # This should match the feature extraction method (BoW feature size)
model_IMDB_bow = MLPClassifier(input_size).to(device)

# Load the saved model weights
checkpoint_path = "saved_models_bow/MLP_BoW_E2_Acc0.8064.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)  # Load on correct device
model_IMDB_bow.load_state_dict(checkpoint)  # Load weights

# Set model to evaluation mode for inference
model_IMDB_bow.eval()

print("Model successfully loaded!")

Model successfully loaded!


  checkpoint = torch.load(checkpoint_path, map_location=device)  # Load on correct device


## Training Loop

In [49]:
epochs = 10  # Set number of epochs
batch_size = 256  # Mini-batch size for training
num_samples = X_train_imdb.shape[0]

for epoch in range(epochs):
    model_IMDB_bow.train()  # Set to training mode
    total_loss = 0

    for i in range(0, num_samples, batch_size):
        batch_X = X_train_imdb[i:i+batch_size]
        batch_y = y_train_imdb[i:i+batch_size]

        optimizer.zero_grad()  # Reset gradients
        outputs = model_IMDB_bow(batch_X)  # Forward pass
        loss = criterion(outputs, batch_y)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        total_loss += loss.item()

    avg_loss = total_loss / (num_samples // batch_size)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")


Epoch 1/10, Loss: 1.0325
Epoch 2/10, Loss: 1.0325
Epoch 3/10, Loss: 1.0325
Epoch 4/10, Loss: 1.0325
Epoch 5/10, Loss: 1.0325
Epoch 6/10, Loss: 1.0325
Epoch 7/10, Loss: 1.0325
Epoch 8/10, Loss: 1.0325
Epoch 9/10, Loss: 1.0325
Epoch 10/10, Loss: 1.0325


## Compute Validation Loss and Accuracy

In [50]:
model_IMDB_bow.eval()  # Set to evaluation mode
total_val_loss = 0
correct_predictions = 0
num_val_samples = X_val_imdb.shape[0]

with torch.no_grad():
    for i in range(0, num_val_samples, batch_size):
        batch_X_val = X_val_imdb[i:i+batch_size]
        batch_y_val = y_val_imdb[i:i+batch_size]

        val_outputs = model_IMDB_bow(batch_X_val)  # Forward pass
        val_loss = criterion(val_outputs, batch_y_val)  # Compute loss
        total_val_loss += val_loss.item()

        val_predictions = torch.argmax(val_outputs, dim=1)
        correct_predictions += (val_predictions == batch_y_val).sum().item()

# Compute Average Validation Loss
avg_val_loss = total_val_loss / (num_val_samples // batch_size)

# Compute Validation Accuracy
val_accuracy = correct_predictions / num_val_samples

print(f"Validation Loss: {avg_val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")


Validation Loss: 1.0254
Validation Accuracy: 77.89%


# Bert Embedding for IMDB 

## Convert IMDB Text to BERT Embeddings

In [53]:
sample_size = 200
imdb_train_sample = imdb_train_texts[:sample_size]
imdb_val_sample = imdb_val_texts[:sample_size]

X_train_imdb_bert = get_bert_embeddings(imdb_train_sample)
X_val_imdb_bert = get_bert_embeddings(imdb_val_sample)


ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

In [51]:
# Convert IMDB training and validation texts into embeddings
X_train_imdb_bert = np.array([get_bert_embeddings(text) for text in imdb_train_texts])
X_val_imdb_bert = np.array([get_bert_embeddings(text) for text in imdb_val_texts])

# Convert to PyTorch tensors
X_train_imdb_bert = torch.tensor(X_train_imdb_bert, dtype=torch.float32)
X_val_imdb_bert = torch.tensor(X_val_imdb_bert, dtype=torch.float32)
y_train_imdb = torch.tensor(imdb_train_labels.values, dtype=torch.long)
y_val_imdb = torch.tensor(imdb_val_labels.values, dtype=torch.long)

print(f"IMDB BERT Training Features Shape: {X_train_imdb_bert.shape}")
print(f"IMDB BERT Validation Features Shape: {X_val_imdb_bert.shape}")


KeyboardInterrupt: 

In [None]:
# Save BERT embeddings for training and validation sets
np.save("IMDB_train_BERT_embeddings.npy", X_train_imdb_bert.numpy())
np.save("IMDB_val_BERT_embeddings.npy", X_val_imdb_bert.numpy())

# Save labels as well
np.save("IMDB_train_labels.npy", y_train_imdb.numpy())
np.save("IMDB_val_labels.npy", y_val_imdb.numpy())

print("BERT embeddings and labels saved successfully!")

## Define the MLP Model

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model
input_size = 10000  # This should match the feature extraction method (BoW feature size)
model_IMDB_bow = MLPClassifier(input_size).to(device)

# Load the saved model weights
checkpoint_path = "saved_models_bert\MLP_BERT_E9_Acc0.8692.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)  # Load on correct device
model_IMDB_bow.load_state_dict(checkpoint)  # Load weights

# Set model to evaluation mode for inference
model_IMDB_bow.eval()

print("Model successfully loaded!")

##  Train the MLP Model on BERT Embeddings

In [None]:
# Define model for BERT embeddings
input_size_bert = X_train_imdb_bert.shape[1]  # Adjust input size based on BERT embeddings
model_IMDB_bert = MLPClassifier(input_size_bert)

# Define optimizer and loss function
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_IMDB_bert.parameters(), lr=0.0001)

# Training loop
epochs = 10
batch_size = 32
for epoch in range(epochs):
    model_IMDB_bert.train()
    total_loss = 0
    
    for i in range(0, X_train_imdb_bert.shape[0], batch_size):
        batch_X = X_train_imdb_bert[i:i+batch_size]
        batch_y = y_train_imdb[i:i+batch_size]

        optimizer.zero_grad()
        outputs = model_IMDB_bert(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_train_loss = total_loss / (X_train_imdb_bert.shape[0] // batch_size)
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss:.4f}")


##  Compute Validation Loss and Accuracy

In [None]:
# Compute validation loss and accuracy
model_IMDB_bert.eval()
total_val_loss = 0
correct_predictions = 0
num_val_samples = X_val_imdb_bert.shape[0]

with torch.no_grad():
    for i in range(0, num_val_samples, batch_size):
        batch_X_val = X_val_imdb_bert[i:i+batch_size]
        batch_y_val = y_val_imdb[i:i+batch_size]

        val_outputs = model_IMDB_bert(batch_X_val)
        val_loss = criterion(val_outputs, batch_y_val)
        total_val_loss += val_loss.item()

        val_predictions = torch.argmax(val_outputs, dim=1)
        correct_predictions += (val_predictions == batch_y_val).sum().item()

# Compute Average Validation Loss
avg_val_loss = total_val_loss / (num_val_samples // batch_size)

# Compute Validation Accuracy
val_accuracy = correct_predictions / num_val_samples

print(f"Validation Loss (BERT): {avg_val_loss:.4f}")
print(f"Validation Accuracy (BERT): {val_accuracy * 100:.2f}%")


# Visualization (TensorBoard Integration)

In [None]:
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("runs/text_classification")

for epoch in range(epochs):
    writer.add_scalar("Loss/train", loss.item(), epoch)
    writer.add_scalar("Loss/validation", val_loss.item(), epoch)

writer.close()
print("TensorBoard logs saved.")
