In [1]:
import os
import pandas as pd
import torch
import random
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tqdm import tqdm
from functools import partial

class MovieReviewsDataset(Dataset):
    """Map-style dataset that serves rows of a reviews DataFrame as (text, label) pairs.

    The wrapped frame must provide a 'Content' column and a 'Sentiment'
    column. Values are handed back exactly as stored; no tokenization or
    label encoding is performed here.
    """

    def __init__(self, dataframe):
        # Only a reference is kept: the frame is neither copied nor modified.
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.dataframe.index)

    def __getitem__(self, idx):
        """Return ``(Content, Sentiment)`` for the row at *position* ``idx``.

        Lookup is positional (``iloc``), so the frame's index labels are
        irrelevant.
        """
        row = self.dataframe.iloc[idx]
        return row['Content'], row['Sentiment']

def collate_fn(batch, tokenizer, device):
    """Turn a list of (text, sentiment) pairs into model-ready tensors on ``device``.

    Args:
        batch: Sequence of ``(text, sentiment)`` tuples.
        tokenizer: Callable invoked as
            ``tokenizer(texts, truncation=True, padding=True, return_tensors='pt')``
            that returns a mapping with 'input_ids' and 'attention_mask'.
        device: Target device for all returned tensors.

    Returns:
        Tuple ``(input_ids, attention_mask, labels)``.

    Note:
        Only the exact string 'Negative' is encoded as 0; every other
        sentiment value (including unexpected ones) is encoded as 1.
    """
    texts, sentiments = zip(*batch)
    encoded = tokenizer(texts, truncation=True, padding=True, return_tensors='pt')
    targets = torch.tensor([int(sentiment != 'Negative') for sentiment in sentiments])
    return (
        encoded['input_ids'].to(device),
        encoded['attention_mask'].to(device),
        targets.to(device),
    )

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), then configures cuDNN for repeatable kernel selection.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Deterministic kernels alone are not enough: if the cuDNN autotuner has
    # been switched on, it may choose different algorithms from run to run.
    torch.backends.cudnn.benchmark = False

def train_and_evaluate(model, train_loader, val_loader, device, num_epochs=3, learning_rate=2e-5,
                       best_model_path="best_model.pth"):
    """Fine-tune ``model`` and checkpoint the weights with the best validation accuracy.

    After each training epoch the model is scored on ``val_loader``; whenever
    the accuracy beats the best seen so far, ``model.state_dict()`` is written
    to ``best_model_path``.

    Args:
        model: Module whose forward pass accepts ``input_ids``,
            ``attention_mask`` and (for training) ``labels`` and returns an
            object exposing ``.loss`` and ``.logits``.
        train_loader: Iterable of ``(input_ids, attention_mask, labels)``
            batches used for optimisation; must support ``len()``.
        val_loader: Iterable of ``(input_ids, attention_mask, labels)``
            batches used for validation.
        device: Device the model is moved to. Batches are used as yielded by
            the loaders and are not moved here.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Initial learning rate for AdamW; decayed linearly with
            no warmup over ``num_epochs * len(train_loader)`` steps.
        best_model_path: Destination file for the best checkpoint.
    """
    # Move the model before building the optimizer so the optimizer is created
    # over the parameters that are actually trained.
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    total_steps = num_epochs * len(train_loader)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    # Start below any achievable accuracy so that the first epoch always
    # produces a checkpoint, even when it scores exactly 0.0.
    best_accuracy = float("-inf")

    for epoch in range(num_epochs):
        model.train()
        for input_ids, attention_mask, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}"):
            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
            scheduler.step()  # the schedule advances per batch, not per epoch

        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for input_ids, attention_mask, labels in val_loader:
                # Only the logits are needed here, so labels are not passed
                # and no loss is computed.
                logits = model(input_ids, attention_mask=attention_mask).logits
                preds = torch.argmax(logits, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        acc = accuracy_score(all_labels, all_preds)
        print(f'Epoch {epoch+1} - Validation Accuracy: {acc}')

        if acc > best_accuracy:
            best_accuracy = acc
            torch.save(model.state_dict(), best_model_path)
            print(f"New best model saved with accuracy: {acc}")

def evaluate_model(model, test_loader, device):
    """Score ``model`` on ``test_loader`` and print accuracy, confusion matrix and report.

    Args:
        model: Module whose forward pass accepts ``input_ids`` and
            ``attention_mask`` and returns an object exposing ``.logits``.
        test_loader: Iterable of ``(input_ids, attention_mask, labels)``
            batches. Batches are used as yielded and are not moved here.
        device: Device the model is moved to before inference.
    """
    # Do not rely on an earlier training call having placed the model.
    model.to(device)
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for input_ids, attention_mask, labels in test_loader:
            # Only the logits are needed, so labels are not passed to the model.
            logits = model(input_ids, attention_mask=attention_mask).logits
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Pin the label set to the two encoded classes (0 = Negative, 1 = Positive).
    # Otherwise a split where only one class shows up collapses the confusion
    # matrix to 1x1 and makes classification_report reject the two target_names.
    class_ids = [0, 1]
    acc = accuracy_score(all_labels, all_preds)
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    report = classification_report(all_labels, all_preds, labels=class_ids,
                                   target_names=['Negative', 'Positive'])
    print(f'Test Accuracy: {acc}')
    print('Confusion Matrix:')
    print(cm)
    print('Classification Report:')
    print(report)

# Seed all RNGs before anything stochastic runs
seed = 1
set_seed(seed)

# Read the stored train/validation/test splits, keeping only the text and label columns
train_df = pd.read_csv('movies_reviews_train.csv')[['Content', 'Sentiment']]
val_df = pd.read_csv('movies_reviews_val.csv')[['Content', 'Sentiment']]
test_df = pd.read_csv('movies_reviews_test.csv')[['Content', 'Sentiment']]

# Wrap each split as a dataset; the training rows are permuted once with a fixed random_state
train_indices = train_df.sample(frac=1, random_state=200).index
train_dataset = MovieReviewsDataset(train_df.loc[train_indices])
val_dataset = MovieReviewsDataset(val_df)
test_dataset = MovieReviewsDataset(test_df)

# Pretrained BERT tokenizer and a two-label sequence classification model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Prefer the GPU when one is present, otherwise run on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loaders yield batches of 8 in dataset order (no DataLoader-level shuffling);
# collate_fn tokenizes each batch and moves the tensors to `device`
batch_collate = partial(collate_fn, tokenizer=tokenizer, device=device)
train_loader = DataLoader(train_dataset, batch_size=8, collate_fn=batch_collate)
val_loader = DataLoader(val_dataset, batch_size=8, collate_fn=batch_collate)
test_loader = DataLoader(test_dataset, batch_size=8, collate_fn=batch_collate)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
%%time

# Fine-tune for four epochs; train_and_evaluate writes the best-scoring weights to disk
train_and_evaluate(
    model,
    train_loader,
    val_loader,
    device,
    num_epochs=4,
    learning_rate=2e-5,
)

Epoch 1: 100%|██████████| 200/200 [02:49<00:00,  1.18it/s]


Epoch 1 - Validation Accuracy: 0.85
New best model saved with accuracy: 0.85


Epoch 2: 100%|██████████| 200/200 [02:49<00:00,  1.18it/s]


Epoch 2 - Validation Accuracy: 0.885
New best model saved with accuracy: 0.885


Epoch 3: 100%|██████████| 200/200 [02:49<00:00,  1.18it/s]


Epoch 3 - Validation Accuracy: 0.89
New best model saved with accuracy: 0.89


Epoch 4: 100%|██████████| 200/200 [02:50<00:00,  1.18it/s]


Epoch 4 - Validation Accuracy: 0.905
New best model saved with accuracy: 0.905
CPU times: user 6min 22s, sys: 5min 27s, total: 11min 49s
Wall time: 12min 13s


In [3]:
# Load the best model for evaluation.
# map_location keeps the load working when the checkpoint was written on a
# different device than the current one (e.g. saved on GPU, reloaded on CPU);
# weights_only limits unpickling to tensors and plain containers, which is all
# a state_dict holds.
model.load_state_dict(torch.load("best_model.pth", map_location=device, weights_only=True))

# Evaluate the model on the test set
evaluate_model(model, test_loader, device)

Test Accuracy: 0.915
Confusion Matrix:
[[91  9]
 [ 8 92]]
Classification Report:
              precision    recall  f1-score   support

    Negative       0.92      0.91      0.91       100
    Positive       0.91      0.92      0.92       100

    accuracy                           0.92       200
   macro avg       0.92      0.92      0.91       200
weighted avg       0.92      0.92      0.91       200

