In [None]:
# ============================================
# Module 9: Pretrained Models & Transfer Learning
# Lab 1 – Fine-Tune BERT for Sentiment Analysis
# ============================================
# Author: Dr. Dasha Trofimova
# Course: M.Sc. Applied Data Science & AI
# --------------------------------------------
# Learning Goals:
# - Understand transfer learning with pretrained language models
# - Fine-tune BERT for binary text classification (sentiment)
# - Evaluate performance using accuracy and F1
# - Apply the model for real-world text inference
# --------------------------------------------
# Lab Objectives:
# 1. Explore the IMDB dataset and its label mapping
# 2. Tokenize text for BERT input format
# 3. Configure and run Hugging Face Trainer for fine-tuning
# 4. Evaluate metrics on test data
# 5. Perform inference on custom examples
# ============================================

# Stretch goal: try freezing the BERT backbone and fine-tuning only a lightweight classifier head.
!pip install datasets transformers accelerate evaluate torch --quiet


In [2]:
import inspect

import evaluate
import numpy as np
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer

# Checkpoint identifier handed to `from_pretrained` for both tokenizer and model.
model_name: str = "bert-base-uncased"
# Size of the classification head: two sentiment classes (pos / neg).
num_labels: int = 2


In [None]:
# Load the IMDB movie-review dataset.
dataset = load_dataset("imdb")
# A bare `dataset` expression is only rendered when it is the last line of a
# cell, so print it explicitly to show the available splits.
print(dataset)

# Optionally downsample so that fine-tuning finishes quickly. Increase the
# sample counts for a more reliable model.
shuffle_seed = 42
num_train_samples = 2000
num_test_samples = 1000

# Clamp to the split size so a larger requested count cannot index out of range.
small_train = dataset["train"].shuffle(seed=shuffle_seed).select(
    range(min(num_train_samples, len(dataset["train"])))
)
small_test = dataset["test"].shuffle(seed=shuffle_seed).select(
    range(min(num_test_samples, len(dataset["test"])))
)

small_dataset = {
    "train": small_train,
    "test": small_test,
}


In [None]:
# Load the tokenizer for the checkpoint named by `model_name`, so the text is
# encoded with the same pretrained identifier that the model is loaded from.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess(batch):
    """Tokenize the ``"text"`` column of a batch with the notebook's tokenizer.

    Args:
        batch: Mapping that holds the raw review strings under ``"text"``.

    Returns:
        Whatever the tokenizer returns for those texts, requested with
        truncation enabled and padding to a fixed ``max_length`` of 256.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 256,
    }
    reviews = batch["text"]
    return tokenizer(reviews, **tokenizer_options)

# Tokenize both splits in batches (train first, then test).
encoded_train, encoded_test = (
    small_dataset[split_name].map(preprocess, batched=True)
    for split_name in ("train", "test")
)

# Expose only the listed columns, formatted as torch tensors.
model_columns = ["input_ids", "attention_mask", "label"]
for encoded_split in (encoded_train, encoded_test):
    encoded_split.set_format(type="torch", columns=model_columns)


In [None]:
# Human-readable class names stored in the model config, so saved checkpoints
# report "negative"/"positive" instead of anonymous LABEL_0/LABEL_1.
# Class id 1 is the positive class in this lab.
id2label = {0: "negative", 1: "positive"}
label2id = {name: class_id for class_id, name in id2label.items()}

# Pretrained encoder plus a classification head with `num_labels` outputs.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)


In [None]:
# Load both evaluation metrics in one pass (accuracy first, then F1).
accuracy, f1 = (evaluate.load(metric_id) for metric_id in ("accuracy", "f1"))

def compute_metrics(eval_pred):
    """Turn raw model outputs into accuracy and weighted F1.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted = np.argmax(scores, axis=-1)

    accuracy_result = accuracy.compute(predictions=predicted, references=gold)
    f1_result = f1.compute(predictions=predicted, references=gold, average="weighted")

    return {
        "accuracy": accuracy_result["accuracy"],
        "f1": f1_result["f1"],
    }


In [None]:
# Fine-tuning configuration: evaluate and checkpoint once per epoch, then
# reload the best checkpoint when training finishes.
training_args = TrainingArguments(
    output_dir="./bert-sentiment-checkpoints",
    eval_strategy="epoch",
    save_strategy="epoch",  # kept equal to eval_strategy for load_best_model_at_end
    logging_steps=20,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    learning_rate=2e-5,
    weight_decay=0.01,
    load_best_model_at_end=True,
)

# Recent transformers releases deprecate Trainer's `tokenizer` argument in
# favour of `processing_class`. Pick whichever keyword the installed version
# accepts, so the cell runs on both older and newer releases.
trainer_init_params = inspect.signature(Trainer.__init__).parameters
tokenizer_kwarg = (
    "processing_class" if "processing_class" in trainer_init_params else "tokenizer"
)

# NOTE(review): eval_dataset is the test subset, so the checkpoint chosen by
# load_best_model_at_end is selected on test data. Consider a separate
# validation split if unbiased test metrics are needed.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train,
    eval_dataset=encoded_test,
    compute_metrics=compute_metrics,
    **{tokenizer_kwarg: tokenizer},
)

trainer.train()


In [None]:
# Run a final evaluation pass; no dataset is passed here, so the trainer falls
# back to the evaluation dataset it was constructed with.
metrics = trainer.evaluate()
# Last expression of the cell, so the notebook displays the returned metrics.
metrics


In [None]:
def predict_sentiment(text):
    """Classify a piece of text as ``"positive"`` or ``"negative"``.

    Args:
        text: The raw review text to classify.

    Returns:
        ``"positive"`` if the model's highest-scoring class id is 1,
        otherwise ``"negative"``.
    """
    tokens = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=256,
        return_tensors="pt"
    )

    # Move the inputs to wherever the model's weights live. Checking only
    # torch.cuda.is_available() fails when the model is on another device
    # (CPU, MPS, a different GPU), because inputs and weights then mismatch.
    device = next(model.parameters()).device
    tokens = {k: v.to(device) for k, v in tokens.items()}

    # Inference mode: put the module in eval mode and skip gradient tracking.
    model.eval()
    with torch.no_grad():
        output = model(**tokens)
        pred_id = torch.argmax(output.logits, dim=1).item()

    return "positive" if pred_id == 1 else "negative"

# Smoke-test the classifier on two example reviews.
for review in (
    "This movie was absolutely amazing.",
    "This was a boring waste of time.",
):
    print(predict_sentiment(review))

