### This tutorial walks through the basic steps of fine-tuning a pretrained BERT classification model.

In [None]:
!pip install transformers

In [None]:
# Import necessary packages

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.optim import AdamW
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import random
import itertools
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

### Setting hyperparameters!

In [None]:
# Hyperparameters for the fine-tuning run.

# Checkpoint and data
MODEL_NAME = 'bert-base-uncased'   # Hugging Face checkpoint to start from
TRAINING_DATA_SIZE = 5000          # number of reviews sampled from the CSV
MAX_LENGTH = 300                   # reviews are padded/truncated to this many tokens

# Optimisation
BATCH_SIZE = 16                    # reviews per batch
LEARNING_RATE = 2e-5               # learning rate handed to AdamW
NUM_EPOCHS = 4                     # full passes over the training data

### Loading and preparing the training data we will be using to fine-tune.

In [None]:
# Download the IMDB reviews CSV into a DataFrame (the smaller working subset is sampled in the next cell).
IMDB_CSV_URL = "https://raw.githubusercontent.com/ClaudiaECarroll/Teaching_Materials/main/IMDB_Dataset.csv"
df = pd.read_csv(IMDB_CSV_URL)

# Summarise the columns, their dtypes and non-null counts.
df.info()

In [None]:
# Draw a reproducible random subset of TRAINING_DATA_SIZE reviews and renumber the rows from 0.
sampled_rows = df.sample(n=TRAINING_DATA_SIZE, random_state=42)
df = sampled_rows.reset_index(drop=True)

# Preview the first rows to see how the data is structured.
df.head()

In [None]:
# Count the reviews per sentiment value in the random sample.
sentiment_counts = df["sentiment"].value_counts()
sentiment_counts

In [None]:
# Pull the review texts out of the DataFrame as a plain Python list for later.

review_column = df['review']
texts = review_column.tolist()

In [None]:
# Create a list that tracks the positive and negative labels in numeric form
# (1 = positive, 0 = negative), one entry per row and in the same order as df.

sentiment_to_id = {'positive': 1, 'negative': 0}

# Fail loudly on anything unexpected: silently skipping a row would leave this
# list shorter than, and out of step with, the list of review texts.
unexpected = set(df['sentiment']) - set(sentiment_to_id)
if unexpected:
    raise ValueError(f"Unexpected sentiment value(s): {sorted(unexpected, key=str)}")

designation_numeric = [sentiment_to_id[value] for value in df['sentiment']]


In [None]:
# Convert the numeric labels to a PyTorch tensor.
# The dtype is pinned to int64 because the labels are later passed to the loss as class indices.

labels = torch.tensor(designation_numeric, dtype=torch.long)

In [None]:
# Keep a numeric copy of the sentiment in the DataFrame as well, so each row carries its label.

label_map = dict(negative=0, positive=1)
df['label'] = df['sentiment'].map(label_map)

### Preparing the training data for processing by the model.

In [None]:
# Setting up the train/validation split: 80% of the reviews for training, 20% held out.
# random_state is fixed so the same split is produced on every run.

train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Load the tokenizer that belongs to MODEL_NAME from Hugging Face Transformers.
# AutoTokenizer picks the tokenizer class from the checkpoint itself, so it stays in
# step with the model (which is also loaded through an Auto class) if MODEL_NAME changes.

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [None]:
# Tokenize the training texts: every review is padded or truncated to MAX_LENGTH tokens
# and returned as PyTorch tensors.

train_encodings = tokenizer(
    list(train_texts), truncation=True, padding='max_length', max_length=MAX_LENGTH, return_tensors='pt',
)

# Bundle token ids, attention masks and labels so each index yields one complete example.
train_input_ids = train_encodings['input_ids']
train_attention_mask = train_encodings['attention_mask']
train_dataset = TensorDataset(train_input_ids, train_attention_mask, train_labels)



In [None]:
# Tokenize the validation texts with exactly the same settings as the training texts.

val_encodings = tokenizer(
    list(val_texts), truncation=True, padding='max_length', max_length=MAX_LENGTH, return_tensors='pt',
)

# Bundle token ids, attention masks and labels so each index yields one complete example.
val_input_ids = val_encodings['input_ids']
val_attention_mask = val_encodings['attention_mask']
val_dataset = TensorDataset(val_input_ids, val_attention_mask, val_labels)

In [None]:
# Let's take a look at what one encoded example looks like:
# a tuple of (input_ids, attention_mask, label) for the first training review.

train_dataset[0]

In [None]:
# Wrap the datasets in PyTorch DataLoaders, which hand the data to the model one batch at a time.
# Only the training data is shuffled.

train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Loading the Pre-Trained model!

In [None]:
# Load the pretrained checkpoint from Hugging Face with a two-label classification head,
# then move it onto the selected device.

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
model = model.to(device)


In [None]:
# Optimizer plus a learning-rate schedule. With zero warmup steps the schedule spans
# every optimizer step of training (batches per epoch x number of epochs).

num_batches_per_epoch = len(train_dataloader)
total_steps = num_batches_per_epoch * NUM_EPOCHS
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)



In [None]:
# Cross-entropy loss: scores how far the model's class predictions are from the true labels.

loss_fn = torch.nn.CrossEntropyLoss()

In [None]:
# Before we actually start fine-tuning, let's check whether a GPU is available
# (note: in Colab, select the T4 GPU via Runtime --> Change Runtime Type).

has_cuda = torch.cuda.is_available()
print("CUDA available:", has_cuda)
print("Device:", torch.device("cuda" if has_cuda else "cpu"))
print("GPU name:", torch.cuda.get_device_name(0) if has_cuda else "No GPU")

### Actually doing the fine-tuning!

In [None]:
# Fine-tune for NUM_EPOCHS passes over the training data. After each pass the model is
# scored on the validation set and the accuracy plus a per-class report are printed.
for epoch in range(NUM_EPOCHS):
    print(f"Epoch {epoch + 1}/{NUM_EPOCHS}")

    # TRAINING
    model.train()
    for batch in train_dataloader:
        optimizer.zero_grad()
        # Each batch is (input_ids, attention_mask, labels), matching the TensorDataset order.
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)
        # Named batch_labels so the notebook-level `labels` tensor is not overwritten.
        batch_labels = batch[2].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Reuse the loss function defined earlier instead of building a new one per batch.
        loss = loss_fn(logits, batch_labels)
        loss.backward()
        optimizer.step()
        # The schedule is stepped once per batch, not once per epoch.
        scheduler.step()

    # EVALUATION
    model.eval()
    predictions = []
    actual_labels = []

    # No gradients are needed when only scoring the model.
    with torch.no_grad():
        for batch in val_dataloader:
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            batch_labels = batch[2].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            # The predicted class is the index of the largest logit in each row.
            _, preds = torch.max(logits, dim=1)

            predictions.extend(preds.cpu().tolist())
            actual_labels.extend(batch_labels.cpu().tolist())

    accuracy = accuracy_score(actual_labels, predictions)
    report = classification_report(actual_labels, predictions)

    print(f"Validation Accuracy: {accuracy:.4f}")
    print(report)


#### And now we wait...


### Applying the Fine-Tuned Model

In [None]:
def predict_sentiment(text, model, tokenizer, device, max_length=300):
    """Classify one piece of text as "positive" or "negative".

    Args:
        text: The text to classify. The result is read with ``.item()``, so
            this is expected to produce a single prediction (one text).
        model: Classification model; called with ``input_ids`` and
            ``attention_mask`` and expected to return an object with ``.logits``.
        tokenizer: Callable that turns ``text`` into ``input_ids`` and
            ``attention_mask`` tensors.
        device: Device the input tensors are moved to before the forward pass.
        max_length: Length the text is padded or truncated to.

    Returns:
        "positive" when the highest-scoring class index is 1, otherwise "negative".
    """
    model.eval()
    encoded = tokenizer(
        text,
        return_tensors='pt',
        max_length=max_length,
        padding='max_length',
        truncation=True,
    )
    model_inputs = {
        'input_ids': encoded['input_ids'].to(device),
        'attention_mask': encoded['attention_mask'].to(device),
    }

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**model_inputs).logits

    predicted_class = logits.argmax(dim=1).item()
    if predicted_class == 1:
        return "positive"
    return "negative"


In [None]:
# Try the fine-tuned model on a single hand-written review.
test_text = "This was a terrible movie!."
sentiment = predict_sentiment(text=test_text, model=model, tokenizer=tokenizer, device=device)
print(test_text)
print(f"Predicted sentiment: {sentiment}")

## Hyperparameter Optimization

In [None]:
# Candidate values for each hyperparameter; every combination of them is tried below.

epochs = [2]  # keep small for demo
batch_sizes = [16, 32]
learning_rates = [2e-5, 3e-5]

In [None]:
# Cartesian product of all combinations, as (learning rate, batch size, epochs) tuples.
param_grid = [
    (lr_value, bs_value, n_epochs)
    for lr_value in learning_rates
    for bs_value in batch_sizes
    for n_epochs in epochs
]


In [None]:
# Grid search: fine-tune a fresh copy of the pretrained model for every
# (learning rate, batch size, epochs) combination in param_grid and remember the
# combination with the highest validation accuracy.
# NOTE: `model`, `optimizer` and `scheduler` are rebound on every trial, so after this
# cell they refer to the last trial that ran, not necessarily the best one.
best_accuracy = 0
best_params = None

for i, (lr, bs, num_epochs) in enumerate(param_grid):
    print(f"\nTrial {i+1}/{len(param_grid)} — LR={lr}, Batch Size={bs}, Epochs={num_epochs}")

    # Re-initialize model from the same checkpoint as the main run, so no trial
    # starts from weights already tuned by a previous one.
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2).to(device)

    # Dataloaders with current batch size
    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=bs)

    optimizer = AdamW(model.parameters(), lr=lr)
    total_steps = len(train_loader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    # Training loop (simplified)
    model.train()
    for epoch in range(num_epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            # Named batch_labels so the notebook-level `labels` tensor is not overwritten.
            batch_labels = batch[2].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # Reuse the loss function defined earlier instead of building a new one per batch.
            loss = loss_fn(outputs.logits, batch_labels)
            loss.backward()
            optimizer.step()
            scheduler.step()

    # Evaluation
    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            batch_labels = batch[2].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # The predicted class is the index of the largest logit in each row.
            _, preds = torch.max(outputs.logits, dim=1)

            predictions.extend(preds.cpu().tolist())
            actuals.extend(batch_labels.cpu().tolist())

    acc = accuracy_score(actuals, predictions)
    print(f"Validation Accuracy: {acc:.4f}")

    # The first finished trial always becomes the baseline, even at 0 accuracy,
    # so best_params is only None when no trial ran at all.
    if best_params is None or acc > best_accuracy:
        best_accuracy = acc
        best_params = (lr, bs, num_epochs)

if best_params is None:
    # Empty grid: there is nothing to index into, so say so instead of raising a TypeError.
    print("\nNo trials were run, so there is no best configuration to report.")
else:
    print(f"\n🏆 Best Validation Accuracy: {best_accuracy:.4f} with params: LR={best_params[0]}, Batch Size={best_params[1]}, Epochs={best_params[2]}")
