In [1]:
# Setup
%pip install -q transformers datasets accelerate peft evaluate scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load Dataset

from datasets import load_dataset

dataset = load_dataset("ag_news")
print(dataset)

# Define label names
label_names = ["World", "Sports", "Business", "Sci/Tech"]

# Shuffle the training dataset
dataset["train"] = dataset["train"].shuffle()

# Show a few samples with readable labels
for i in range(5):
    text = dataset["train"][i]["text"]
    label_id = dataset["train"][i]["label"]
    label_name = label_names[label_id]
    print(f"Label: {label_name}\nText: {text}\n")

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 120000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 7600
    })
})
Label: Business
Text: Petrol prices drive up inflation A 2p increase in the average cost of petrol, as a result of surging crude oil prices, helped push UK inflation higher in October.

Label: Business
Text: Stocks tumble as oil surges again The Dow Jones industrial average fell 108 points to about 97-hundred-58. Decliners on the New York Stock Exchange outnumbered advancers by a five-to-three margin.

Label: Business
Text: Glowing jobs report boosts stocks US stocks rose as a government report showed employers added almost twice the number of workers forecast in October.  quot;The report was a reconfirmation of the solid economic growth that we #39;ve been experiencing 

Label: Sci/Tech
Text: Landing Site for Huygens On January 14, 2005, the Huygens probe will try to descend to the surface of S

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Choose a pretrained base model
model_name = "distilbert-base-uncased"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model with classification head (4 labels for AG News)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=4
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Tokenize Dataset
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)

tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset = tokenized_dataset.remove_columns(["text"]) # Remove text column as we no longer need it after tokenization.
tokenized_dataset.set_format("torch")

Map: 100%|██████████| 120000/120000 [00:04<00:00, 26348.01 examples/s]
Map: 100%|██████████| 7600/7600 [00:00<00:00, 33376.20 examples/s]


In [9]:
# Evaluate Base Model

from transformers import Trainer, TrainingArguments
import evaluate
import torch

metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    return metric.compute(predictions=predictions, references=labels)

# Helper function for evaluation
def evaluate_model(trainer, name="Model"):
    perf = trainer.evaluate()
    acc = perf["eval_accuracy"] * 100
    print(f"{name} Performance: {perf}")
    print(f"{name} Accuracy: {acc:.1f}%\n")
    return perf

training_args = TrainingArguments(
    output_dir="./results_base",
    per_device_eval_batch_size=16,
    do_train=False,
    do_eval=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics
)

base_performance = evaluate_model(trainer, "Base Model")



Base Model Performance: {'eval_loss': 1.3891226053237915, 'eval_model_preparation_time': 0.0006, 'eval_accuracy': 0.2797368421052632, 'eval_runtime': 20.3597, 'eval_samples_per_second': 373.287, 'eval_steps_per_second': 23.33}
Base Model Accuracy: 28.0%



In [10]:
# Apply LoRA

from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_lin","v_lin"],
    lora_dropout=0.1,
    bias="none",
    task_type="SEQ_CLS"
)

lora_model = get_peft_model(model, lora_config)
lora_model.print_trainable_parameters()


trainable params: 741,124 || all params: 67,697,672 || trainable%: 1.0948


In [11]:
# Fine-Tune LoRA Model

from transformers import Trainer, TrainingArguments

# Training arguments
training_args = TrainingArguments(
    output_dir="./results_lora",
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-4,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=50,
    load_best_model_at_end=True,
    save_total_limit=2,
    report_to="none"
)


trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics
)

trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy
1,0.1898,0.210463,0.929605
2,0.1359,0.188916,0.936447




TrainOutput(global_step=15000, training_loss=0.21429175491333008, metrics={'train_runtime': 1646.1318, 'train_samples_per_second': 145.796, 'train_steps_per_second': 9.112, 'total_flos': 8084931379200000.0, 'train_loss': 0.21429175491333008, 'epoch': 2.0})

In [12]:
# Evaluate LoRA Model
lora_performance = evaluate_model(trainer, "LoRA Model")

# Save LoRA PEFT model
saved_model_directory = "lora_ag_news"
lora_model.save_pretrained(saved_model_directory)
trainer.save_model("lora_full_model")

# Check to see what files were saved
import os
print(f"\nFiles saved in '{saved_model_directory}':")
print(os.listdir(saved_model_directory))

lora_acc = lora_performance["eval_accuracy"] * 100
base_acc = base_performance["eval_accuracy"] * 100

print(f"\n=== Results Summary (model dir: {saved_model_directory}) ===")
print(f"Base Accuracy: {base_acc:.1f}%")
print(f"LoRA Accuracy: {lora_acc:.1f}%")



LoRA Model Performance: {'eval_loss': 0.1889158934354782, 'eval_accuracy': 0.9364473684210526, 'eval_runtime': 20.5097, 'eval_samples_per_second': 370.556, 'eval_steps_per_second': 23.16, 'epoch': 2.0}
LoRA Model Accuracy: 93.6%


Files saved in 'lora_ag_news':
['adapter_model.safetensors', 'README.md', 'adapter_config.json']

=== Results Summary (model dir: lora_ag_news) ===
Base Accuracy: 28.0%
LoRA Accuracy: 93.6%


In [13]:
# Final Inference with Reloaded PEFT Model
from peft import AutoPeftModelForSequenceClassification
from transformers import AutoTokenizer
import torch, random

# Reload the saved PEFT model
loaded_peft_model = AutoPeftModelForSequenceClassification.from_pretrained(saved_model_directory, num_labels=4)

# Reload tokenizer (same as base model used during training)
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Put model on correct device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loaded_peft_model.to(device)
loaded_peft_model.eval()

print(f"✅ Saved PEFT model loaded successfully from {saved_model_directory}")

# Inference on random samples from the test set
num_samples_to_show = 6
num_test_samples = len(tokenized_dataset["test"])
num_samples_to_show = min(num_samples_to_show, num_test_samples)

random_indices = random.sample(range(num_test_samples), num_samples_to_show)
sample_texts = [dataset["test"][i]["text"] for i in random_indices]
ground_truth_labels = [dataset["test"][i]["label"] for i in random_indices]

inputs = tokenizer(sample_texts, padding=True, truncation=True, return_tensors="pt", max_length=128)
inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

with torch.no_grad():
    logits = loaded_peft_model(**inputs).logits

predictions = torch.argmax(logits, dim=-1)

print("\n🔎 Inference Results:")
for i, (text, pred, ground_truth_id) in enumerate(zip(sample_texts, predictions, ground_truth_labels)):
    predicted_label_name = label_names[pred]
    ground_truth_label_name = label_names[ground_truth_id]
    is_correct = "✅ Correct" if pred.item() == ground_truth_id else "❌ Incorrect"
    print(f"{i+1}. Text: \"{text}\"")
    print(f"   Predicted: {predicted_label_name:<10} | Ground Truth: {ground_truth_label_name:<10} | {is_correct}\n")


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Saved PEFT model loaded successfully from lora_ag_news

🔎 Inference Results:
1. Text: "Photos Plus Music Equals an Expensive iPod (washingtonpost.com) washingtonpost.com - First Apple put some color on the iPod, when it offered the iPod mini in a palette of pastel hues, and now it has put some color inside it, in the form of the new iPod Photo."
   Predicted: Sci/Tech   | Ground Truth: Sci/Tech   | ✅ Correct

2. Text: "Bryant Makes First Appearance at Trial (AP) AP - NBA star Kobe Bryant arrived at his sexual assault trial Monday as attorneys in the case who spent the weekend poring over questionnaires prepared to question potential jurors individually."
   Predicted: Sports     | Ground Truth: Sci/Tech   | ❌ Incorrect

3. Text: "Owners Seek Best Ballpark Deal for Expos (AP) AP - Trying to get the best possible ballpark deal for the Montreal Expos, major league baseball instructed its lawyers to press ahead with negotiations involving four of the areas bidding for the team."
   Predi