# Install & Import Libraries

In [None]:
# ✅ Run this cell in Colab to install necessary packages
!pip install transformers datasets scikit-learn --quiet


In [None]:
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import random
from torch.utils.data import DataLoader
from tqdm import tqdm

# ✅ Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


# Load SST-2 Dataset (via Hugging Face datasets)

In [None]:
# Load SST-2 (Stanford Sentiment Treebank)
dataset = load_dataset("glue", "sst2")

# Use pretrained tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenize function
def tokenize_fn(example):
    return tokenizer(
        example["sentence"],
        padding="max_length",
        truncation=True,
        max_length=128,
    )

# Apply tokenization
tokenized = dataset.map(tokenize_fn, batched=True)
tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

# Train/Val Split & DataLoader

In [None]:
train_data = tokenized["train"]
val_data = tokenized["validation"]

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64)


# Define LoRA Module & Inject into BERT Attention

In [None]:
class LoRALinear(nn.Module):
    def __init__(self, original_linear: nn.Linear, r: int = 4, alpha: int = 16):
        super().__init__()
        self.in_features = original_linear.in_features
        self.out_features = original_linear.out_features
        self.r = r
        self.alpha = alpha

        # Original frozen weight
        self.weight = original_linear.weight
        self.bias = original_linear.bias

        # LoRA adapters (A: down-projection, B: up-projection)
        self.A = nn.Parameter(torch.randn(r, self.in_features) * 0.01)
        self.B = nn.Parameter(torch.randn(self.out_features, r) * 0.01)

        # Scaling factor
        self.scaling = self.alpha / self.r

        # Freeze the original weight
        self.weight.requires_grad = False
        if self.bias is not None:
            self.bias.requires_grad = False

    def forward(self, x):
        # LoRA: W(x) + alpha/r * BA(x)
        lora_update = (x @ self.A.T) @ self.B.T
        return nn.functional.linear(x, self.weight) + self.scaling * lora_update


# Inject LoRA into BERT Attention Layers

In [None]:
def inject_lora_into_bert(model, r=4, alpha=16):
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear) and ("attention.self.query" in name or "attention.self.value" in name):
            parent = get_parent_module(model, name)
            layer_name = name.split(".")[-1]
            setattr(parent, layer_name, LoRALinear(module, r=r, alpha=alpha))


In [None]:
def get_parent_module(model, module_name):
    components = module_name.split(".")
    for comp in components[:-1]:
        model = getattr(model, comp)
    return model


## Load Pretrained BERT & Inject LoRA

In [None]:
lora_config = {"r": 4, "alpha": 16}

# Load BERT-base for binary classification
model_lora = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
inject_lora_into_bert(model_lora, **lora_config)
model_lora.to(device)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): LoRALinear()
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): LoRALinear()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=Fa

## Freeze All But LoRA Parameters

In [None]:
# Freeze all parameters
for param in model_lora.parameters():
    param.requires_grad = False

# Enable training only for LoRA adapters
for name, param in model_lora.named_parameters():
    if "A" in name or "B" in name:
        param.requires_grad = True


# Train LoRA-injected BERT on SST-2

In [None]:
def train(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0
    for batch in tqdm(dataloader, desc="Training", leave=False):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(dataloader)


In [None]:
def evaluate(model, dataloader):
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels.extend(batch["label"].cpu().numpy())

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            pred = torch.argmax(logits, dim=-1).cpu().numpy()
            preds.extend(pred)

    acc = accuracy_score(labels, preds)
    report = classification_report(labels, preds, output_dict=True)
    return acc, report


In [None]:
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss

optimizer = AdamW(filter(lambda p: p.requires_grad, model_lora.parameters()), lr=1e-4)
criterion = CrossEntropyLoss()

epochs = 3
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    train_loss = train(model_lora, train_loader, optimizer, criterion)
    val_acc, _ = evaluate(model_lora, val_loader)

    print(f"Train Loss: {train_loss:.4f} | Validation Accuracy: {val_acc:.4f}")



Epoch 1/3


  return forward_call(*args, **kwargs)


Train Loss: 0.3326 | Validation Accuracy: 0.8991

Epoch 2/3


  return forward_call(*args, **kwargs)


Train Loss: 0.2424 | Validation Accuracy: 0.9094

Epoch 3/3


  return forward_call(*args, **kwargs)


Train Loss: 0.2150 | Validation Accuracy: 0.9174


# Comparison — LoRA vs Full Fine-Tuning

In [24]:
def count_trainable(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

lora_params = count_trainable(model_lora)

print(f"LoRA Trainable Params: {lora_params:,}")

LoRA Trainable Params: 147,456


In [25]:
lora_acc, lora_report = evaluate(model_lora, val_loader)

print(f"\n📊 Final Accuracy:")
print(f"LoRA       : {lora_acc:.4f}")



📊 Final Accuracy:
LoRA       : 0.9174


In [26]:
from sklearn.metrics import classification_report

print("\n🧾 LoRA Classification Report:")
print(lora_report)


🧾 LoRA Classification Report:
{'0': {'precision': 0.8973214285714286, 'recall': 0.9392523364485982, 'f1-score': 0.9178082191780822, 'support': 428.0}, '1': {'precision': 0.9386792452830188, 'recall': 0.8963963963963963, 'f1-score': 0.9170506912442397, 'support': 444.0}, 'accuracy': 0.9174311926605505, 'macro avg': {'precision': 0.9180003369272237, 'recall': 0.9178243664224972, 'f1-score': 0.9174294552111609, 'support': 872.0}, 'weighted avg': {'precision': 0.9183797664383393, 'recall': 0.9174311926605505, 'f1-score': 0.9174225054136027, 'support': 872.0}}


# Inference on Real Sentences

In [28]:
def predict_sentiment(text, model, tokenizer):
    model.eval()
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=-1)
        pred = torch.argmax(probs, dim=-1).item()
        confidence = probs[0, pred].item()

    label = "Positive" if pred == 1 else "Negative"
    return label, confidence


In [29]:
sentences = [
    "The movie was fantastic and thrilling!",
    "I wouldn't recommend it to anyone.",
    "It was okay, not great but not bad.",
    "This is one of the best performances I've seen.",
    "The film lacked a solid storyline.",
]

print("🧠 Inference Results\n")
for text in sentences:
    lora_label, lora_conf = predict_sentiment(text, model_lora, tokenizer)

    print(f"🔹 Sentence: {text}")
    print(f"   LoRA ➤ {lora_label} ({lora_conf:.2f})")


🧠 Inference Results

🔹 Sentence: The movie was fantastic and thrilling!
   LoRA ➤ Positive (0.98)
🔹 Sentence: I wouldn't recommend it to anyone.
   LoRA ➤ Negative (0.90)
🔹 Sentence: It was okay, not great but not bad.
   LoRA ➤ Positive (0.93)
🔹 Sentence: This is one of the best performances I've seen.
   LoRA ➤ Positive (0.98)
🔹 Sentence: The film lacked a solid storyline.
   LoRA ➤ Negative (0.98)
