**Setup: Install Dependencies**

In [None]:
%pip install transformers datasets accelerate
%pip install evaluate
%pip install -U datasets peft
%pip install scikit-learn

**Load your dataset**

In [None]:
from datasets import load_dataset

# Fetch the "en" configuration and bind each predefined split to its own name.
dataset = load_dataset("mteb/amazon_massive_intent", "en")
train_ds, val_ds, test_ds = (dataset[split] for split in ("train", "validation", "test"))

**Inspect labels & build mappings**

In [None]:
# Map every distinct label to a contiguous integer id, in sorted order so the mapping is
# stable across runs. The raw `dataset` splits are read (rather than `train_ds`) so that
#   * a label that occurs only in validation/test cannot raise a KeyError in preprocessing;
#   * this cell still works when re-run after the split variables have been tokenized and
#     no longer carry a "label" column.
labels = sorted({label for split in dataset.values() for label in split["label"]})
label2id = {label: idx for idx, label in enumerate(labels)}
id2label = {idx: label for label, idx in label2id.items()}

num_labels = len(labels)
print("num_labels =", num_labels)

**Tokenizer + preprocessing**

In [None]:
from transformers import AutoTokenizer

# Checkpoint name used for both the tokenizer and the classification model.
model_name = "distilbert-base-uncased"
# Upper bound on tokens per example; longer inputs are truncated during preprocessing.
MAX_LEN = 128

tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess(batch):
    """Tokenize a batch of utterances and attach integer class ids.

    Args:
        batch: Mapping with a "text" list and a parallel "label" list, as passed by
            `Dataset.map(..., batched=True)`.

    Returns:
        The tokenizer output for the batch with an extra "labels" entry holding
        `label2id[label]` for every example.
    """
    # Only truncate here. Padding is left to the DataCollatorWithPadding given to the
    # Trainer, which pads each batch to its own longest sequence; padding every example
    # to MAX_LEN up front made that collator a no-op and wasted compute on pad tokens.
    enc = tokenizer(batch["text"], truncation=True, max_length=MAX_LEN)
    enc["labels"] = [label2id[label] for label in batch["label"]]
    return enc

def _tokenize_split(split_name):
    """Return the named raw split run through `preprocess`, formatted as torch tensors."""
    raw_split = dataset[split_name]
    tokenized = raw_split.map(
        preprocess, batched=True, remove_columns=raw_split.column_names
    )
    tokenized.set_format(type="torch")
    return tokenized

# Always start from the raw `dataset` rather than from train_ds/val_ds/test_ds: after the
# first run those names point at tokenized data without the "text"/"label" columns that
# `preprocess` reads, so mapping over them again would fail when the cell is re-run.
train_ds = _tokenize_split("train")
val_ds   = _tokenize_split("validation")
test_ds  = _tokenize_split("test")

**Load base model + attach LoRA**

In [None]:
from transformers import AutoModelForSequenceClassification, set_seed
from peft import LoraConfig, get_peft_model

# Seed Python/NumPy/PyTorch before any weights are created. The new classification head
# and the LoRA weights are randomly initialised in this cell, i.e. before the Trainer
# applies its own seed, so without this every run would start from different weights.
SEED = 42  # same value as the TrainingArguments default `seed`
set_seed(SEED)

base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=num_labels, id2label=id2label, label2id=label2id
)

# LoRA config for DistilBERT
lora_cfg = LoraConfig(
    r=8,                 # rank of the low-rank update matrices
    lora_alpha=16,       # scaling factor applied to the update
    lora_dropout=0.05,   # dropout on the LoRA branch (regularization)
    bias="none",         # leave bias terms frozen
    target_modules=["q_lin","v_lin"],  # DistilBERT attention query/value projections
    task_type="SEQ_CLS"
)

model = get_peft_model(base_model, lora_cfg)
# Sanity check: only a small fraction of all parameters should be reported as trainable.
model.print_trainable_parameters()


**Training setup (Trainer)**

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
import evaluate
import numpy as np

# Metric objects are created once here and reused by every evaluation pass.
acc, f1 = evaluate.load("accuracy"), evaluate.load("f1")

def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy and macro-averaged F1.

    Args:
        eval_pred: Pair `(logits, labels)` as handed over by the Trainer.

    Returns:
        Dict with the keys "accuracy" and "f1_macro".
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=-1)  # highest-scoring class per example
    accuracy_result = acc.compute(predictions=predicted_ids, references=references)
    f1_result = f1.compute(
        predictions=predicted_ids, references=references, average="macro"
    )
    return {
        "accuracy": accuracy_result["accuracy"],
        "f1_macro": f1_result["f1"],
    }

args = TrainingArguments(
    output_dir="./results_intent_lora",
    report_to="none",
    # --- optimisation ---
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    warmup_ratio=0.05,
    # --- evaluate, log and checkpoint once per epoch ---
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    # --- when training ends, restore the checkpoint selected by validation macro-F1 ---
    load_best_model_at_end=True,
    metric_for_best_model="f1_macro",
)

# Pads each batch with the tokenizer's padding settings at collation time.
collator = DataCollatorWithPadding(tokenizer=tokenizer)

**Early stopping: halt training when the validation metric stops improving**

In [None]:
from transformers import EarlyStoppingCallback

# Passed to the Trainer; a patience of 2 tolerates two evaluations without improvement
# before training is stopped.
callbacks = [
    EarlyStoppingCallback(early_stopping_patience=2),
]

In [None]:
import inspect

# Newer transformers releases take the tokenizer via `processing_class` and deprecate the
# `tokenizer` keyword; older releases only know `tokenizer`. The installs in this notebook
# are unpinned, so pick whichever keyword the installed Trainer actually accepts.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=collator,
    compute_metrics=compute_metrics,
    callbacks=callbacks,
    **{_tokenizer_kwarg: tokenizer},
)

trainer.train()

**Evaluate on test split**

In [None]:
# Run the evaluation loop on the held-out test split and show the resulting metric dict.
metrics = trainer.evaluate(eval_dataset=test_ds)
print(metrics)

**Save the LoRA adapter**

In [None]:
# Write the model and the tokenizer files into the same directory, model first.
save_path = f"./{model_name}-finetuned-intent"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_path)

**Reload for inference**

In [None]:
from peft import PeftModel
from transformers import pipeline, AutoModelForSequenceClassification

# Rebuild the base architecture with the same label space, then load the saved adapter
# weights from `save_path` on top of it.
base = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=num_labels, id2label=id2label, label2id=label2id
)
lora_loaded = PeftModel.from_pretrained(base, save_path)

# Fold the LoRA weights into the base layers so the pipeline receives a plain
# transformers model instead of the PEFT wrapper, with no adapter indirection at
# inference time. Note: merging modifies the wrapped base model in place.
merged_model = lora_loaded.merge_and_unload()
merged_model.eval()  # make sure dropout is disabled for prediction

clf = pipeline("text-classification", model=merged_model, tokenizer=tokenizer, top_k=1)

print(clf("please cancel my alarm"))