# Transformer Model — DistilRoBERTa for ANLI R2 NLI

**Goal:** Fine-tune a transformer model (DistilRoBERTa) for 3-way NLI classification (entailment / neutral / contradiction) on ANLI Round 2.

**Workflow:**
1. Load ANLI R2
2. Tokenize text
3. Train using HuggingFace Trainer
4. Evaluate (accuracy, macro F1, confusion matrix)
5. Error analysis
6. Save model

In [1]:
!pip install transformers datasets scikit-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## 1. Imports

In [2]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import numpy as np
import torch

# Prefer the project's own data-loading / preprocessing helpers when the
# `src` package is importable; otherwise define local fallbacks.
try:
    from src.data_loading import load_anli_r2
    from src.preprocessing import get_tokenizer, tokenize_batch
except ImportError:
    # Fallback for Colab.
    # Only ImportError is caught: any other failure raised while importing
    # `src` (or a Ctrl-C) should surface instead of silently switching to
    # the fallback implementations.
    def load_anli_r2():
        """Load ANLI and return its round-2 splits.

        Returns:
            The tuple ``(train_r2, dev_r2, test_r2)`` taken from
            ``load_dataset("facebook/anli", "plain_text")``.
        """
        ds = load_dataset("facebook/anli", "plain_text")
        return ds["train_r2"], ds["dev_r2"], ds["test_r2"]

    def get_tokenizer(model_name="distilroberta-base"):
        """Return the ``AutoTokenizer`` for ``model_name``."""
        return AutoTokenizer.from_pretrained(model_name)

    def tokenize_batch(batch, tokenizer):
        """Tokenize a batch of premise/hypothesis pairs.

        Args:
            batch: Mapping with ``"premise"`` and ``"hypothesis"`` entries.
            tokenizer: Callable invoked as
                ``tokenizer(premises, hypotheses, **options)``.

        Returns:
            Whatever ``tokenizer`` returns for the pair, called with
            truncation enabled and padding to a fixed length of 256.
        """
        return tokenizer(
            batch["premise"],
            batch["hypothesis"],
            truncation=True,
            padding="max_length",
            max_length=256,
        )

  from .autonotebook import tqdm as notebook_tqdm


RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
Failed to import transformers.integrations.integration_utils because of the following error (look up to see its traceback):
Failed to import transformers.modeling_tf_utils because of the following error (look up to see its traceback):
Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.

## 2. Load Dataset

In [None]:
# Unpack the three splits returned by load_anli_r2() into train / val / test.
train, val, test = load_anli_r2()
# Bare expression as the last line of the cell: the notebook displays `train`.
train

## 3. Tokenization

In [None]:
tokenizer = get_tokenizer("distilroberta-base")


def _prepare_split(split):
    """Tokenize one split, rename its label column and select torch columns."""
    encoded = split.map(lambda b: tokenize_batch(b, tokenizer), batched=True)
    # HF Trainer expects labels column named "labels"
    encoded = encoded.rename_column("label", "labels")
    encoded.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
    return encoded


# Same three-step pipeline for every split; only the input dataset differs.
tokenized_train, tokenized_val, tokenized_test = (
    _prepare_split(split) for split in (train, val, test)
)

## 4. Define Metrics

In [None]:
def compute_metrics(pred):
    """Score a batch of predictions with accuracy and macro-averaged F1.

    Args:
        pred: Object that unpacks into the pair ``(logits, labels)``.

    Returns:
        dict with the keys ``"accuracy"`` and ``"macro_f1"``.
    """
    scores, gold = pred
    # The predicted class is the index of the largest score on the last axis.
    predicted = np.argmax(scores, axis=-1)
    return {
        "accuracy": accuracy_score(gold, predicted),
        "macro_f1": f1_score(gold, predicted, average="macro"),
    }

## 5. Initialize Model (DistilRoBERTa)

In [None]:
# Sequence-classification model built from the DistilRoBERTa checkpoint,
# configured for the three classes of this task.
checkpoint = "distilroberta-base"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)

## 6. TrainingArguments + Trainer

In [None]:
import inspect

# Newer transformers releases renamed two keyword arguments used in this cell:
#   TrainingArguments(evaluation_strategy=...) -> eval_strategy
#   Trainer(tokenizer=...)                     -> processing_class
# The install above is unpinned, so pick whichever spelling the installed
# version accepts instead of hard-coding one of them.
_args_params = inspect.signature(TrainingArguments.__init__).parameters
_eval_key = "eval_strategy" if "eval_strategy" in _args_params else "evaluation_strategy"

_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_key = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

training_args = TrainingArguments(
    output_dir="./checkpoints_roberta",
    # Evaluate and checkpoint on the same schedule (once per epoch) so that
    # load_best_model_at_end has a checkpoint for every evaluated state.
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Key produced by compute_metrics; used to choose the best checkpoint.
    metric_for_best_model="macro_f1",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=100,
    **{_eval_key: "epoch"},
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    compute_metrics=compute_metrics,
    **{_tokenizer_key: tokenizer},
)

## 7. Train Model

In [None]:
# Run fine-tuning with the Trainer built in the previous cell. As the last
# expression of the cell, the call's return value is shown as cell output.
trainer.train()

## 8. Evaluate on Test Set

In [None]:
# Evaluate on the tokenized test split; the Trainer was given compute_metrics,
# so the result is expected to include accuracy and macro F1.
test_results = trainer.evaluate(tokenized_test)
# Bare expression as the last line of the cell: the notebook displays it.
test_results

In [None]:
# Detailed evaluation: per-class report and confusion matrix on the test split
raw_preds = trainer.predict(tokenized_test)
true_labels = raw_preds.label_ids
pred_labels = np.argmax(raw_preds.predictions, axis=-1)

# NOTE(review): assumes label ids 0 / 1 / 2 correspond to these names in this
# order — confirm against the dataset's label definition.
class_names = ["entailment", "neutral", "contradiction"]

report = classification_report(true_labels, pred_labels, target_names=class_names)
print(report)
matrix = confusion_matrix(true_labels, pred_labels)
print(matrix)

## 9. Error Analysis (Misclassified Examples)

In [None]:
import pandas as pd

# Attach gold and predicted label ids to the raw (untokenized) test rows so
# the misclassified examples can be read alongside their text.
test_df = test.to_pandas().assign(true_label=true_labels, pred_label=pred_labels)

# Keep only the rows where the prediction disagrees with the gold label.
is_wrong = test_df["true_label"].ne(test_df["pred_label"])
errors = test_df.loc[is_wrong]
errors.head(10)

## 10. Save Model

In [None]:
# Write the model and its tokenizer into the same directory.
save_dir = "roberta_anli_r2"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)
print("Saved roberta_anli_r2 model.")