In [None]:
# INSTALL DEPENDENCIES
# ============================================================
!pip install -q transformers datasets torch scikit-learn evaluate accelerate peft sentence-transformers

from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments
)
from peft import LoraConfig, get_peft_model
import torch
import evaluate
import numpy as np
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# LOAD & PREPROCESS DATASET
# ============================================================
print("Loading dataset...")
# Pull the benchmark from the Hugging Face Hub and keep only the split
# registered under the "evaluation" key.
dataset = load_dataset("potsawee/wiki_bio_gpt3_hallucination")["evaluation"]

def preprocess_labels(example, aggregation="first"):
    """Attach a passage-level label and flattened text fields to one example.

    Adds four keys to ``example`` (mutating and returning it):

    * ``label_text``     - human-readable class name
    * ``label``          - integer class id (0, 1 or 2)
    * ``generated_text`` - ``gpt3_sentences`` joined with single spaces
    * ``reference_text`` - copy of ``wiki_bio_text``

    Args:
        example: mapping with ``annotation`` (sequence of tags drawn from
            ``accurate`` / ``minor_inaccurate`` / ``major_inaccurate``;
            presumably one tag per entry of ``gpt3_sentences`` - confirm
            against the dataset card), ``gpt3_sentences`` and
            ``wiki_bio_text``.
        aggregation: how the tag sequence is reduced to one passage label.
            ``"first"`` (default, the historical behaviour) uses only the
            first tag; ``"worst"`` uses the most severe tag present;
            ``"majority"`` uses the most frequent tag, ties going to the
            more severe one.

    Returns:
        The same ``example`` mapping with the four keys above added.

    Raises:
        KeyError: if an inspected tag is not one of the three known tags.
        IndexError: if ``aggregation="first"`` and ``annotation`` is empty.
        ValueError: if ``aggregation`` is unknown, or ``annotation`` is
            empty under ``"worst"``.
    """
    # Severity doubles as the integer class id; the list index of
    # ``label_names`` is that same id.
    severity = {"accurate": 0, "minor_inaccurate": 1, "major_inaccurate": 2}
    label_names = ["No Hallucination", "Partial Hallucination", "Hallucinating"]

    annotations = example["annotation"]

    # NOTE(review): the default labels the *whole* joined passage from the
    # first tag only, so later tags are ignored. Consider "worst" or
    # "majority" if a passage-level target is what is wanted.
    if aggregation == "first":
        label_id = severity[annotations[0]]
    elif aggregation == "worst":
        label_id = max(severity[tag] for tag in annotations)
    elif aggregation == "majority":
        tag_ids = [severity[tag] for tag in annotations]
        # Sort key is (frequency, severity) so ties resolve to the more
        # severe class deterministically.
        label_id = max(range(len(label_names)), key=lambda k: (tag_ids.count(k), k))
    else:
        raise ValueError(
            f"Unknown aggregation {aggregation!r}; expected 'first', 'worst' or 'majority'"
        )

    example["label_text"] = label_names[label_id]
    example["label"] = label_id

    example["generated_text"] = " ".join(example["gpt3_sentences"])
    example["reference_text"] = example["wiki_bio_text"]
    return example

# Derive the label/text fields, drop the raw columns that are no longer
# needed, then hold out 20% of the rows with a fixed seed so the split is
# repeatable.
dataset = (
    dataset
    .map(preprocess_labels)
    .remove_columns(["annotation", "gpt3_sentences", "wiki_bio_test_idx", "gpt3_text_samples"])
    .train_test_split(test_size=0.2, seed=42)
)
print(dataset)



Loading dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/evaluation-00000-of-00001-e91191b8f(…):   0%|          | 0.00/2.56M [00:00<?, ?B/s]

Generating evaluation split:   0%|          | 0/238 [00:00<?, ? examples/s]

Map:   0%|          | 0/238 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['gpt3_text', 'wiki_bio_text', 'label_text', 'label', 'generated_text', 'reference_text'],
        num_rows: 190
    })
    test: Dataset({
        features: ['gpt3_text', 'wiki_bio_text', 'label_text', 'label', 'generated_text', 'reference_text'],
        num_rows: 48
    })
})


In [None]:
# LOAD MODEL + TOKENIZER (PRE-TRAINED)
# ============================================================
model_name = "microsoft/deberta-v3-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Three output classes, one per integer label id (0, 1, 2) produced by
# preprocess_labels.
classifier_kwargs = {"num_labels": 3}
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, **classifier_kwargs)



Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# APPLY LoRA (instead of full fine-tuning)
# ============================================================
# LoRA adapter configuration.
#
# `target_modules` is not set, so PEFT's built-in default for this
# architecture decides which layers receive low-rank adapters.
#
# `modules_to_save` lists layers that are trained in full (not via low-rank
# updates) and stored with the adapter. Loading this checkpoint for sequence
# classification creates BOTH `classifier.*` and `pooler.dense.*` from
# scratch. If the pooler is not listed here it stays frozen at its random
# initialisation, and the classifier only ever sees a random projection of
# the encoder output.
lora_config = LoraConfig(
    r=8,                  # Rank of LoRA matrices
    lora_alpha=16,        # Scaling
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_CLS",
    modules_to_save=["classifier", "pooler"],
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()   # Reports trainable vs. total parameter counts


trainable params: 149,763 || all params: 142,046,982 || trainable%: 0.1054


In [None]:
# TOKENIZE DATA
# ============================================================
def tokenize_function(examples):
    """Encode (generated, reference) text pairs for the classifier.

    Args:
        examples: batch mapping with ``generated_text`` and
            ``reference_text`` entries.

    Returns:
        The tokenizer output for the pairs, truncated and padded to exactly
        512 tokens per pair.
    """
    first_segment = examples["generated_text"]
    second_segment = examples["reference_text"]
    encoded = tokenizer(
        first_segment,
        second_segment,
        max_length=512,
        padding="max_length",
        truncation=True,
    )
    return encoded

# Tokenize both splits in batches, then expose only the columns listed
# below as torch tensors.
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tensor_columns = ["input_ids", "attention_mask", "label"]
tokenized_dataset.set_format(type="torch", columns=tensor_columns)

Map:   0%|          | 0/190 [00:00<?, ? examples/s]

Map:   0%|          | 0/48 [00:00<?, ? examples/s]

In [None]:
# METRICS
# ============================================================
# Metric objects are loaded once here and reused by compute_metrics.
accuracy_metric, f1_metric = (evaluate.load(metric_id) for metric_id in ("accuracy", "f1"))

def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged F1 for an evaluation pass.

    Args:
        eval_pred: pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        Dict with keys ``accuracy`` and ``f1_macro``.
    """
    logits, labels = eval_pred
    predicted_ids = logits.argmax(-1)

    accuracy_result = accuracy_metric.compute(predictions=predicted_ids, references=labels)
    f1_result = f1_metric.compute(predictions=predicted_ids, references=labels, average="macro")

    return {"accuracy": accuracy_result["accuracy"], "f1_macro": f1_result["f1"]}




Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
# TRAINING SETUP
# ============================================================
# Training hyper-parameters for the LoRA fine-tuning run.
training_args = TrainingArguments(
    output_dir="./deberta-lora-hallucination",
    eval_strategy="epoch",
    save_strategy="epoch",
    # Log the training loss every epoch. With step-based logging at the
    # library's default interval, a run this short never reaches a logging
    # step and the results table shows "No log" for Training Loss.
    logging_strategy="epoch",
    num_train_epochs=8,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-4,          # LoRA uses slightly higher LR
    weight_decay=0.01,
    load_best_model_at_end=True,
    # The three classes are imbalanced, so plain accuracy cannot distinguish
    # a model that always predicts the majority class from one that has
    # learned something. Macro-F1 (the "f1_macro" key returned by
    # compute_metrics) weights every class equally.
    metric_for_best_model="f1_macro",
    logging_dir="./logs",
    report_to="none"
)



In [None]:
# TRAINER
# ============================================================
# NOTE(review): the held-out "test" split is also the per-epoch evaluation
# set that drives best-checkpoint selection, so metrics reported on it later
# are optimistic. Consider carving a separate validation split from the
# training data.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is the
    # replacement keyword and still provides padding and tokenizer saving.
    processing_class=tokenizer,
    compute_metrics=compute_metrics
)

print("\nStarting LoRA Fine-Tuning...\n")
trainer.train()


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.



Starting LoRA Fine-Tuning...





Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,No log,0.960409,0.604167,0.251082
2,No log,0.941031,0.604167,0.251082
3,No log,0.936,0.604167,0.251082
4,No log,0.9397,0.604167,0.251082
5,No log,0.94127,0.604167,0.251082
6,No log,0.939137,0.604167,0.251082
7,No log,0.938402,0.604167,0.251082
8,No log,0.93895,0.604167,0.251082




TrainOutput(global_step=192, training_loss=1.0085180600484211, metrics={'train_runtime': 5858.6529, 'train_samples_per_second': 0.259, 'train_steps_per_second': 0.033, 'total_flos': 202060518359040.0, 'train_loss': 1.0085180600484211, 'epoch': 8.0})

In [None]:
# EVALUATION
# ============================================================
print("\nEvaluating model...")

# One evaluation pass over the trainer's configured eval_dataset; the
# returned metrics dict is kept in `eval_results` and echoed.
eval_results = trainer.evaluate()
print(eval_results)



Evaluating model...




{'eval_loss': 0.9604091048240662, 'eval_accuracy': 0.6041666666666666, 'eval_f1_macro': 0.2510822510822511, 'eval_runtime': 54.809, 'eval_samples_per_second': 0.876, 'eval_steps_per_second': 0.109, 'epoch': 8.0}


In [None]:
# SAVE LoRA ADAPTERS
# ============================================================
save_path = "./deberta-lora-hallucination"

# Write the model first, then the tokenizer files, into the same directory.
for save_to in (trainer.save_model, tokenizer.save_pretrained):
    save_to(save_path)

print(f"\nLoRA adapters saved to: {save_path}")



LoRA adapters saved to: ./deberta-lora-hallucination


In [None]:
# INFERENCE
# ============================================================
def check_hallucination(generated_text, reference_text):
    """Classify how far ``generated_text`` deviates from ``reference_text``.

    The pair is tokenized exactly as during training (generated text first,
    reference second, truncated to 512 tokens) and scored with the
    module-level ``model``.

    Args:
        generated_text: text to be checked.
        reference_text: trusted text to check against.

    Returns:
        One of ``"Not Hallucinating"``, ``"Minor Hallucination"`` or
        ``"Hallucinating"`` for predicted class ids 0, 1 and 2.
    """
    inputs = tokenizer(
        generated_text,
        reference_text,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt"
    )

    # The tokenizer returns CPU tensors; the model may live on an
    # accelerator after training, so move the inputs to wherever the
    # weights are.
    device = next(model.parameters()).device
    inputs = inputs.to(device)

    # Force eval mode so dropout is disabled during scoring, then restore
    # whatever mode the model was in so the call has no lasting side effect.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits = model(**inputs).logits
    finally:
        model.train(was_training)

    pred = torch.argmax(logits, dim=-1).item()

    # NOTE(review): these display names differ from the "label_text" names
    # used in preprocessing ("No Hallucination" / "Partial Hallucination");
    # kept unchanged here because callers may depend on the exact strings.
    mapping = {
        0: "Not Hallucinating",
        1: "Minor Hallucination",
        2: "Hallucinating"
    }
    return mapping[pred]



In [None]:
# AUTOMATIC TEST EVALUATION
# ============================================================
# Fixed class ids and display names. Passing the ids explicitly to the
# sklearn helpers keeps the report and confusion matrix three classes wide
# even when a class is absent from both the test split and the predictions
# (otherwise `target_names` would not match the inferred label set).
class_ids = [0, 1, 2]
class_names = ["No Hallucination", "Partial Hallucination", "Hallucinating"]

# Tokenizer output is on CPU; send it to whichever device holds the model.
device = next(model.parameters()).device
model.eval()  # make sure dropout is off while scoring

preds, truth = [], []
for ex in dataset["test"]:
    inputs = tokenizer(
        ex["generated_text"], ex["reference_text"],
        truncation=True, padding=True, max_length=512,
        return_tensors="pt"
    ).to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    preds.append(torch.argmax(logits, dim=-1).item())
    truth.append(ex["label"])

acc = accuracy_score(truth, preds)
print(f"\nFinal Accuracy: {acc*100:.2f}%\n")

print("\nClassification Report:")
print(classification_report(
    truth, preds,
    labels=class_ids,
    target_names=class_names,
    # Classes that are never predicted have undefined precision; report 0
    # for them instead of emitting a warning per metric.
    zero_division=0
))

print("\nConfusion Matrix:")
print(confusion_matrix(truth, preds, labels=class_ids))


Final Accuracy: 60.42%


Classification Report:
                       precision    recall  f1-score   support

     No Hallucination       0.00      0.00      0.00        12
Partial Hallucination       0.60      1.00      0.75        29
        Hallucinating       0.00      0.00      0.00         7

             accuracy                           0.60        48
            macro avg       0.20      0.33      0.25        48
         weighted avg       0.37      0.60      0.46        48


Confusion Matrix:
[[ 0 12  0]
 [ 0 29  0]
 [ 0  7  0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Show the first three test rows next to their true and predicted classes.
label_names = ["No Hallucination", "Partial Hallucination", "Hallucinating"]

print("\n Sample Predictions:")
for i in range(3):
    sample = dataset['test'][i]  # fetch the row once and reuse it below
    print(f"\nExample {i+1}:")
    # Only the first 200 characters of each text are shown.
    print("Generated:", sample['generated_text'][:200], "...")
    print("Reference:", sample['reference_text'][:200], "...")
    print("True Label:", label_names[sample["label"]])
    print("Predicted Label:", label_names[preds[i]])


 Sample Predictions:

Example 1:
Generated: Jean Hugo (1894–1984) was a French painter, illustrator, engraver, and sculptor. He was born in Paris, the son of the writer Victor Hugo and his wife, Juliette Drouet. He was the grandson of the poet, ...
Reference: Jean Hugo (19 November 1894 - 21 June 1984) was a painter, illustrator, theatre designer, and author. He was born in Paris and died in his home at the Mas de Fourques, near Lunel, France. Brought up i ...
True Label: No Hallucination
Predicted Label: Partial Hallucination

Example 2:
Generated: Jeanine Riley (born Jeanine Marie Riley, October 13, 1940) is an American actress, singer, and dancer. She is best known for her roles as Billie Jo Bradley on the television series Petticoat Junction  ...
Reference: Jeanine Riley (born October 1, 1940 in Madera, California, USA) is an American actress. Riley has appeared in guest roles on numerous television series ("Route 66", "The Man from U.N.C.L.E.", "The Wil ...
True Label: Partial Ha