# Set up

In [3]:
from datasets import load_dataset, ClassLabel
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
import numpy as np
import evaluate
import os
from datetime import datetime

# Load the `heegyu/news-category-dataset` dataset. The code below reads its
# "train" split and the "headline", "short_description" and "category" columns.
ds = load_dataset("heegyu/news-category-dataset")

def join_cols(batch):
    """Add a ``text`` field made of the headline followed by the short description.

    Missing or falsy values for either field are treated as empty strings, the
    two pieces are joined with a single space, and surrounding whitespace is
    stripped. The mapping is modified in place and returned.
    """
    pieces = [batch.get(field, "") or "" for field in ("headline", "short_description")]
    batch["text"] = " ".join(pieces).strip()
    return batch

# Add the combined "text" column to every example.
ds = ds.map(join_cols)

# Category names in sorted order define the integer ids: position -> name.
label_names = sorted(set(ds["train"]["category"]))
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}

def encode_label(batch):
    """Store the integer id of the example's ``category`` under ``labels``.

    The id is looked up in the module-level ``label2id`` mapping; a category
    that is not in the mapping raises ``KeyError``. The mapping passed in is
    modified in place and returned.
    """
    category = batch["category"]
    batch["labels"] = label2id[category]
    return batch

# Encode categories as integer ids, then cast the column to ClassLabel; the
# splits below stratify on it.
ds = ds.map(encode_label).cast_column("labels", ClassLabel(names=label_names))

# 80/10/10 train/validation/test: hold out 20%, then split the held-out part in half.
tmp = ds["train"].train_test_split(test_size=0.2, seed=42, stratify_by_column="labels")
valtest = tmp["test"].train_test_split(test_size=0.5, seed=42, stratify_by_column="labels")

# Full splits.
full_train_subset, full_val_subset, full_test_subset = (
    tmp["train"],
    valtest["train"],
    valtest["test"],
)

# Fixed-size subsets (the first N rows of each split); make_trainer uses the
# train and validation ones for the PEFT runs.
train_subset = full_train_subset.select(range(20000))
val_subset = full_val_subset.select(range(2000))
test_subset = full_test_subset.select(range(2000))

# Base checkpoint and its tokenizer, shared by the cells below.
checkpoint = "distilbert-base-uncased"
tok = AutoTokenizer.from_pretrained(checkpoint)

def tok_fn(batch):
    """Run the shared tokenizer ``tok`` over ``batch["text"]``.

    Truncation is enabled with ``max_length=256``; no padding is requested here.
    """
    texts = batch["text"]
    encoded = tok(texts, truncation=True, max_length=256)
    return encoded

def tokenize_split(dset):
    """Tokenize one dataset split with ``tok_fn`` in batched mode.

    Every column except ``text`` and ``labels`` is removed from the result;
    whatever ``tok_fn`` returns is added by ``dset.map``.
    """
    retained = ("text", "labels")
    dropped = list(filter(lambda col: col not in retained, dset.column_names))
    return dset.map(tok_fn, batched=True, remove_columns=dropped)

# Tokenize the fixed-size subsets first, then the full splits.
tok_train_subset, tok_val_subset, tok_test_subset = (
    tokenize_split(part) for part in (train_subset, val_subset, test_subset)
)
tok_full_train, tok_full_val, tok_full_test = (
    tokenize_split(part) for part in (full_train_subset, full_val_subset, full_test_subset)
)

# Metric objects used by compute_metrics.
accuracy, f1 = (evaluate.load(metric_name) for metric_name in ("accuracy", "f1"))

def compute_metrics(eval_pred):
    """Compute accuracy, micro-F1 and macro-F1 from a ``(logits, labels)`` pair.

    Predictions are the argmax of the logits over the last axis. Returns a dict
    with the keys ``accuracy``, ``f1_micro`` and ``f1_macro``.
    """
    scores, references = eval_pred
    predicted = np.argmax(scores, axis=-1)
    shared = dict(predictions=predicted, references=references)

    acc_result = accuracy.compute(**shared)
    micro_result = f1.compute(average="micro", **shared)
    macro_result = f1.compute(average="macro", **shared)

    return {
        "accuracy": acc_result["accuracy"],
        "f1_micro": micro_result["f1"],
        "f1_macro": macro_result["f1"],
    }


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

# Full fine-tuning of `distilbert-base-uncased` on the full dataset

In [20]:
# Full fine-tuning baseline: start from the generic checkpoint (no prior news
# fine-tuning) and train it on the full training split for one epoch.
num_labels = len(label_names)
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

# Training config. run_name doubles as the checkpoint directory and as the
# directory the final model and tokenizer are saved to at the end of this cell.
run_name = "huffpost-generic-distilbert-FULL"

args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=200,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",
)

# tok_fn does not pad, so padding happens per batch in the collator.
collator = DataCollatorWithPadding(tok)

trainer = Trainer(
    model=model,
    args=args,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
    # replacement and avoids the warning this cell used to emit.
    processing_class=tok,
    data_collator=collator,
    train_dataset=tok_full_train,
    eval_dataset=tok_full_val,
    compute_metrics=compute_metrics,
)

trainer.train()

# Evaluate on the held-out full test split.
test_metrics = trainer.evaluate(tok_full_test)
print("Test metrics:", test_metrics)

# Save model and tokenizer; this directory is what the PEFT cells load as
# `base_model`.
model.save_pretrained(run_name)
tok.save_pretrained(run_name)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,1.1409,1.071866,0.687921,0.687921,0.56949


Test metrics: {'eval_loss': 1.0572384595870972, 'eval_accuracy': 0.6932181549181502, 'eval_f1_micro': 0.6932181549181502, 'eval_f1_macro': 0.5674181930441067, 'eval_runtime': 46.2001, 'eval_samples_per_second': 453.528, 'eval_steps_per_second': 14.177, 'epoch': 1.0}


('huffpost-generic-distilbert-FULL\\tokenizer_config.json',
 'huffpost-generic-distilbert-FULL\\special_tokens_map.json',
 'huffpost-generic-distilbert-FULL\\vocab.txt',
 'huffpost-generic-distilbert-FULL\\added_tokens.json',
 'huffpost-generic-distilbert-FULL\\tokenizer.json')

# Prompt tuning (100 virtual tokens) of `distilbert-base-uncased` on the full dataset

In [29]:
from peft import PromptTuningConfig, get_peft_model

# Prompt tuning with 100 virtual tokens on the generic checkpoint, trained on
# the full training split for one epoch.
num_labels = len(label_names)
# Dimensions handed to PromptTuningConfig explicitly.
# NOTE(review): presumably these mirror the distilbert-base-uncased
# architecture (6 layers, hidden size 768, 12 heads) — confirm against the
# model config.
num_layers = 6
token_dim = 768

print("\n=== Training Prompt Tuning (100 tokens, FULL data) ===")

model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

prompt_cfg = PromptTuningConfig(
    task_type="SEQ_CLS",
    num_virtual_tokens=100,
    tokenizer_name_or_path=checkpoint,
    num_layers=num_layers,
    token_dim=token_dim,
    num_attention_heads=12,
)

model = get_peft_model(model, prompt_cfg)
model.print_trainable_parameters()

# NOTE(review): the recorded run of this cell reached ~0.36 validation accuracy
# versus ~0.69 for full fine-tuning after one epoch. The learning rate and
# epoch count below may be too small for prompt tuning — worth revisiting.
run_name = "huffpost-generic-distilbert-prompt100-FULL"
args = TrainingArguments(
    output_dir=run_name,
    run_name=run_name,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=200,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="none",
)

# tok_fn does not pad, so padding happens per batch in the collator.
collator = DataCollatorWithPadding(tok)

trainer = Trainer(
    model=model,
    args=args,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
    # replacement and avoids the warning this cell used to emit.
    processing_class=tok,
    data_collator=collator,
    train_dataset=tok_full_train,
    eval_dataset=tok_full_val,
    compute_metrics=compute_metrics,
)

trainer.train()

# Evaluate on the held-out full test split.
test_metrics = trainer.evaluate(tok_full_test)
print("Prompt tuning (100 tokens, FULL data) test metrics:", test_metrics)

# Save the PEFT model and the tokenizer next to the checkpoints.
model.save_pretrained(run_name)
tok.save_pretrained(run_name)



=== Training Prompt Tuning (100 tokens, FULL data) ===


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


trainable params: 699,690 || all params: 67,685,460 || trainable%: 1.0337


Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,2.6588,2.542844,0.362001,0.362001,0.076463


Prompt tuning (100 tokens, FULL data) test metrics: {'eval_loss': 2.5396175384521484, 'eval_accuracy': 0.3636710733546509, 'eval_f1_micro': 0.3636710733546509, 'eval_f1_macro': 0.0767062106376314, 'eval_runtime': 95.4866, 'eval_samples_per_second': 219.434, 'eval_steps_per_second': 6.86, 'epoch': 1.0}


('huffpost-generic-distilbert-prompt100-FULL\\tokenizer_config.json',
 'huffpost-generic-distilbert-prompt100-FULL\\special_tokens_map.json',
 'huffpost-generic-distilbert-prompt100-FULL\\vocab.txt',
 'huffpost-generic-distilbert-prompt100-FULL\\added_tokens.json',
 'huffpost-generic-distilbert-prompt100-FULL\\tokenizer.json')

# PEFT on top of the fully fine-tuned checkpoint (`huffpost-generic-distilbert-FULL`), trained on the 20k-example subset

In [23]:
from peft import LoraConfig, IA3Config, PromptTuningConfig, get_peft_model

# Common settings for the PEFT cells below.
# base_model is the directory the full fine-tuning cell saved its model and
# tokenizer to, so these runs start from the already fine-tuned weights.
base_model = "huffpost-generic-distilbert-FULL"
num_labels = len(label_names)

def make_trainer(model, output_dir="tmp", learning_rate=2e-5):
    """Build a Trainer for a one-epoch run on the reduced train/validation subsets.

    Training uses ``tok_train_subset`` and evaluates on ``tok_val_subset`` once
    per epoch; the best checkpoint by accuracy is reloaded at the end.

    Args:
        model: Model to train (the cells below pass PEFT-wrapped models).
        output_dir: Directory for checkpoints. Defaults to ``"tmp"``, the value
            that used to be hard-coded. Pass a distinct directory per run so
            that successive experiments do not share one checkpoint directory.
        learning_rate: Optimizer learning rate. Defaults to ``2e-5``, the value
            that used to be hard-coded.

    Returns:
        A configured ``Trainer``; call ``.train()`` on it to start training.
    """
    args = TrainingArguments(
        output_dir=output_dir,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_strategy="steps",
        logging_steps=200,
        learning_rate=learning_rate,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=32,
        num_train_epochs=1,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        report_to="none",
    )
    # tok_fn does not pad, so padding happens per batch in the collator.
    collator = DataCollatorWithPadding(tok)
    return Trainer(
        model=model,
        args=args,
        # `tokenizer=` is deprecated on Trainer; `processing_class=` is its
        # replacement and avoids the warning every caller used to trigger.
        processing_class=tok,
        data_collator=collator,
        train_dataset=tok_train_subset,
        eval_dataset=tok_val_subset,
        compute_metrics=compute_metrics,
    )



## LoRA (rank 2) on the fully fine-tuned checkpoint

In [25]:
# LoRA: rank-2 adapters on the q/k/v/out linear layers, applied to the
# checkpoint in `base_model`.
lora_cfg = LoraConfig(
    task_type="SEQ_CLS",
    target_modules=["q_lin", "k_lin", "v_lin", "out_lin"],
    r=2,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)
model = get_peft_model(
    AutoModelForSequenceClassification.from_pretrained(
        base_model,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id,
    ),
    lora_cfg,
)
trainer = make_trainer(model)
trainer.train()
# Report metrics on the 2,000-example test subset.
print("LoRA test:", trainer.evaluate(tok_test_subset))



  return Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,0.9701,1.049925,0.684,0.684,0.536678


LoRA test: {'eval_loss': 1.0978201627731323, 'eval_accuracy': 0.6795, 'eval_f1_micro': 0.6795, 'eval_f1_macro': 0.548948551407006, 'eval_runtime': 5.1008, 'eval_samples_per_second': 392.095, 'eval_steps_per_second': 12.351, 'epoch': 1.0}


## IA3 (query/value projections only) on the fully fine-tuned checkpoint

In [26]:
# IA3: targets only the q_lin and v_lin modules (no feed-forward modules),
# applied to the checkpoint in `base_model`.
ia3_cfg = IA3Config(
    target_modules=["q_lin", "v_lin"],
    feedforward_modules=[],
    task_type="SEQ_CLS",
)
model = get_peft_model(
    AutoModelForSequenceClassification.from_pretrained(
        base_model,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id,
    ),
    ia3_cfg,
)
trainer = make_trainer(model)
trainer.train()
# Report metrics on the 2,000-example test subset.
print("IA3 test:", trainer.evaluate(tok_test_subset))



  return Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,0.9631,1.049775,0.6845,0.6845,0.536798


IA3 test: {'eval_loss': 1.0973131656646729, 'eval_accuracy': 0.681, 'eval_f1_micro': 0.681, 'eval_f1_macro': 0.5497769557756874, 'eval_runtime': 4.6748, 'eval_samples_per_second': 427.829, 'eval_steps_per_second': 13.477, 'epoch': 1.0}


## Prompt tuning (10 virtual tokens) on the fully fine-tuned checkpoint

In [28]:
# Prompt Tuning: 10 virtual tokens, applied to the checkpoint in `base_model`.
prompt_cfg = PromptTuningConfig(
    task_type="SEQ_CLS",
    tokenizer_name_or_path=base_model,
    num_virtual_tokens=10,
    token_dim=768,
    num_layers=6,
    num_attention_heads=12,
)
model = get_peft_model(
    AutoModelForSequenceClassification.from_pretrained(
        base_model,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id,
    ),
    prompt_cfg,
)
trainer = make_trainer(model)
trainer.train()
# Report metrics on the 2,000-example test subset.
print("Prompt Tuning test:", trainer.evaluate(tok_test_subset))


  return Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro
1,1.1822,1.163325,0.6565,0.6565,0.500321


Prompt Tuning test: {'eval_loss': 1.2199472188949585, 'eval_accuracy': 0.6455, 'eval_f1_micro': 0.6455, 'eval_f1_macro': 0.5043429011990411, 'eval_runtime': 5.2205, 'eval_samples_per_second': 383.103, 'eval_steps_per_second': 12.068, 'epoch': 1.0}
