Google Colab Setup

In [None]:
%pip install --upgrade \
    transformers \
    datasets \
    peft \
    accelerate \
    evaluate \
    fsspec \
    huggingface_hub

Collecting transformers
  Downloading transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting fsspec
  Downloading fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.32.4-py3-none-any.whl.metadata (14 kB)
Collecting fsspec
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux20

In [None]:
!rm -rf ~/.cache/huggingface/datasets
!rm -rf ~/.cache/huggingface/hub

LoRA fine-tuning of DistilBERT on GLUE SST-2 (`peft` version)

In [None]:
import os

import evaluate
import numpy as np
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
    set_seed,
)

# Tokenizer of the DistilBERT checkpoint; it is also handed to the data collator
# and the Trainer further down.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")


def preprocess_function(examples):
    """Tokenize the ``sentence`` field of a batch of examples.

    Truncation is enabled; no padding argument is passed, so padding is left
    to the data collator used at training time.
    """
    sentences = examples['sentence']
    return tokenizer(sentences, truncation=True)


# Load dataset (SST-2), tokenize every split in batches and rename the target
# column from "label" to "labels".
dataset = (
    load_dataset("glue", "sst2")
    .map(preprocess_function, batched=True)
    .rename_column("label", "labels")
)
# Return only the listed columns, formatted as torch tensors.
dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

train_dataset, eval_dataset = dataset["train"], dataset["validation"]

# Seed the RNGs before any weights are created: the classification head is newly
# initialised when the checkpoint is loaded (see the load warning in the cell
# output) and the LoRA matrices are created in get_peft_model, so without a seed
# every run starts from different weights.
set_seed(42)

# Load base model and apply LoRA
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    # LoRA adapters are attached to the query and value projections only.
    target_modules=["q_lin", "v_lin"],
    lora_dropout=0.1,
    bias="none",
    # Both head layers start from random weights, so they have to be trained and
    # stored together with the adapter. Naming them explicitly keeps that from
    # depending on PEFT's implicit classifier handling for SEQ_CLS; the recorded
    # trainable-parameter count (739,586) already includes both layers.
    modules_to_save=["pre_classifier", "classifier"],
)

model = get_peft_model(model, peft_config)
# Prints trainable vs. total parameter counts as a sanity check.
model.print_trainable_parameters()

# Evaluation metric: the GLUE "sst2" configuration from the `evaluate` library.
metric = evaluate.load("glue", "sst2")


def compute_metrics(eval_pred):
    """Score model outputs against the reference labels.

    ``eval_pred`` is unpacked into ``(logits, labels)``. The predicted class is
    the argmax over the last axis of the logits, and the result is whatever
    ``metric.compute`` returns for those predictions.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=100,
    # No evaluation or checkpointing during training; evaluation is run once
    # explicitly after trainer.train().
    eval_strategy="no",
    save_strategy="no",
    report_to="none",
    # The model is PEFT-wrapped, and Trainer warns that it cannot determine the
    # label columns automatically in that case. The dataset's target column was
    # renamed to "labels", so state it explicitly.
    label_names=["labels"],
)

# Pads each batch dynamically; the tokenization step itself does not pad.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Train
# The tokenizer is passed as `processing_class`: the `tokenizer=` keyword of
# Trainer is deprecated (the truncated warning ending in "trainer = Trainer(" in
# this cell's recorded output appears to be that deprecation notice).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    processing_class=tokenizer,
    data_collator=data_collator,
)

trainer.train()

# Evaluate on the validation split given to the Trainer and show the metrics.
eval_results = trainer.evaluate()
print("Eval results:", eval_results)

# Save model
# exist_ok=True replaces the separate exists()/makedirs() pair, so re-running
# the cell is safe and there is no gap between the check and the creation.
os.makedirs("./model", exist_ok=True)

save_path = os.path.join("./model", "lora_distilbert_sst2")
trainer.save_model(save_path)
print(f"[\u2713] Training complete. Model saved to {save_path}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 739,586 || all params: 67,694,596 || trainable%: 1.0925


Downloading builder script:   0%|          | 0.00/5.75k [00:00<?, ?B/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
100,0.4784
200,0.3501
300,0.3017
400,0.3293
500,0.3396
600,0.3405
700,0.288
800,0.2944
900,0.3338
1000,0.2922


Eval results: {'eval_loss': 0.3038165271282196, 'eval_accuracy': 0.8956422018348624, 'eval_runtime': 1.3143, 'eval_samples_per_second': 663.452, 'eval_steps_per_second': 41.846, 'epoch': 3.0}
[✓] Training complete. Model saved to ./model/lora_distilbert_sst2


Predict

In [None]:
from peft import PeftConfig, PeftModel
# Imported here as well so this cell can be run in a fresh kernel to load the
# saved adapter without re-running the training cell.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Directory the training cell saved the adapter to; defined once so the config
# and the weights are always read from the same place.
ADAPTER_DIR = "./model/lora_distilbert_sst2"

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# The saved adapter config records which base checkpoint it was trained on.
peft_config = PeftConfig.from_pretrained(ADAPTER_DIR)
# Loading the base checkpoint warns about a newly initialised classification
# head; those weights are expected to be replaced by the ones stored with the
# adapter when it is attached on the next line.
base_model = AutoModelForSequenceClassification.from_pretrained(peft_config.base_model_name_or_path, num_labels=2)
model = PeftModel.from_pretrained(base_model, ADAPTER_DIR)
# Switch to inference mode; the trailing semicolon keeps the full module repr
# from being printed as the cell output.
model.eval();

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): DistilBertForSequenceClassification(
      (distilbert): DistilBertModel(
        (embeddings): Embeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (transformer): Transformer(
          (layer): ModuleList(
            (0-5): 6 x TransformerBlock(
              (attention): DistilBertSdpaAttention(
                (dropout): Dropout(p=0.1, inplace=False)
                (q_lin): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=76

In [None]:
import torch

# Class index -> human-readable sentiment label.
label_map = {0: "Negative", 1: "Positive"}

sentence = "Having chicken for dinner is not bad."
inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)

# Forward pass only; gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

# Index of the highest logit along dim 1, as a plain Python int.
prediction = outputs.logits.argmax(dim=1).item()
print(f"Pred label: {label_map[prediction]}")

Pred label: Positive


Download

In [None]:
!zip -r lora_distilbert_sst2 /content/model/lora_distilbert_sst2

  adding: content/model/lora_distilbert_sst2/ (stored 0%)
  adding: content/model/lora_distilbert_sst2/tokenizer.json (deflated 71%)
  adding: content/model/lora_distilbert_sst2/training_args.bin (deflated 52%)
  adding: content/model/lora_distilbert_sst2/adapter_model.safetensors (deflated 7%)
  adding: content/model/lora_distilbert_sst2/adapter_config.json (deflated 55%)
  adding: content/model/lora_distilbert_sst2/vocab.txt (deflated 53%)
  adding: content/model/lora_distilbert_sst2/tokenizer_config.json (deflated 75%)
  adding: content/model/lora_distilbert_sst2/special_tokens_map.json (deflated 42%)
  adding: content/model/lora_distilbert_sst2/README.md (deflated 66%)


In [None]:
from google.colab import files

# Hand the zipped adapter to the browser as a download (Colab only).
archive_name = 'lora_distilbert_sst2.zip'
files.download(archive_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>