<a href="https://colab.research.google.com/github/Jacob-Michael-Morris/Jacob-Michael-Morris/blob/main/Applying_Lightweight_Fine_Tuning_to_a_Foundation_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install transformers
!pip install peft
!pip install datasets
!pip install numpy
!pip install scikit-learn
!pip install pandas

# Import libraries
import copy

import numpy as np
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model, AutoPeftModelForSequenceClassification
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EvalPrediction, DataCollatorWithPadding

# Load pretrained model and tokenizer
model_name = "distilbert-base-uncased"
# num_labels=4 sizes the classification head for the dataset loaded below. The
# head is not part of the checkpoint, so it starts from a fresh initialisation.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_batch(batch):
    """Tokenize the "text" column of a batch, truncating over-long inputs.

    Padding is intentionally NOT applied here. Both Trainers in this notebook
    are given a DataCollatorWithPadding, which pads each mini-batch to the
    length of its own longest example. Padding inside `map` would instead pad
    every example to the longest sequence of its (much larger) map batch and
    make every forward pass process needless padding tokens.
    """
    return tokenizer(batch["text"], truncation=True)


# Load and preprocess dataset
dataset = load_dataset("ag_news")
splits = ["train", "test"]
tokenized_dataset = {split: dataset[split].map(tokenize_batch, batched=True) for split in splits}

# Define evaluation metrics
def compute_metrics(eval_pred):
    """Compute classification accuracy for a Trainer evaluation pass.

    Args:
        eval_pred: A pair ``(scores, labels)``. ``scores`` holds one row of
            per-class scores per example; ``labels`` holds the reference
            class index of each example.

    Returns:
        A dict with the single key ``"accuracy"``: the fraction of examples
        whose highest-scoring class equals the reference label.
    """
    class_scores, reference_labels = eval_pred
    # The predicted class is the index of the largest score in each row.
    predicted_labels = np.argmax(class_scores, axis=1)
    is_correct = predicted_labels == reference_labels
    return {"accuracy": is_correct.mean()}

# Evaluate the pretrained model
# This Trainer is only used for `evaluate()` below, which gives the baseline
# score of the model before any fine-tuning.
trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis_base",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        # `eval_strategy` is the current name of this option; the former
        # spelling `evaluation_strategy` is deprecated in recent transformers
        # releases.
        eval_strategy="epoch",
        # Kept identical to the evaluation strategy, which
        # load_best_model_at_end=True requires.
        save_strategy="epoch",
        num_train_epochs=2,
        weight_decay=0.01,
        load_best_model_at_end=True,
    ),
    # Fixed-seed shuffle + select gives a small, reproducible 250-example subset.
    train_dataset=tokenized_dataset["train"].shuffle(seed=42).select(range(250)),
    eval_dataset=tokenized_dataset["test"].shuffle(seed=42).select(range(250)),
    tokenizer=tokenizer,
    # Pads each mini-batch to the length of its longest example.
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

base_model_evaluation = trainer.evaluate()

[31mERROR: Operation cancelled by user[0m[31m
[0mCollecting peft
  Downloading peft-0.13.0-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.13.0-py3-none-any.whl (322 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.5/322.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.13.0
Collecting datasets
  Downloading datasets-3.0.1-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.1-py3-none-any.whl (471 kB)
[2K  

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]



In [None]:
# Perform Lightweight Fine-Tuning with LoRA
# Fully qualified module paths of the query/key/value projections in the
# attention block of transformer layers 0 through 5, in layer-major order.
lora_target_modules = [
    f"distilbert.transformer.layer.{layer_idx}.attention.{projection}"
    for layer_idx in range(6)
    for projection in ("q_lin", "k_lin", "v_lin")
]

lora_settings = dict(
    task_type=TaskType.SEQ_CLS,  # sequence classification
    inference_mode=False,        # the adapter is going to be trained
    r=8,                         # rank of the low-rank update matrices
    lora_alpha=32,               # scaling applied to the LoRA update
    lora_dropout=0.1,
    target_modules=lora_target_modules,
)
peft_config = LoraConfig(**lora_settings)

# Wrap a copy of the base model. get_peft_model injects the LoRA layers into
# the model object it receives, so passing `model` directly would silently
# alter the baseline model and make re-running this cell wrap an
# already-wrapped model. The copy starts from the same weights.
peft_model = get_peft_model(copy.deepcopy(model), peft_config)

trainer_peft = Trainer(
    model=peft_model,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis_peft",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        # `eval_strategy` is the current name of this option; the former
        # spelling `evaluation_strategy` is deprecated in recent transformers
        # releases.
        eval_strategy="epoch",
        # Kept identical to the evaluation strategy, which
        # load_best_model_at_end=True requires.
        save_strategy="epoch",
        num_train_epochs=4,
        weight_decay=0.01,
        load_best_model_at_end=True,
    ),
    # Same fixed-seed 250-example subsets as the baseline evaluation, so the
    # two sets of metrics are computed on identical data.
    train_dataset=tokenized_dataset["train"].shuffle(seed=42).select(range(250)),
    eval_dataset=tokenized_dataset["test"].shuffle(seed=42).select(range(250)),
    tokenizer=tokenizer,
    # Pads each mini-batch to the length of its longest example.
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

trainer_peft.train()
# Persist the trained adapter so it can be reloaded for inference.
peft_model.save_pretrained("data/news_peft")

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.373945,0.264
2,No log,1.353672,0.324
3,No log,1.34278,0.352
4,No log,1.338725,0.368


In [None]:
# Load the saved PEFT model
inference_model = AutoPeftModelForSequenceClassification.from_pretrained(
    "data/news_peft",
    num_labels=4
)
# Take the padding id from the tokenizer that produced the inputs. The previous
# `config.eos_token_id` is a pattern for decoder-style models; a BERT-style
# checkpoint such as DistilBERT does not define an EOS token, so that
# assignment replaced a valid padding id with an unset value.
inference_model.config.pad_token_id = tokenizer.pad_token_id

# Evaluate the fine-tuned model
# Reuse the existing trainer (same arguments, eval subset and metrics) and
# point it at the reloaded model.
trainer_peft.model = inference_model
peft_evaluation_results = trainer_peft.evaluate()

# Compare results
print(f"Original Model Performance: {base_model_evaluation}")
print(f"PEFT Model Performance: {peft_evaluation_results}")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Original Model Performance: {'eval_loss': 1.3981856107711792, 'eval_accuracy': 0.268, 'eval_runtime': 135.8008, 'eval_samples_per_second': 1.841, 'eval_steps_per_second': 0.118}
PEFT Model Performance: {'eval_loss': 1.3387246131896973, 'eval_accuracy': 0.368, 'eval_runtime': 115.2812, 'eval_samples_per_second': 2.169, 'eval_steps_per_second': 0.139, 'epoch': 4.0}
