# Starter Notebook

Install and import required libraries

In [1]:
!pip install transformers datasets evaluate accelerate peft trl bitsandbytes
!pip install nvidia-ml-py3

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting trl
  Downloading trl-0.16.1-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-non

In [2]:
import os
import pandas as pd
import torch
from transformers import RobertaModel, RobertaTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding, RobertaForSequenceClassification
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset, Dataset, ClassLabel
import pickle

## Load Tokenizer and Preprocess Data

In [3]:
base_model = 'roberta-base'

dataset = load_dataset('ag_news', split='train')
tokenizer = RobertaTokenizer.from_pretrained(base_model)

def preprocess(examples):
    tokenized = tokenizer(examples['text'], truncation=True, padding=True)
    return tokenized

tokenized_dataset = dataset.map(preprocess, batched=True,  remove_columns=["text"])
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

In [4]:
# Extract the number of classess and their names
num_labels = dataset.features['label'].num_classes
class_names = dataset.features["label"].names
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Create an id2label mapping
# We will need this for our classifier.
id2label = {i: label for i, label in enumerate(class_names)}

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")


number of labels: 4
the labels: ['World', 'Sports', 'Business', 'Sci/Tech']


## Load Pre-trained Model
Set up config for pretrained model and download it from hugging face

In [5]:
model = RobertaForSequenceClassification.from_pretrained(
    base_model,
    id2label=id2label)
model

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

## Anything from here on can be modified

In [6]:
# Split the original training set
split_datasets = tokenized_dataset.train_test_split(test_size=640, seed=42)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']

## Setup LoRA Config
Setup PEFT config and get peft model for finetuning

In [7]:
# To track evaluation accuracy during training
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # Calculate accuracy
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy
    }

In [8]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

def evaluate_model(inference_model, dataset, labelled=True, batch_size=8, data_collator=None):
    """
    Evaluate a PEFT model on a dataset.

    Args:
        inference_model: The model to evaluate.
        dataset: The dataset (Hugging Face Dataset) to run inference on.
        labelled (bool): If True, the dataset includes labels and metrics will be computed.
                         If False, only predictions will be returned.
        batch_size (int): Batch size for inference.
        data_collator: Function to collate batches. If None, the default collate_fn is used.

    Returns:
        If labelled is True, returns a tuple (metrics, predictions)
        If labelled is False, returns the predictions.
    """
    # Create the DataLoader
    eval_dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=data_collator)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()

    all_predictions = []
    if labelled:
        metric = evaluate.load('accuracy')

    # Loop over the DataLoader
    for batch in tqdm(eval_dataloader):
        # Move each tensor in the batch to the device
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = inference_model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        all_predictions.append(predictions.cpu())

        if labelled:
            # Expecting that labels are provided under the "labels" key.
            references = batch["labels"]
            metric.add_batch(
                predictions=predictions.cpu().numpy(),
                references=references.cpu().numpy()
            )

    # Concatenate predictions from all batches
    all_predictions = torch.cat(all_predictions, dim=0)

    if labelled:
        eval_metric = metric.compute()
        print("Evaluation Metric:", eval_metric)
        return eval_metric, all_predictions
    else:
        return all_predictions

In [9]:
# PEFT Config
# peft_config = LoraConfig(
#     r=8,
#     lora_alpha=16,
#     lora_dropout=0.1,
#     bias = 'none',
#     target_modules = ['query', 'value'],
#     task_type="SEQ_CLS",
# )
r_list = [2, 6, 10]
lora_alpha_mul_list = [1, 2, 3]
lora_dropout_list = [0.1, 0.3, 0.5]
bias_list = ['none', 'lora_only', 'all']
for r in r_list:
  for lora_alpha_mul in lora_alpha_mul_list:
    lora_alpha = r * lora_alpha_mul
    for lora_dropout in lora_dropout_list:
      bias = 'lora_only'
      target_modules = ['query', 'value']
      task_type = 'SEQ_CLS'

      peft_config = LoraConfig(
            r = r,
            lora_alpha=lora_alpha,
            lora_dropout=lora_dropout,
            bias = bias,
            target_modules = target_modules,
            task_type=task_type,
      )
      peft_model = get_peft_model(model, peft_config)

      print('PEFT Model')
      peft_model.print_trainable_parameters()

      output_dir = f"results/r={r}/lora_alpha={lora_alpha}/lora_dropout={lora_dropout}"
      print(output_dir)
      training_args = TrainingArguments(
        output_dir=output_dir,
        report_to='wandb',
        eval_strategy='steps',
        logging_steps=100,
        learning_rate=2e-5,
        # learning_rate=1e-5,
        num_train_epochs=5,
        max_steps=1500,
        use_cpu=False,
        dataloader_num_workers=4,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        optim="adamw_torch",
        gradient_checkpointing=False,
        gradient_checkpointing_kwargs={'use_reentrant':True}
      )
      # Setup Training args
      def get_trainer(model):
        return  Trainer(
          model=model,
          args=training_args,
          compute_metrics=compute_metrics,
          train_dataset=train_dataset,
          eval_dataset=eval_dataset,
          data_collator=data_collator,
        )

      peft_lora_finetuning_trainer = get_trainer(peft_model)

      result = peft_lora_finetuning_trainer.train()




PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=2/lora_dropout=0.1


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdkf595915002[0m ([33mdkf595915002-new-york-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss,Accuracy
100,1.3807,1.371031,0.4
200,1.3625,1.350281,0.729688
300,1.3391,1.322352,0.753125
400,1.2938,1.269959,0.846875
500,1.225,1.167092,0.88125
600,1.0738,0.975245,0.86875
700,0.8422,0.676922,0.885938
800,0.609,0.471604,0.884375
900,0.4597,0.390631,0.889062
1000,0.4061,0.361999,0.892188


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=2/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,1.3697,1.350222,0.632812
200,1.328,1.299692,0.842187
300,1.2582,1.203895,0.85625
400,1.0873,0.956377,0.873437
500,0.7717,0.553463,0.890625
600,0.5082,0.408371,0.876563
700,0.4056,0.355978,0.884375
800,0.407,0.339591,0.882812
900,0.3579,0.334102,0.890625
1000,0.3492,0.328342,0.896875


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=2/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,1.3482,1.307705,0.784375
200,1.2477,1.166205,0.871875
300,0.9953,0.788971,0.88125
400,0.6222,0.470132,0.879687
500,0.4481,0.366011,0.8875
600,0.3915,0.350129,0.873437
700,0.3517,0.335007,0.88125
800,0.3828,0.32574,0.882812
900,0.3399,0.326317,0.8875
1000,0.3382,0.321435,0.89375


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=4/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,1.2928,1.170966,0.857812
200,0.9725,0.704424,0.876563
300,0.5768,0.418234,0.879687
400,0.3841,0.35077,0.88125
500,0.3509,0.327093,0.892188


Step,Training Loss,Validation Loss,Accuracy
100,1.2928,1.170966,0.857812
200,0.9725,0.704424,0.876563
300,0.5768,0.418234,0.879687
400,0.3841,0.35077,0.88125
500,0.3509,0.327093,0.892188
600,0.3487,0.33525,0.879687
700,0.325,0.331096,0.884375
800,0.3631,0.32066,0.8875
900,0.3245,0.32427,0.889062
1000,0.3249,0.318112,0.892188


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=4/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,1.23,1.036854,0.873437
200,0.841,0.578605,0.878125
300,0.504,0.385146,0.878125
400,0.3613,0.343032,0.878125
500,0.3416,0.323791,0.890625
600,0.3438,0.333329,0.88125
700,0.3211,0.33057,0.882812
800,0.3595,0.319536,0.890625
900,0.3226,0.323826,0.8875
1000,0.3215,0.317073,0.892188


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=4/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,1.1684,0.927202,0.88125
200,0.7458,0.503499,0.882812
300,0.4623,0.368563,0.878125
400,0.3483,0.339335,0.88125
500,0.3359,0.321732,0.889062
600,0.3394,0.331153,0.882812
700,0.3193,0.329899,0.88125
800,0.3565,0.319323,0.890625
900,0.3219,0.323206,0.8875
1000,0.319,0.316414,0.890625


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=6/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,1.1006,0.795774,0.885938
200,0.6331,0.420122,0.882812
300,0.413,0.348238,0.88125
400,0.3298,0.336542,0.884375
500,0.3251,0.319859,0.885938
600,0.331,0.331827,0.879687
700,0.3175,0.331254,0.88125
800,0.3501,0.319579,0.890625
900,0.3185,0.323432,0.8875
1000,0.314,0.316348,0.890625


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=6/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,1.0595,0.736075,0.885938
200,0.5945,0.402552,0.885938
300,0.4013,0.3441,0.884375
400,0.3252,0.334717,0.884375
500,0.3226,0.318882,0.889062
600,0.3292,0.33022,0.878125
700,0.3155,0.329551,0.882812
800,0.3484,0.319021,0.890625
900,0.3173,0.323096,0.889062
1000,0.312,0.315901,0.890625


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 685,828 || all params: 125,316,104 || trainable%: 0.5473
results/r=2/lora_alpha=6/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,1.0226,0.688518,0.885938
200,0.5654,0.390712,0.889062
300,0.3933,0.341226,0.8875
400,0.322,0.333666,0.882812
500,0.3205,0.318467,0.892188
600,0.3264,0.328844,0.878125
700,0.3147,0.329214,0.884375
800,0.347,0.319172,0.890625
900,0.3171,0.323241,0.885938
1000,0.3107,0.315902,0.890625


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=6/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,0.9817,0.626919,0.885938
200,0.52,0.367708,0.89375
300,0.3741,0.333514,0.889062
400,0.3123,0.332821,0.8875
500,0.3141,0.318092,0.892188
600,0.3209,0.329338,0.88125
700,0.3119,0.329252,0.882812
800,0.3427,0.318163,0.892188
900,0.3148,0.32263,0.884375
1000,0.3071,0.315829,0.892188


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=6/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,0.9512,0.592762,0.884375
200,0.5006,0.360942,0.89375
300,0.3683,0.331396,0.889062
400,0.3095,0.330921,0.889062
500,0.3119,0.317761,0.892188
600,0.3194,0.328298,0.882812
700,0.3107,0.328364,0.88125
800,0.3414,0.317876,0.890625
900,0.3135,0.322308,0.884375
1000,0.3055,0.315603,0.892188


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=6/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,0.9236,0.563914,0.8875
200,0.4841,0.355619,0.89375
300,0.3639,0.329867,0.889062
400,0.3076,0.330163,0.890625
500,0.3104,0.317552,0.892188
600,0.317,0.327495,0.882812
700,0.3101,0.327932,0.884375
800,0.3407,0.318069,0.890625
900,0.313,0.322245,0.884375
1000,0.3044,0.315735,0.89375


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=12/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,0.8787,0.498626,0.890625
200,0.4437,0.343904,0.889062
300,0.3545,0.327922,0.892188
400,0.3029,0.3326,0.889062
500,0.3071,0.319403,0.892188
600,0.3117,0.330331,0.8875
700,0.3098,0.330341,0.8875
800,0.3362,0.319862,0.890625
900,0.3117,0.323355,0.884375
1000,0.3022,0.317219,0.892188


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=12/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,0.8616,0.485997,0.890625
200,0.4376,0.342044,0.889062
300,0.3519,0.326994,0.892188
400,0.3011,0.330986,0.889062
500,0.3055,0.318864,0.892188
600,0.3111,0.329726,0.8875
700,0.3086,0.329573,0.885938
800,0.3354,0.319819,0.892188
900,0.3109,0.323275,0.885938
1000,0.3006,0.317087,0.892188


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=12/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,0.8454,0.473894,0.890625
200,0.4308,0.340391,0.8875
300,0.3503,0.32636,0.892188
400,0.3003,0.330085,0.889062
500,0.3044,0.31903,0.892188
600,0.3086,0.329321,0.890625
700,0.3079,0.329998,0.885938
800,0.3351,0.320293,0.890625
900,0.3102,0.323372,0.885938
1000,0.2997,0.317389,0.890625


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=18/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,0.815,0.443441,0.895312
200,0.4153,0.339477,0.885938
300,0.3479,0.327723,0.892188
400,0.2986,0.332099,0.885938
500,0.3037,0.320397,0.892188
600,0.3059,0.331817,0.889062
700,0.3084,0.331037,0.885938
800,0.3319,0.321074,0.890625
900,0.3098,0.323849,0.885938
1000,0.299,0.318127,0.889062


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=18/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,0.803,0.435823,0.895312
200,0.4115,0.337979,0.889062
300,0.3459,0.326954,0.892188
400,0.2971,0.33066,0.885938
500,0.3027,0.320073,0.892188
600,0.3059,0.332019,0.889062
700,0.3073,0.330546,0.885938
800,0.3314,0.321359,0.890625
900,0.3091,0.324028,0.8875
1000,0.2974,0.318285,0.890625


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 833,284 || all params: 125,463,560 || trainable%: 0.6642
results/r=6/lora_alpha=18/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,0.7924,0.430927,0.895312
200,0.4082,0.337182,0.8875
300,0.3452,0.326185,0.892188
400,0.2968,0.32972,0.8875
500,0.3014,0.320152,0.895312
600,0.3034,0.331106,0.890625
700,0.3066,0.331374,0.885938
800,0.3315,0.321662,0.890625
900,0.3086,0.324022,0.890625
1000,0.297,0.318389,0.889062


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=10/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,0.7893,0.432338,0.9
200,0.403,0.33094,0.8875
300,0.3374,0.323083,0.8875
400,0.2926,0.328723,0.885938
500,0.2989,0.318184,0.890625
600,0.3034,0.32869,0.890625
700,0.3056,0.328377,0.889062
800,0.3291,0.319025,0.890625
900,0.3077,0.322796,0.8875
1000,0.297,0.317471,0.8875


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=10/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,0.7751,0.423907,0.9
200,0.3983,0.329633,0.889062
300,0.3351,0.322447,0.889062
400,0.2908,0.327617,0.8875
500,0.2977,0.318201,0.890625
600,0.3024,0.328452,0.890625
700,0.3042,0.32806,0.889062
800,0.3285,0.319095,0.890625
900,0.3069,0.322842,0.8875
1000,0.2956,0.317555,0.889062


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=10/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,0.7622,0.416865,0.9
200,0.3938,0.328722,0.890625
300,0.3339,0.321938,0.889062
400,0.2898,0.326964,0.885938
500,0.2965,0.318109,0.892188
600,0.3006,0.328013,0.890625
700,0.3036,0.328145,0.889062
800,0.3278,0.319386,0.889062
900,0.3063,0.322585,0.889062
1000,0.2951,0.317682,0.889062


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=20/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,0.7344,0.39227,0.896875
200,0.3818,0.33112,0.892188
300,0.334,0.324991,0.8875
400,0.2895,0.328932,0.8875
500,0.2968,0.319436,0.895312
600,0.2978,0.331235,0.890625
700,0.305,0.329675,0.8875
800,0.3249,0.320086,0.8875
900,0.3054,0.323106,0.895312
1000,0.2938,0.317888,0.895312


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=20/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,0.7259,0.389188,0.895312
200,0.3799,0.330028,0.892188
300,0.3326,0.324075,0.8875
400,0.288,0.328048,0.885938
500,0.2959,0.31947,0.896875
600,0.2973,0.331419,0.889062
700,0.3035,0.329887,0.889062
800,0.325,0.320538,0.8875
900,0.3051,0.323517,0.895312
1000,0.2926,0.318103,0.89375


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=20/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,0.7186,0.387733,0.895312
200,0.3775,0.329672,0.89375
300,0.3324,0.323388,0.8875
400,0.2876,0.327101,0.8875
500,0.2948,0.319297,0.895312
600,0.2953,0.330528,0.890625
700,0.3026,0.330283,0.889062
800,0.3248,0.320761,0.8875
900,0.305,0.323425,0.89375
1000,0.2927,0.318295,0.89375


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=30/lora_dropout=0.1


Step,Training Loss,Validation Loss,Accuracy
100,0.6984,0.374386,0.89375
200,0.3722,0.333341,0.889062
300,0.3331,0.327702,0.8875
400,0.2881,0.328484,0.889062
500,0.2969,0.319966,0.9
600,0.295,0.333355,0.889062
700,0.3048,0.329858,0.890625
800,0.3228,0.320512,0.8875
900,0.3042,0.323158,0.898438
1000,0.292,0.317934,0.898438


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=30/lora_dropout=0.3


Step,Training Loss,Validation Loss,Accuracy
100,0.693,0.374003,0.89375
200,0.3715,0.332111,0.892188
300,0.3319,0.326291,0.8875
400,0.2864,0.327581,0.890625
500,0.296,0.32008,0.898438
600,0.2943,0.333398,0.889062
700,0.3032,0.330369,0.889062
800,0.3232,0.321119,0.8875
900,0.3039,0.323569,0.896875
1000,0.2908,0.318376,0.898438


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808
results/r=10/lora_alpha=30/lora_dropout=0.5


Step,Training Loss,Validation Loss,Accuracy
100,0.6887,0.374104,0.89375
200,0.37,0.331624,0.892188
300,0.3318,0.32531,0.889062
400,0.2862,0.326695,0.890625
500,0.2947,0.319912,0.9
600,0.2923,0.332365,0.890625
700,0.3022,0.33038,0.889062
800,0.3227,0.321391,0.884375
900,0.3043,0.323386,0.896875
1000,0.2914,0.318553,0.896875


In [10]:
peft_model = get_peft_model(model, peft_config)

In [11]:
# print("Trainable parameters:")
# for name, param in peft_model.named_parameters():
#     if param.requires_grad:
#         print(name)

In [12]:
print('PEFT Model')
peft_model.print_trainable_parameters()

PEFT Model
trainable params: 980,740 || all params: 125,611,016 || trainable%: 0.7808


## Training Setup

In [13]:
# To track evaluation accuracy during training
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # Calculate accuracy
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy
    }
# Setup Training args
output_dir = "results"
training_args = TrainingArguments(
    output_dir=output_dir,
    report_to=None,
    eval_strategy='steps',
    logging_steps=100,
    learning_rate=2e-5,
    # learning_rate=1e-5,
    num_train_epochs=5,
    max_steps=1500,
    use_cpu=False,
    dataloader_num_workers=4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    optim="adamw_torch",
    gradient_checkpointing=False,
    gradient_checkpointing_kwargs={'use_reentrant':True}
)

def get_trainer(model):
      return  Trainer(
          model=model,
          args=training_args,
          compute_metrics=compute_metrics,
          train_dataset=train_dataset,
          eval_dataset=eval_dataset,
          data_collator=data_collator,
      )

### Start Training

In [14]:
peft_lora_finetuning_trainer = get_trainer(peft_model)

result = peft_lora_finetuning_trainer.train()

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss,Accuracy
100,0.6816,0.371333,0.892188
200,0.3681,0.331171,0.892188
300,0.3308,0.32519,0.889062
400,0.2854,0.325948,0.889062
500,0.2942,0.320231,0.896875
600,0.2917,0.332667,0.892188
700,0.302,0.33144,0.889062
800,0.3225,0.321962,0.885938
900,0.3042,0.323736,0.898438
1000,0.2911,0.319028,0.896875


## Evaluate Finetuned Model


### Performing Inference on Custom Input
Uncomment following functions for running inference on custom inputs

In [None]:
def classify(model, tokenizer, text):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = tokenizer(text, truncation=True, padding=True, return_tensors="pt").to(device)
    output = model(**inputs)

    prediction = output.logits.argmax(dim=-1).item()

    print(f'\n Class: {prediction}, Label: {id2label[prediction]}, Text: {text}')
    return id2label[prediction]

In [None]:
classify( peft_model, tokenizer, "Kederis proclaims innocence Olympic champion Kostas Kederis today left hospital ahead of his date with IOC inquisitors claiming his ...")
classify( peft_model, tokenizer, "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\band of ultra-cynics, are seeing green again.")

### Run Inference on eval_dataset

In [None]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

def evaluate_model(inference_model, dataset, labelled=True, batch_size=8, data_collator=None):
    """
    Evaluate a PEFT model on a dataset.

    Args:
        inference_model: The model to evaluate.
        dataset: The dataset (Hugging Face Dataset) to run inference on.
        labelled (bool): If True, the dataset includes labels and metrics will be computed.
                         If False, only predictions will be returned.
        batch_size (int): Batch size for inference.
        data_collator: Function to collate batches. If None, the default collate_fn is used.

    Returns:
        If labelled is True, returns a tuple (metrics, predictions)
        If labelled is False, returns the predictions.
    """
    # Create the DataLoader
    eval_dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=data_collator)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()

    all_predictions = []
    if labelled:
        metric = evaluate.load('accuracy')

    # Loop over the DataLoader
    for batch in tqdm(eval_dataloader):
        # Move each tensor in the batch to the device
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = inference_model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        all_predictions.append(predictions.cpu())

        if labelled:
            # Expecting that labels are provided under the "labels" key.
            references = batch["labels"]
            metric.add_batch(
                predictions=predictions.cpu().numpy(),
                references=references.cpu().numpy()
            )

    # Concatenate predictions from all batches
    all_predictions = torch.cat(all_predictions, dim=0)

    if labelled:
        eval_metric = metric.compute()
        print("Evaluation Metric:", eval_metric)
        return eval_metric, all_predictions
    else:
        return all_predictions

In [None]:
# Check evaluation accuracy
_, _ = evaluate_model(peft_model, eval_dataset, True, 8, data_collator)

In [None]:
from peft import PeftConfig
# 保存 checkpoint 的目录
checkpoints_dir = "results"

# 遍历所有 checkpoint 子目录
for checkpoint_name in sorted(os.listdir(checkpoints_dir)):
    checkpoint_path = os.path.join(checkpoints_dir, checkpoint_name)
    if os.path.isdir(checkpoint_path) and checkpoint_name.startswith("checkpoint"):

        print(f"\nEvaluating: {checkpoint_name}")

        # 加载PEFT配置
        peft_config = PeftConfig.from_pretrained(checkpoint_path)

        # 加载基础模型
        base = RobertaForSequenceClassification.from_pretrained(
            peft_config.base_model_name_or_path,
            id2label=id2label  # 你训练时用的id2label
        )

        # 加载带LoRA参数的模型
        model = PeftModel.from_pretrained(base, checkpoint_path)

        # 调用你原本的评估函数
        metrics, _ = evaluate_model(model, eval_dataset, labelled=True, batch_size=8, data_collator=data_collator)
        print(f"Accuracy at {checkpoint_name}: {metrics['accuracy']:.4f}")

### Run Inference on unlabelled dataset

In [20]:
#Load your unlabelled data
unlabelled_dataset = pd.read_pickle("test_unlabelled.pkl")
test_dataset = unlabelled_dataset.map(preprocess, batched=True, remove_columns=["text"])
unlabelled_dataset

FileNotFoundError: [Errno 2] No such file or directory: 'test_unlabelled.pkl'

In [None]:
# Run inference and save predictions
preds = evaluate_model(peft_model, test_dataset, False, 8, data_collator)
df_output = pd.DataFrame({
    'ID': range(len(preds)),
    'Label': preds.numpy()  # or preds.tolist()
})
df_output.to_csv(os.path.join(output_dir,"inference_output.csv"), index=False)
print("Inference complete. Predictions saved to inference_output.csv")

In [None]:
# 加载PEFT配置
best_checkpoint_path = "results/checkpoint-2500"
peft_config = PeftConfig.from_pretrained(best_checkpoint_path)

# 加载基础模型
base = RobertaForSequenceClassification.from_pretrained(
    peft_config.base_model_name_or_path,
    id2label=id2label  # 你训练时用的id2label
)

# 加载带LoRA参数的模型
model = PeftModel.from_pretrained(base, best_checkpoint_path)
preds = evaluate_model(model, test_dataset, False, 8, data_collator)
df_output = pd.DataFrame({
    'ID': range(len(preds)),
    'Label': preds.numpy()  # or preds.tolist()
})
df_output.to_csv(os.path.join(output_dir,"inference_output_checkpoint_2500.csv"), index=False)
print("Inference complete. Predictions saved to inference_output.csv")