In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl

In [2]:
import os
import torch
import transformers
from datasets import load_dataset
from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
from trl import SFTTrainer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import numpy as np

In [None]:
dataset = load_dataset("dair-ai/emotion")

#Convert to instruction format
def convert_to_instruction_format(example):
  return{
      "instruction" : "You are a emotion classification model. Classify the emotion of the following sentence.",
      "input" : example["text"],
      "output" : example["label"]
  }

dataset=dataset.map(convert_to_instruction_format)

In [4]:
model = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model)

In [5]:
# Convert instruction + input into a single prompt
def preprocess(example):
  text=f"{example['instruction']}\nInput:{example['input']}\nOutput:"
  inputs=tokenizer(text, truncation=True, padding="max_length", max_length=128)
  label=int(example["output"])
  inputs["labels"]=label

  return inputs

tokenized = dataset.map(preprocess)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [6]:
# Setting up QLoRA config using peft

bnb_config=BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model=AutoModelForSequenceClassification.from_pretrained(
    model,
    num_labels=6,
    quantization_config=bnb_config,
    device_map='auto'
)

base_model = prepare_model_for_kbit_training(base_model)

config=LoraConfig(
    r=8,
    lora_alpha=8,
    lora_dropout=0.05,
    target_modules=["query", "key", "value"],
    bias="none",
    task_type="SEQ_CLS"
)

model = get_peft_model(base_model, config)
model.print_trainable_parameters()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 446,982 || all params: 109,933,836 || trainable%: 0.4066


In [7]:
def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

In [8]:
# Training using huggingface trainer

training_args=TrainingArguments(
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    learning_rate=2e-4,
    fp16=True
)

trainer=Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    compute_metrics=compute_metrics,
    data_collator=DataCollatorWithPadding(tokenizer)
)

  trainer=Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [11]:
trainer.train()

  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5331,0.404914,0.871,0.877523,0.871,0.872409
2,0.3061,0.265609,0.918,0.918535,0.918,0.917333
3,0.2223,0.240454,0.9275,0.928922,0.9275,0.927773
4,0.217,0.207356,0.9295,0.93029,0.9295,0.929747
5,0.1757,0.196195,0.935,0.936123,0.935,0.935405


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=10000, training_loss=0.3779602546691895, metrics={'train_runtime': 1501.1153, 'train_samples_per_second': 53.294, 'train_steps_per_second': 6.662, 'total_flos': 5289872670720000.0, 'train_loss': 0.3779602546691895, 'epoch': 5.0})

In [12]:
metrics = trainer.evaluate(eval_dataset=tokenized["test"])
print("Test Metrics:", metrics)

Test Metrics: {'eval_loss': 0.22194141149520874, 'eval_accuracy': 0.9205, 'eval_precision': 0.9217563779612207, 'eval_recall': 0.9205, 'eval_f1': 0.9209798176326583, 'eval_runtime': 11.2062, 'eval_samples_per_second': 178.472, 'eval_steps_per_second': 22.309, 'epoch': 5.0}
