***Libraries***

In [1]:
import transformers
import datasets
from datasets import Dataset,load_dataset,DatasetDict
import peft
import bitsandbytes
import accelerate
import evaluate
from evaluate import evaluator
import seqeval
from transformers import pipeline, AutoTokenizer,AutoModelForSequenceClassification,Trainer,TrainingArguments,DataCollatorWithPadding
from transformers import AutoModelForQuestionAnswering,BitsAndBytesConfig,AutoModelForTokenClassification,default_data_collator
from peft import LoraConfig,get_peft_model,TaskType,PeftModel,prepare_model_for_kbit_training
import numpy as np
import pandas as pd 
import torch
import os
from tqdm import tqdm
import scipy.stats
metric_accuracy = evaluate.load('accuracy')
metric_f1 = evaluate.load("f1")
metric_seqeval = evaluate.load("seqeval") 
metric_squad = evaluate.load("squad")
import warnings
warnings.filterwarnings(action = 'ignore')

  from .autonotebook import tqdm as notebook_tqdm
The 8-bit optimizer is not available on your device, only available on CUDA for now.


In [2]:
from transformers import DataCollatorForTokenClassification

In [3]:
import random

In [4]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


In [5]:
set_seed(42)

***BERT-PEFT-LORA***

In [6]:
dataset = load_dataset("glue","sst2")

In [7]:
low_resource_sample = 512
train_dataset_small = dataset["train"].select(range(low_resource_sample))

In [8]:
def preprocess_func(examples):
    return tokenizer(examples["sentence"],truncation = True , padding = True)

In [9]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [10]:
tokenized_train = train_dataset_small.map(preprocess_func,batched=True)
tokenized_eval = dataset["validation"].map(preprocess_func,batched = True)

Map: 100%|██████████████████████████████████████████████████████████████████| 872/872 [00:00<00:00, 4540.07 examples/s]


In [13]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

In [14]:
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    quantization_config=bnb_config,
    num_labels=2,
    id2label={0: "NEGATIVE", 1: "POSITIVE"},
    label2id={"NEGATIVE": 0, "POSITIVE": 1}
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
model = prepare_model_for_kbit_training(model)

In [16]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query", "key", "value"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_CLS
)

In [17]:
model = get_peft_model(model, lora_config)

In [18]:
model.print_trainable_parameters()

trainable params: 443,906 || all params: 109,927,684 || trainable%: 0.4038


In [20]:
def compute_metrics(eval_pred):
    predictions,labels = eval_pred
    predictions = np.argmax(predictions,axis = 1)
    return metric_accuracy.compute(predictions=predictions,references=labels)

In [21]:
training_args = TrainingArguments(
    output_dir="./result_bert_qlora_sst2",
    learning_rate=3e-4,
    per_device_train_batch_size=4,  
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs_qlora",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    report_to="none",
    seed=42,
    fp16=True  
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics
)


In [22]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6135,0.544188,0.713303
2,0.4981,0.449202,0.841743
3,0.1332,0.459764,0.850917
4,0.4373,0.505042,0.864679
5,0.0446,0.518418,0.854358


TrainOutput(global_step=640, training_loss=0.40482987463474274, metrics={'train_runtime': 37586.75, 'train_samples_per_second': 0.068, 'train_steps_per_second': 0.017, 'total_flos': 74052925685760.0, 'train_loss': 0.40482987463474274, 'epoch': 5.0})

In [23]:
trainer.save_model("./MY_Bert_QLoRa_Model")


qlora_result = trainer.evaluate()
print("QLoRA:", qlora_result)

QLoRA: {'eval_loss': 0.5050415396690369, 'eval_accuracy': 0.8646788990825688, 'eval_runtime': 993.8885, 'eval_samples_per_second': 0.877, 'eval_steps_per_second': 0.11, 'epoch': 5.0}
