In [None]:
# @title Mount Google Drive if in Colab
import os

# Directory inside the Colab VM where Google Drive gets mounted.
DRIVE_MOUNT_POINT = '/content/MyDrive'

try:
    # Detect Colab by importing google.colab rather than by probing the mount
    # point: the mount point is not expected to exist before drive.mount() has
    # run, so probing it would skip the mount on a fresh session.
    from google.colab import drive
except ImportError:
    # Not running in Colab. `__file__` is only defined when this code runs as
    # a script; in a notebook kernel it is undefined, so fall back to the
    # current working directory instead of raising NameError.
    try:
        BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        BASE_DIR = os.getcwd()
else:
    drive.mount(DRIVE_MOUNT_POINT)
    # Absolute path (the original lacked the leading slash, which made it
    # relative to the current working directory).
    # NOTE(review): Colab normally exposes the user's files under
    # "<mount point>/MyDrive/..."; confirm whether an extra "MyDrive"
    # component is needed for this project layout.
    BASE_DIR = os.path.join(
        DRIVE_MOUNT_POINT, '00-github/to-check-on-PEFT/to-check-on-PEFT'
    )

In [None]:
# @title Import Libraries
import os
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset, load_metric
from peft import VBLoRAConfig, get_peft_model

In [None]:
# @title names and parameters
# Central configuration cell. Each `# @param` annotation turns the assignment
# into a Colab form field, so keep every assignment on a single line.

# Checkpoint used to load both the tokenizer and the classification model.
MODEL_NAME = 'bert-base-uncased'  # @param {type:"string"}
# Dataset path and configuration (task) passed to load_dataset.
DATASET_NAME = 'glue'  # @param {type:"string"}
DATASET_CONFIG_NAME = 'mrpc'  # @param ["mrpc", "sst2", "cola", "qnli", "qqp", "stsb", "mnli", "rte", "wnli"]
# Size of the classification head.
# NOTE(review): this is set independently of DATASET_CONFIG_NAME; tasks such
# as "stsb" or "mnli" presumably need a different value - confirm before
# switching tasks.
NUM_LABELS = 2  # @param {type:"integer"}
# PEFT task type.
TASK_TYPE = 'SEQ_CLS'  # @param ["CAUSAL_LM","SEQ_2_SEQ_LM","SEQ_CLS","TOKEN_CLS","QUESTION_ANS","FEATURE_EXTRACTION","MULTIPLE_CHOICE","IMAGE_CLASSIFICATION","AUDIO_CLS"]
# Optimisation hyperparameters (presumably consumed by the training cells
# further down the notebook - not visible from this cell).
BATCH_SIZE = 16  # @param {type:"integer"}
EPOCHS = 3  # @param {type:"integer"}
LEARNING_RATE = 2e-5  # @param {type:"number"}
# Adapter hyperparameters for the PEFT configuration cell.
# NOTE(review): confirm that each of these is actually accepted by the PEFT
# config class in use; the names follow plain-LoRA conventions.
LORA_RANK = 8  # @param {type:"integer"}
LORA_ALPHA = 16  # @param {type:"integer"}
LORA_DROPOUT = 0.1  # @param {type:"number"}

In [None]:
# @title Model and Tokenizer Setup
# Both objects are loaded from the same checkpoint (MODEL_NAME); the
# classification head is sized by NUM_LABELS.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS,
)

In [None]:
# @title Prepare VB-LoRA Configuration
# VBLoRAConfig does not share LoRA's keyword names: it has no `lora_alpha`
# field, and its dropout field is `vblora_dropout` (not `lora_dropout`).
# Passing the LoRA names raises TypeError, so only the fields VB-LoRA defines
# are set here. LORA_ALPHA is therefore intentionally not used.
# NOTE(review): VB-LoRA's `vector_length` (left at its library default) is
# expected to divide the in/out feature sizes of every targeted layer -
# confirm when changing MODEL_NAME or target_modules.
peft_config = VBLoRAConfig(
    r=LORA_RANK,
    target_modules=["query", "value", "key", "dense"],  # Adjust based on model architecture
    vblora_dropout=LORA_DROPOUT,
    bias="none",
    task_type=TASK_TYPE,  # taken from the parameters cell instead of a hard-coded literal
)
# `model` is rebound to the adapter-wrapped model, so re-running this cell
# without first re-running the model-loading cell wraps the wrapped model again.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

In [None]:
# @title Load GLUE tasks
# Fetch the dataset splits for the configured task.
dataset = load_dataset(
    path=DATASET_NAME,
    name=DATASET_CONFIG_NAME,
)
# Matching GLUE metric for the same task.
# NOTE(review): `datasets.load_metric` is deprecated in recent `datasets`
# releases in favour of the separate `evaluate` package - confirm the
# installed version still provides it.
metric = load_metric(
    path='glue',
    config_name=DATASET_CONFIG_NAME,
)

In [None]:
# @title Tokenization and Preprocessing
# Text column(s) of each GLUE task. Single-sentence tasks have `None` as the
# second entry; sentence-pair tasks must feed both columns to the tokenizer.
GLUE_TASK_TO_KEYS = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}


def preprocess_fn(example):
    """Tokenize one example or a batch of examples of the configured GLUE task.

    Args:
        example: A mapping from column name to value. Values may be single
            strings (unbatched) or lists of strings (batched `map` call).

    Returns:
        The tokenizer output, truncated/padded to 128 tokens.

    Raises:
        KeyError: If the expected text column(s) are missing from `example`.
    """
    # Unknown tasks fall back to a single "sentence" column (the original behaviour).
    first_key, second_key = GLUE_TASK_TO_KEYS.get(DATASET_CONFIG_NAME, ("sentence", None))
    if second_key is None:
        texts = (example[first_key],)
    else:
        texts = (example[first_key], example[second_key])
    return tokenizer(*texts, truncation=True, padding="max_length", max_length=128)


# MNLI ships "validation_matched"/"validation_mismatched" instead of "validation".
EVAL_SPLIT = "validation_matched" if DATASET_CONFIG_NAME == "mnli" else "validation"

# batched=True lets the tokenizer process many rows per call, which is much
# faster than one call per row and yields the same columns.
encoded = dataset["train"].map(preprocess_fn, batched=True)
encoded_eval = dataset[EVAL_SPLIT].map(preprocess_fn, batched=True)