In [1]:
!pip install transformers peft datasets



In [2]:
from datasets import load_dataset
dataset = load_dataset('SetFit/emotion')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Repo card metadata block was not found. Setting CardData to empty.


In [3]:
train_data = dataset["train"]
test_data = dataset["test"]

In [4]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [5]:
# Tokenization function
def tokenize_function(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True)

# Apply tokenization
train_dataset = train_data.map(tokenize_function, batched=True)
test_dataset = test_data.map(tokenize_function, batched=True)

# Convert to PyTorch format
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [6]:
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=6)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [8]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType

# Define LoRA Configuration
lora_config = LoraConfig(
    r=8,  # Low-rank adaptation dimension
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.05,  # Dropout rate
    target_modules=["query", "value"]  # Apply LoRA to self-attention layers only
)

# Prepare model for LoRA
base_model = prepare_model_for_kbit_training(model)

# Convert model into LoRA-enabled model
peft_model = get_peft_model(model, lora_config)

# Print trainable parameters
peft_model.print_trainable_parameters()

trainable params: 294,912 || all params: 109,781,766 || trainable%: 0.2686


In [9]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments( # configuration class that defines how training should happen
    output_dir="./model_checkpoints",  # Where to save model
    num_train_epochs=1,  # Train for 3 epochs
    per_device_train_batch_size=16,  # 16 samples per GPU/CPU
    eval_strategy="epoch",  # Evaluate after every epoch
    save_strategy="epoch",  # Save model after each epoch
    logging_steps=10,  # Log training metrics every 10 steps
    load_best_model_at_end=True,  # Automatically load best checkpoint
    fp16=True  # Use mixed precision for faster training (if GPU supports it)
)

# A high-level class that automates training, evaluation, and saving models.
# It wraps around your model and dataset, handling:

# Training loops.
# Evaluation during training.
# Model saving & checkpointing.

trainer = Trainer(
    model=peft_model,  # LoRA fine-tuned model
    args=training_args,  # Training settings
    train_dataset=train_dataset,  # Training data
    eval_dataset=test_dataset,  # Test data
    tokenizer=tokenizer  # Tokenizer for processing text,
)

trainer.train()

  trainer = Trainer(
[34m[1mwandb[0m: Currently logged in as: [33msaroelzamathew259[0m ([33msaroelzamathew259-lektik[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,1.5395,1.453953


TrainOutput(global_step=1000, training_loss=1.5849349365234375, metrics={'train_runtime': 359.2401, 'train_samples_per_second': 44.538, 'train_steps_per_second': 2.784, 'total_flos': 4224423591936000.0, 'train_loss': 1.5849349365234375, 'epoch': 1.0})

In [10]:
!pip install -U bitsandbytes



In [11]:
from transformers import BitsAndBytesConfig, AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model # Added import for get_peft_model
import torch

# Apply 4-bit quantization to reduce memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Enable 4-bit quantization
    bnb_4bit_compute_dtype=torch.float16,  # Compute in float16
    bnb_4bit_use_double_quant=True,  # Double quantization for efficiency
)

# Load quantized BERT model
base_model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=6,
    quantization_config=bnb_config
)

# Define LoRA Configuration (Moved from cell 9PHsWwHUBPAC)
lora_config = LoraConfig(
    r=8,  # Low-rank adaptation dimension
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.05,  # Dropout rate
    target_modules=["query", "value"]  # Apply LoRA to self-attention layers only
)

# Apply LoRA on top of QLoRA
peft_model = get_peft_model(base_model, lora_config)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
from huggingface_hub import login
from google.colab import userdata

# Log in to the Hugging Face Hub
login(token=userdata.get("HF_TOKEN"))

In [23]:
from transformers import pipeline

# Load a different, public text classification model from HF Hub
emotion_classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")

# Test on new sentences
print(emotion_classifier("I am so happy today!"))
print(emotion_classifier("I feel terrible and sad."))

Device set to use cuda:0


[{'label': 'POSITIVE', 'score': 0.9998766183853149}]
[{'label': 'NEGATIVE', 'score': 0.9994778037071228}]
