<a href="https://colab.research.google.com/github/Sourabh92133/pricer_llm_fine_tuning/blob/main/notebooks/training_qlora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# QLoRA Fine-Tuning – Training Notebook

This notebook performs QLoRA fine-tuning of LLaMA 3.1 (8B) on the
`ed-donner/pricer-data` dataset for price prediction.

Responsibilities:
- Load dataset
- Configure 4-bit QLoRA
- Fine-tune model using TRL SFTTrainer
- Push LoRA adapters to Hugging Face Hub


In [None]:
# pip installs

!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q --upgrade requests==2.32.3 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 datasets==3.2.0 peft==0.14.0 trl==0.14.0 matplotlib wandb

In [None]:
# imports

import os
import re
import math
from tqdm import tqdm
from google.colab import userdata
from huggingface_hub import login
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, set_seed, BitsAndBytesConfig
from datasets import load_dataset, Dataset, DatasetDict
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
# --- Model, project and dataset identifiers ---------------------------------
Base_Model = "meta-llama/Meta-Llama-3.1-8B"
Project_Name = "pricer"
HF_USER = "sourabh004"                  # replace with your own Hugging Face username
Dataset_Name = "ed-donner/pricer-data"  # dataset used here; swap in your own if you prefer
MAX_SEQUENCE_LENGTH = 182               # passed to the trainer config as the maximum sequence length

# --- Run naming (used for the W&B run and the Hub repository) ----------------
Run_Name = datetime.now().strftime("%y-%m-%d_%H.%M.%S")  # timestamp of this run
Project_Run_Name = "-".join([Project_Name, Run_Name])    # project name + timestamp
Hf_Model_Name = HF_USER + "/" + Project_Run_Name         # full Hub repo id: user/project-timestamp

# --- QLoRA hyperparameters ----------------------------------------------------
Lora_R = 32
Lora_Alpha = 64
Lora_Target_Modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
Lora_Dropout = 0.1   # dropout probability for the LoRA layers, to reduce overfitting
Quant_4_Bit = True   # True -> 4-bit quantisation, False -> 8-bit

# --- Training hyperparameters -------------------------------------------------
Epochs = 1
Batch_Size = 1
Gradient_Accumulation = 1
Learning_Rate = 1e-4
Learning_Scheduler_Type = "cosine"
Warmup_Ratio = 0.3   # the learning rate warms up over this fraction of training, then decays
Optimizer = "paged_adamw_32bit"

# --- Logging and checkpointing ------------------------------------------------
Steps = 50           # log metrics every `Steps` optimisation steps
Save_Steps = 2000    # save a checkpoint every `Save_Steps` steps
LOG_TO_WANDB = True

%matplotlib inline      # this  is only for notebook

In [None]:
# Display the Hub repo id (user/project-timestamp) built in the configuration cell
Hf_Model_Name

In [None]:
# Hugging Face Authentication
#
# The token is read from the HF_TOKEN environment variable.
# If running on Google Colab, you may set HF_TOKEN from the notebook secrets like this:

# os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")

# Normalise the value so an unset, empty or whitespace-only variable is treated as
# missing and fails here with a clear message instead of inside `login`.
HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip()
if not HF_TOKEN:
    raise ValueError(
        "HF_TOKEN environment variable not set. "
        "Please set it via environment variables or Colab userdata."
    )

# Log in to the Hub and also store the token as a git credential
login(HF_TOKEN, add_to_git_credential=True)


In [None]:
# Weights & Biases login
# This notebook expects WANDB_API_KEY to be set as an environment variable.
# Example (Google Colab):

# os.environ["WANDB_API_KEY"] = userdata.get("WANDB")

if LOG_TO_WANDB:
    wandb.login()

    os.environ.update(
        {
            # Group all logs from this run under the project name
            "WANDB_PROJECT": Project_Name,
            # Upload model weights at each checkpoint
            "WANDB_LOG_MODEL": "checkpoint",
            # Track gradients during training
            "WANDB_WATCH": "gradients",
        }
    )


In [None]:
# Load every split of the configured dataset
dataset = load_dataset(path=Dataset_Name)

In [None]:
# Pull out the two splits used in this notebook
train, test = (dataset[split_name] for split_name in ("train", "test"))


In [None]:
# Reduce the training data to at most 20k datapoints to keep fine-tuning time manageable.
# This is optional: raise Train_Subset_Size (or skip this cell) to train on more data.
Train_Subset_Size = 20000

# Clamp to the dataset length so a smaller (e.g. custom) dataset does not raise an
# out-of-range index error; re-running the cell is also safe.
train = train.select(range(min(Train_Subset_Size, len(train))))

In [None]:
# Start a new W&B experiment run, named after this run's timestamp
if LOG_TO_WANDB:
    wandb.init(
        name=Run_Name,
        project=Project_Name,
    )

In [None]:
# Build the bitsandbytes quantisation config used when loading the base model.
if Quant_4_Bit:
    # 4-bit NF4 weights with double quantisation; matmuls are computed in bfloat16
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )
else:
    # 8-bit loading. The previous `bnb_8bit_compute_dtype=torch.bloat16` argument was
    # removed: `torch.bloat16` does not exist (AttributeError), and BitsAndBytesConfig
    # has no 8-bit compute-dtype option, so the keyword would have been ignored anyway.
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

In [None]:
# Tokenizer: reuse the EOS token for padding and pad on the right-hand side
tokenizer = AutoTokenizer.from_pretrained(Base_Model, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Base model: loaded with the quantisation config built above, placed automatically on the available devices
base_model = AutoModelForCausalLM.from_pretrained(
    Base_Model,
    device_map="auto",
    quantization_config=quant_config,
)
# Keep generation padding consistent with the tokenizer
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

footprint_mb = base_model.get_memory_footprint() / 1e6
print(f"Memory footprint: {footprint_mb:.1f} MB")

In [None]:
# Build a data collator for completion-only training: everything up to and including
# the response template is treated as context, and the model is trained to predict
# the tokens that follow it.
from trl import DataCollatorForCompletionOnlyLM

response_template = "Price is $"  # marker after which the model must predict the next tokens
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
)

## Training Configuration

In this section, we set up the configuration required for fine-tuning.

We create **two key objects**:

1. **LoRA Configuration (`LoraConfig`)**  
   Defines the Low-Rank Adaptation (LoRA) hyperparameters used for parameter-efficient fine-tuning, such as rank, scaling factor, dropout, and target modules.

2. **Training Configuration (`SFTConfig`)**  
   Specifies the overall training setup, including batch size, learning rate, optimizer, scheduler, logging, checkpointing, and Hugging Face Hub integration.


In [None]:
# LoRA adapter settings, taken from the hyperparameters in the configuration cell
lora_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=Lora_Target_Modules,
    r=Lora_R,
    lora_alpha=Lora_Alpha,
    lora_dropout=Lora_Dropout,
    bias="none",
)
# Overall training setup for the SFT trainer.
train_parameters = SFTConfig(
    # --- output and schedule ---
    output_dir=Project_Run_Name,
    num_train_epochs=Epochs,
    max_steps=-1,
    per_device_train_batch_size=Batch_Size,
    per_device_eval_batch_size=1,
    eval_strategy="no",
    gradient_accumulation_steps=Gradient_Accumulation,
    # --- optimisation ---
    optim=Optimizer,
    learning_rate=Learning_Rate,
    lr_scheduler_type=Learning_Scheduler_Type,
    warmup_ratio=Warmup_Ratio,
    weight_decay=0.001,
    max_grad_norm=0.3,
    fp16=False,
    bf16=True,
    # --- data ---
    max_seq_length=MAX_SEQUENCE_LENGTH,
    dataset_text_field="text",
    group_by_length=True,
    # --- logging ---
    logging_steps=Steps,
    # Use the string "none" to disable reporting. Passing None does NOT disable it:
    # transformers resolves None to every installed integration, so wandb would
    # still be used even with LOG_TO_WANDB = False.
    report_to="wandb" if LOG_TO_WANDB else "none",
    run_name=Run_Name,
    # --- checkpointing and Hub upload ---
    save_strategy="steps",
    save_steps=Save_Steps,
    save_total_limit=10,
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=Hf_Model_Name,
    hub_private_repo=True,
)
# Trainer wiring the quantised base model, LoRA config, training config,
# training data and the completion-only collator together
fine_tuning = SFTTrainer(
    model=base_model,
    args=train_parameters,
    peft_config=lora_parameters,
    train_dataset=train,
    data_collator=collator,
)

In [None]:
# bias="none" means no bias terms are trained: the base model's weights stay frozen and only the LoRA adapter parameters are updated

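# For task_type="CAUSAL_LM":
# PEFT wraps the model for causal language modelling:

  # Adapters are added to the layers listed in target_modules (here the attention projections q_proj, k_proj, v_proj, o_proj)

  # Training uses next-token prediction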

# If you give wrong task_type:

  # LoRA may attach incorrectly

  # Training can silently degrade

In [None]:
# Kick off fine-tuning
fine_tuning.train()

# Push the fine-tuned LoRA adapters to the Hugging Face Hub.
# This goes through the model's own push_to_hub, which takes the repo id as its first
# argument and accepts `private`. Trainer.push_to_hub would instead treat the first
# positional argument as the commit message and does not take a `private` argument.
fine_tuning.model.push_to_hub(Hf_Model_Name, private=True)
print(f"save to hub :{Project_Run_Name}")

In [None]:
# Close the W&B run once training and upload are done
if LOG_TO_WANDB:
    wandb.finish()