## **Fine-Tuning the Base Model using QLoRA**

In [None]:
# pip installs

!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q --upgrade requests==2.32.3 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 datasets==3.2.0 peft==0.14.0 trl==0.14.0 matplotlib wandb

In [None]:

import os
import re
import math
from tqdm import tqdm
from google.colab import userdata
from huggingface_hub import login
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, set_seed, BitsAndBytesConfig
from datasets import load_dataset, Dataset, DatasetDict
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
# constants
# Everything a reader might want to tune lives in this cell.

BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ksharma9719"

# Data

DATASET_NAME = f"{HF_USER}/lite-data"
MAX_SEQUENCE_LENGTH = 182

# Run name for saving the model in the hub
# (timestamped, so every execution of this cell yields a new run / repo name)

RUN_NAME = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
HUB_MODEL_NAME = f"{HF_USER}/{PROJECT_RUN_NAME}"

# Hyperparameters for QLoRA
LORA_R = 32
LORA_ALPHA = 64
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"]
LORA_DROPOUT = 0.1
QUANT_4_BIT = True

# Hyperparameters for training

EPOCHS = 1
BATCH_SIZE = 4
LEARNING_RATE = 1e-4
GRADIENT_ACCUMULATION_STEPS = 1
LR_SCHEDULER_TYPE = 'cosine'
# Fraction of total steps spent linearly warming up the learning rate.
# Was 0.93, which left only the last 7% of the run for the cosine decay;
# a short warm-up of 3% lets the scheduler do its job.
WARMUP_RATIO = 0.03
OPTIMIZER = 'paged_adamw_32bit'

# Admin Config

STEPS = 50          # log the training loss every STEPS optimizer steps
SAVE_STEPS = 2000   # checkpoint interval, in optimizer steps
LOG_TO_WANDB = True

# Render matplotlib figures inline in the notebook output.
%matplotlib inline


In [None]:
# Authenticate with the Hugging Face Hub.
# The token is read from the Colab `userdata` store, so it never appears in the notebook.

hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [None]:
# Logging into Weights and Biases
# The API key is read from the Colab `userdata` store and exported for the wandb client.

wandb_api_key = userdata.get("WANDB_API_KEY")
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()

# Configuring Weights and Biases to record against our project

os.environ["WANDB_PROJECT"] = PROJECT_NAME
# Upload each checkpoint as an artifact when logging is enabled; upload nothing otherwise.
# (The previous fallback, "end", still uploaded the final model with logging switched off.)
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "false"
os.environ["WANDB_WATCH"] = "gradients"


[34m[1mwandb[0m: Currently logged in as: [33mjksharma[0m ([33mjksharma-chhattisgarh-swami-vivekanand-technical-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Loading the dataset
# The repo id comes from DATASET_NAME so it is configured in one place (the constants cell).

dataset = load_dataset(
    DATASET_NAME,
    data_files={
        "train": "data/train-00000-of-00001.parquet",
        "test": "data/test-00000-of-00001.parquet"
    }
)

train = dataset['train']
test = dataset['test']

In [None]:
# Work on small, fixed-seed subsets of each split to keep the run short.
# Sampling from `dataset[...]` (not from `train`/`test` themselves) makes this cell
# idempotent on re-run, and `min(...)` avoids an index error on a smaller split.
TRAIN_SAMPLES = 2000
TEST_SAMPLES = 500

train = dataset['train'].shuffle(seed=42).select(range(min(TRAIN_SAMPLES, len(dataset['train']))))
test = dataset['test'].shuffle(seed=42).select(range(min(TEST_SAMPLES, len(dataset['test']))))


In [None]:
# Inspect one test example: a prompt `text` ending in "Price is $" and its numeric `price`.
test[0]

{'text': 'How much does this cost to the nearest dollar?\n\nGrandeur Single Dummy, Vintage Brass Fifth Avenue Plate with Provence Crystal Knob\nProduct Description The crisply tailored look of the Fifth Avenue Plate in vintage brass has a universal appeal that works well with almost any home. The addition of the oval Provence Knob, with its graceful arcs, will suggest a rustic, yet sophisticated charm. All Grandeur knobs are created from 24% lead crystal for unparalleled clarity and beauty, and mounted on a solid (not plated) forged brass base. From the Manufacturer The crisply tailored look of the Fifth Avenue Plate in vintage brass has a universal appeal that works well with almost any home. The addition of the oval Provence Knob, with its graceful arcs, will suggest a rustic, yet sophisticated charm. All Grandeur knobs are created from 24\n\nPrice is $',
 'price': 180.0}

In [None]:
# Start a Weights & Biases run for this training session (only when logging is enabled).
if LOG_TO_WANDB:
    wandb.init(name=RUN_NAME, project=PROJECT_NAME)

### **Now Loading the Tokenizer and the Model**  
The model is "quantized" - we are reducing the precision to 4 bits.

In [None]:
# Pick the right quantization

if QUANT_4_BIT:
  # 4-bit NF4 weights with double quantization; matmuls are computed in bfloat16.
  quant_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_compute_dtype=torch.bfloat16,
      bnb_4bit_quant_type="nf4"
  )
else:
  # 8-bit loading. BitsAndBytesConfig has no `bnb_8bit_compute_dtype` option
  # (the compute-dtype setting exists only for 4-bit), so the previously passed
  # keyword was discarded as an unused kwarg; it is dropped here.
  quant_config = BitsAndBytesConfig(
      load_in_8bit=True
  )

In [None]:
# Tokenizer: pad on the right and reuse the EOS token as the padding token.

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token

# Base model: loaded with the quantization config chosen above; device placement is automatic.

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    quantization_config=quant_config,
)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# Report how much memory the loaded model occupies.

footprint_mb = base_model.get_memory_footprint() / 1e6
print(f"Memory footprint: {footprint_mb:.1f} MB")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Memory footprint: 5591.5 MB


### **Data Collator**

During training, it's important that we teach the model to predict only the price of a product, not its description.

We need to tell the trainer that everything up to "Price is $" is there to give context to the model to predict the next token, but does not need to be learned.

The trainer needs to teach the model to predict the token(s) after "Price is $".

This can be done manually by setting masks, but TRL provides a simple helper class, `DataCollatorForCompletionOnlyLM`, that takes care of it for us.

In [None]:
from trl import DataCollatorForCompletionOnlyLM

# Everything up to and including this marker is context for the model;
# the collator is built around it so training targets what follows the marker.
response_template = "Price is $"
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
)

### **AND NOW**  

We set up the configuration for training.

We need to create 2 objects:

A LoraConfig object with our hyperparameters for LoRA

An SFTConfig with our overall Training parameters

In [None]:
# First, specify the configuration parameters for LoRA

lora_parameters = LoraConfig(
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    r=LORA_R,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
)

# Next, specify the general configuration parameters for training

train_parameters = SFTConfig(
    output_dir=PROJECT_RUN_NAME,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    eval_strategy="no",
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim=OPTIMIZER,
    save_steps=SAVE_STEPS,
    save_total_limit=10,
    logging_steps=STEPS,
    learning_rate=LEARNING_RATE,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=WARMUP_RATIO,
    group_by_length=True,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    # `report_to=None` means "all installed integrations", not "off";
    # the string "none" is what actually disables reporting.
    report_to="wandb" if LOG_TO_WANDB else "none",
    run_name=RUN_NAME,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    dataset_text_field="text",
    save_strategy="steps",
    hub_strategy="every_save",
    push_to_hub=True,
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=True
)

# Seed the RNGs before the trainer is built so the LoRA adapter initialisation
# is reproducible; reuse the seed already carried by the training config.
set_seed(train_parameters.seed)

# And now, the Supervised Fine Tuning Trainer will carry out the fine-tuning
# given these 2 sets of configuration parameters.
# Recent versions of trl may show a warning about labels here; judge the run by
# whether the training loss comes down.

fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=train,
    peft_config=lora_parameters,
    args=train_parameters,
    data_collator=collator
  )

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

### Kicking off the fine-tuning

In [None]:
# Fine-Tune !!

fine_tuning.train()

# Push our fine-tuned model to hugging face.
# Use the fully-qualified HUB_MODEL_NAME (the same repo id given to the trainer as
# `hub_model_id`) so the final push cannot land in a different namespace.
fine_tuning.model.push_to_hub(HUB_MODEL_NAME, private=True)
print(f"Saved to the hub:  {HUB_MODEL_NAME}")



Step,Training Loss
50,2.0885
100,1.386
150,1.4592
200,1.4754
250,1.4431
300,1.4109
350,1.4232
400,1.4072
450,1.4688
500,1.4065


[34m[1mwandb[0m: Adding directory to artifact (./pricer-2025-08-24_19-23-58/checkpoint-500)... Done. 6.4s


README.md:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ..._19-23-58/adapter_model.safetensors:  38%|###8      | 41.9MB /  109MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Saved to the hub:  pricer-2025-08-24_19-23-58


In [None]:
# Close the Weights & Biases run started earlier (only when logging is enabled).
if LOG_TO_WANDB:
    wandb.finish()