# Llama Product Pricer

## Training

In [None]:
# pip installs

!pip install -q --upgrade torch==2.5.1+cu124 torchvision==0.20.1+cu124 torchaudio==2.5.1+cu124 --index-url https://download.pytorch.org/whl/cu124
!pip install -q --upgrade requests==2.32.3 bitsandbytes==0.46.0 transformers==4.48.3 accelerate==1.3.0 datasets==3.2.0 peft==0.14.0 trl==0.14.0 matplotlib wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m908.2/908.2 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.3/7.3 MB[0m [31m147.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m127.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m111.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m70.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m129.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [None]:
# imports

import os
import re
import math
from tqdm import tqdm
# from google.colab import userdata
from huggingface_hub import login
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, set_seed, BitsAndBytesConfig
from datasets import load_dataset, Dataset, DatasetDict
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from datetime import datetime
import matplotlib.pyplot as plt

In [None]:
# Constants

# --- Model and project identity ---
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "product-pricer-llama"
HF_USER = "imonghose"  # your HF name here!

# --- Data ---
# Previously curated pricing dataset, stored under the HF user's namespace
DATASET_NAME = HF_USER + "/pricer-data"
# Passed to the trainer as max_seq_length
MAX_SEQUENCE_LENGTH = 182

# --- Run naming (used for the output directory and the hub repo) ---
# Timestamp makes every run name unique, e.g. 2024-06-15_12.30.45
RUN_NAME = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
# e.g. product-pricer-llama-2024-06-15_12.30.45
PROJECT_RUN_NAME = PROJECT_NAME + "-" + RUN_NAME
# e.g. imonghose/product-pricer-llama-2024-06-15_12.30.45
HUB_MODEL_NAME = HF_USER + "/" + PROJECT_RUN_NAME

# --- QLoRA hyperparameters ---
LORA_R = 32
LORA_ALPHA = 64
LORA_DROPOUT = 0.1
# Attention projection layers that receive LoRA adapters
TARGET_MODULES = [
    "q_proj",
    "v_proj",
    "k_proj",
    "o_proj",
]
# True selects the 4-bit quantization config, False the 8-bit one
QUANT_4_BIT = True

# --- Training hyperparameters ---
EPOCHS = 1  # author's note: more epochs are possible, but 1 is expected to be enough
BATCH_SIZE = 4  # author's note: on an A100 box this can go up to 16
GRADIENT_ACCUMULATION_STEPS = 1
LEARNING_RATE = 0.0001
LR_SCHEDULER_TYPE = "cosine"
WARMUP_RATIO = 0.03  # fraction of training used to ramp the LR up to LEARNING_RATE
OPTIMIZER = "paged_adamw_32bit"

# --- Logging and checkpointing cadence ---
STEPS = 50  # batch steps between log entries (passed as logging_steps)
SAVE_STEPS = 1000  # batch steps between checkpoint saves (passed as save_steps)
LOG_TO_WANDB = True

# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Display the fully-qualified hub repo id generated for this run
HUB_MODEL_NAME

'imonghose/product-pricer-llama-2025-10-19_09.16.39'

### Log in to HuggingFace and Weights & Biases

In [None]:
# Log in to HuggingFace

import os
from dotenv import load_dotenv
from huggingface_hub import login

# Read variables from a local .env file into the process environment
load_dotenv()

# The Hugging Face access token is expected under HF_TOKEN
hf_token = os.getenv("HF_TOKEN")

# Fail fast when the token is missing or blank, instead of failing later at login
if not hf_token or not hf_token.strip():
    raise ValueError("❌ Hugging Face token not found. Please check your .env file.")

print("✅ Hugging Face token loaded successfully!")

# Authenticate with the Hub and also store the token as a git credential
login(token=hf_token, add_to_git_credential=True)

✅ Hugging Face token loaded successfully!


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
# hf_token = userdata.get('HF_TOKEN')
# login(hf_token, add_to_git_credential=True)

In [None]:
# Log in to Weights & Biases
#
# The API key is read from the environment (or a local .env file) and is never
# written into the notebook.
# SECURITY: an API key was previously hard-coded in this cell. Treat that key as
# compromised and revoke/rotate it in your W&B account settings.
#
# On Colab you can populate the variable from the secrets store instead:
#   os.environ["WANDB_API_KEY"] = userdata.get('WANDB_API_KEY')

from dotenv import load_dotenv

# Does not override variables that are already set in the environment
load_dotenv()

wandb_api_key = (os.getenv("WANDB_API_KEY") or "").strip()

if wandb_api_key:
    os.environ["WANDB_API_KEY"] = wandb_api_key
    wandb.login()  # reads WANDB_API_KEY from env; no prompt
elif LOG_TO_WANDB:
    # Logging was requested but there is no credential: fail here rather than mid-training
    raise ValueError("❌ Weights & Biases API key not found. Set WANDB_API_KEY in your environment or .env file.")

# Configure Weights & Biases to record against our project
os.environ["WANDB_PROJECT"] = PROJECT_NAME
# Upload checkpoints as artifacts only when W&B logging is enabled; "false" turns
# model upload off (the previous fallback "end" still uploaded the final model)
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "false"
os.environ["WANDB_WATCH"] = "gradients"

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Currently logged in as: [33mimonghose[0m ([33mimonghose-otto-von-guericke-university-magdeburg[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Fetch the curated dataset from the Hugging Face Hub and pull out its two splits
dataset = load_dataset(DATASET_NAME)
train, test = dataset["train"], dataset["test"]

README.md:   0%|          | 0.00/416 [00:00<?, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/187M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/922k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/400000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [None]:
# Show the first record of each split, one per line, as a sanity check
print(train[0], test[0], sep="\n")

{'text': 'How much does this cost to the nearest dollar?\n\nDelphi FG0166 Fuel Pump Module\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7\n\nPrice is $227.00', 'price': 226.95}
{'text': "How much does this cost to the nearest dollar?\n\nOEM AC Compressor w/A/C Repair Kit For Ford F150 F-150 V8 & Lincoln Mark LT 2007 2008 - BuyAutoParts NEW\nAs one of the world's largest automotive part

In [None]:
# Train on a subset of the training data to keep the run short.
# To train on the full dataset instead, comment out the `train = ...` line below.
TRAIN_SUBSET_SIZE = 20_000
# Cap at the split size so select() cannot index past the end of a smaller dataset
train = train.select(range(min(TRAIN_SUBSET_SIZE, len(train))))

In [None]:
# Open the W&B run for this training session when logging is enabled
if LOG_TO_WANDB:
    wandb.init(
        project=PROJECT_NAME,
        name=RUN_NAME,
    )

[34m[1mwandb[0m: Detected [openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


## Now load the Tokenizer and Model

The model is "quantized" - we are reducing the precision to 4 bits.

In [None]:
# pick the right quantization

if QUANT_4_BIT:
    # 4-bit NF4 weights with double quantization; matmuls are computed in bfloat16
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )
else:
    # 8-bit weights. BitsAndBytesConfig has no `bnb_8bit_compute_dtype` parameter
    # (it was silently swallowed as an unused kwarg), so it is not passed here.
    quant_config = BitsAndBytesConfig(load_in_8bit=True)

In [None]:
# Load the Tokenizer and the Model

# NOTE(review): trust_remote_code=True permits running code shipped with the hub
# repo; confirm it is actually needed for this base model.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
# Pad on the right, reusing the end-of-sequence token as the padding token
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the chosen quantization config; device placement is automatic
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, quantization_config=quant_config, device_map="auto")
# Keep generation padding consistent with the tokenizer's pad token
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

print(f"Memory footprint: {base_model.get_memory_footprint() / 1e6:.1f} MB")

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

Memory footprint: 5591.5 MB


# Data Collator

It's important that we ensure during Training that we are not trying to train the model to predict the description of products; only their price.

We need to tell the trainer that everything up to "Price is $" is there to give context to the model to predict the next token, but does not need to be learned.

The trainer needs to teach the model to predict the token(s) after "Price is $".

For this, we use the `DataCollatorForCompletionOnlyLM` helper class from HuggingFace's TRL library.

In [None]:
# Worked example of what the collator does. Given a training text such as:
#
#   "Given the following product, output its price.\n"
#   "Product: ACME Widget\n"
#   "Price is $1299.99</s>"
#
# the collator:
#   1. tokenizes the whole text,
#   2. locates the tokenized "Price is $",
#   3. sets the labels to -100 for everything up to that point,
#   4. keeps the labels for 1299.99 (through the end token).
#
# Net effect: in every sample the loss is computed only after the literal
# "Price is $", so the model learns just to generate the price.

from trl import DataCollatorForCompletionOnlyLM

response_template = "Price is $"
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
)

# AND NOW

## We set up the configuration for Training

We need to create 2 objects:

A LoraConfig object with our hyperparameters for LoRA

An SFTConfig with our overall Training parameters

In [None]:
# LoRA adapter configuration, built from the QLoRA hyperparameters defined above

lora_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

# Next, specify the general configuration parameters for training

train_parameters = SFTConfig(
    # Output location and run identity
    output_dir=PROJECT_RUN_NAME,
    run_name=RUN_NAME,
    # Schedule: train for whole epochs (max_steps=-1 means no step cap)
    num_train_epochs=EPOCHS,
    max_steps=-1,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    eval_strategy="no",
    # Optimizer and learning-rate schedule
    optim=OPTIMIZER,
    learning_rate=LEARNING_RATE,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=WARMUP_RATIO,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # Precision: bfloat16 mixed precision, fp16 off
    fp16=False,
    bf16=True,
    # Data handling
    max_seq_length=MAX_SEQUENCE_LENGTH,
    dataset_text_field="text",
    group_by_length=True,
    # Logging. Use the string "none" to disable reporting: passing None makes
    # TrainingArguments fall back to all installed integrations, so W&B would
    # still be used even with LOG_TO_WANDB = False.
    logging_steps=STEPS,
    report_to="wandb" if LOG_TO_WANDB else "none",
    # Checkpointing: save every SAVE_STEPS steps, keep at most 10 checkpoints
    save_strategy="steps",
    save_steps=SAVE_STEPS,
    save_total_limit=10,
    # Hub upload: push to a private repo on every save
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=True,
)

# Build the Supervised Fine-Tuning trainer from the LoRA and training configurations.
# Note: recent trl versions may print a warning about labels here. Per the original
# author's guidance it can be ignored, provided the training loss actually comes down.

fine_tuning = SFTTrainer(
    args=train_parameters,
    peft_config=lora_parameters,
    model=base_model,
    train_dataset=train,
    data_collator=collator,
)

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

## Kick off fine-tuning!

In [None]:
# Fine-tune!
fine_tuning.train()
# To continue an interrupted run from a saved checkpoint, use this instead:
# fine_tuning.train(resume_from_checkpoint=True)

# Push our fine-tuned model to Hugging Face.
# Use the fully-qualified repo id (HUB_MODEL_NAME) so this final push targets the
# same repo as the `hub_model_id` used for checkpoint pushes during training,
# rather than a bare name resolved against whichever account is logged in.
fine_tuning.model.push_to_hub(HUB_MODEL_NAME, private=True)
print(f"Saved to the hub: {HUB_MODEL_NAME}")





Step,Training Loss
50,2.3004
100,1.8991
150,1.9916
200,1.9834
250,1.9974
300,1.9097
350,1.9391
400,1.9392
450,1.9545
500,1.9236


[34m[1mwandb[0m: Adding directory to artifact (product-pricer-llama-2025-10-19_09.16.39/checkpoint-1000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (product-pricer-llama-2025-10-19_09.16.39/checkpoint-2000)... Done. 7.0s
[34m[1mwandb[0m: Adding directory to artifact (product-pricer-llama-2025-10-19_09.16.39/checkpoint-3000)... Done. 2.9s
[34m[1mwandb[0m: Adding directory to artifact (product-pricer-llama-2025-10-19_09.16.39/checkpoint-4000)... Done. 6.4s


Step,Training Loss
50,2.3004
100,1.8991
150,1.9916
200,1.9834
250,1.9974
300,1.9097
350,1.9391
400,1.9392
450,1.9545
500,1.9236


[34m[1mwandb[0m: Adding directory to artifact (product-pricer-llama-2025-10-19_09.16.39/checkpoint-5000)... Done. 6.3s


README.md:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:  31%|###       | 33.5MB /  109MB            

No files have been modified since last commit. Skipping to prevent empty commit.


Saved to the hub: product-pricer-llama-2025-10-19_09.16.39


In [None]:
# Mark the W&B run as finished once training is done
if LOG_TO_WANDB:
    wandb.finish()

0,1
train/epoch,▁▁▁▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▂▁▃▃▄▃▂▁▂▅▃▁▁▄▂▅▃▁▃▄▅▃▂▂▇▂▅▆▃▃█▃▃▆▂▁▄▂▂▃
train/learning_rate,▆████████▇▇▇▆▆▆▅▅▅▅▅▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
train/loss,█▄▄▃▃▃▃▂▃▃▃▄▃▃▂▃▂▂▂▂▂▃▁▂▂▂▂▂▂▂▂▁▁▁▂▂▂▂▁▁

0,1
total_flos,1.6068018209970586e+17
train/epoch,1.0
train/global_step,5000.0
train/grad_norm,4.09805
train/learning_rate,0.0
train/loss,1.8063
train_loss,1.87197
train_runtime,61906.8974
train_samples_per_second,0.323
train_steps_per_second,0.081
