# LLM Fine-Tuning Comparison: Base vs Fine-Tuned Model

This notebook compares the outputs of the base Llama-2-7b-hf model with those of a fine-tuned version of the same model. The fine-tuned version was produced with parameter-efficient fine-tuning (PEFT) and is loaded here as LoRA/QLoRA adapters on top of the base model.

In [None]:
# Install required libraries
!pip install peft transformers sentencepiece protobuf bitsandbytes

In [None]:
from huggingface_hub import login

# Log in to the Hugging Face Hub before any model or adapter is downloaded.
# NOTE(review): new_session=False presumably reuses a token already saved on
# this machine instead of prompting for a new one - confirm against the
# huggingface_hub documentation.
login(new_session=False)

In [None]:
from peft import PeftModel, PeftConfig

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Hub identifiers: the adapter repository and the checkpoint it is applied to
adapter_repo = "DaviLago/amazon-titles-llama-finetuned"
base_model = "meta-llama/Llama-2-7b-hf"

# 4-bit NF4 quantization settings (QLoRA): double quantization, bfloat16 compute
quantization_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype="bfloat16",
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Tokenizer of the base checkpoint
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Base model, loaded with the quantization settings above
base = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    quantization_config=quantization_config,
)

In [None]:
# Prompt sent unchanged to both models. The adjacent string literals below are
# concatenated by Python into a single string.
prompt = (
    "You are a friendly and knowledgeable shop attendant in a retail store that sells products. "
    "The customer will ask a question about a title. "
    "Give him the answer based on the title. "
    "The customer wants to know: What are 'Girls Ballet Tutu Neon Pink' details?"
)

In [None]:
# Base model: generate a continuation of the prompt and print it.
# Only max_new_tokens limits the generation. max_length is deliberately not
# passed: when both are set, transformers emits a warning and lets
# max_new_tokens take precedence, so max_length would have no effect.
base_pipe = pipeline(
    task="text-generation",
    model=base,
    tokenizer=tokenizer,
    num_return_sequences=1,
    max_new_tokens=30,
)
base_output = base_pipe(prompt)[0]["generated_text"]
print("Base model output:")
print(base_output)

In [None]:
# Fine-tuned model: a second, separately loaded copy of the quantized base
# checkpoint, wrapped with the LoRA/QLoRA adapter from `adapter_repo`.
ft = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=quantization_config,
        device_map="auto",
    ),
    adapter_repo,
)

In [None]:
# Fine-tuned model: generate a continuation of the same prompt and print it.
# Only max_new_tokens limits the generation. max_length is deliberately not
# passed: when both are set, transformers emits a warning and lets
# max_new_tokens take precedence, so max_length would have no effect.
ft_pipe = pipeline(
    task="text-generation",
    model=ft,
    tokenizer=tokenizer,
    num_return_sequences=1,
    max_new_tokens=30,
)
ft_output = ft_pipe(prompt)[0]["generated_text"]
print("\nFine-tuned model output:")
print(ft_output)

In [None]:
# Show the hard-coded training snippets next to the model outputs for comparison.
# A single print call with a newline separator emits one line per argument.
print(
    "\nRelated training data for fine-tuned model output:",
    "Content: High quality 3 layer ballet tutu. 12 inches in length",
    "Content: Dance tutu for girls ages 2-8 years. Perfect for dance practice, recitals and performances, costumes or just for fun!",
    sep="\n",
)