<a href="https://colab.research.google.com/github/21J41A0449/FineTuningLlama3B/blob/main/FineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 📚 Step 1: Import tools
# Transformers = brings the brain (model)
# Tokenizer = cuts sentences into Lego blocks
# Datasets = helps us load our question-answer pairs
# PEFT = lets us use the LoRA trick (train only a small part of the brain)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# 🧠 Step 2: Pick which brain (model) we want
# Llama 2 was only published in 7B / 13B / 70B sizes, so a
# "meta-llama/Llama-2-3b-hf" repo does not exist on the Hugging Face Hub.
# The 3-billion-parameter Llama checkpoint is Llama 3.2 3B.
# NOTE: this is a gated model - accept the license on its Hub page and log in
# (e.g. `huggingface-cli login`) before running this cell.
model_name = "meta-llama/Llama-3.2-3B"

# ✂️ Step 3: Bring the Lego cutter (tokenizer)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 🧩 Step 4: Load the brain into memory
# 4-bit quantization makes the brain smaller (compressed) so it fits into our
# computer. The settings go through a BitsAndBytesConfig object; passing
# load_in_4bit=True straight to from_pretrained() is deprecated.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",          # NormalFloat4, the data type used by QLoRA
    bnb_4bit_compute_dtype="float16",   # do the math in half precision (matches fp16 training)
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",   # "auto" puts the brain on GPU if we have one
)

# 🎨 Step 5: LoRA setup
# Instead of painting the whole wall, we only paint small patches
# This saves paint (GPU memory) and time

# A quantized (4-bit) brain needs a little preparation before it can be
# trained. This is the step PEFT documents for k-bit models, and it is meant
# to run before the LoRA patches are attached.
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=16,                                 # size of LoRA patches
    lora_alpha=32,                        # how strong the patches are
    target_modules=["q_proj", "v_proj"],  # parts of the brain we will train
    lora_dropout=0.05,                    # chance to drop info (helps generalize)
    bias="none",                          # leave the bias terms untouched
    task_type="CAUSAL_LM",                # "Causal LM" means text generation
)

# Put the LoRA patches onto our brain
model = get_peft_model(model, config)

# Sanity check: show how few parameters we are actually going to train
model.print_trainable_parameters()

# 📂 Step 6: Load our dataset
# We read two JSON files of question-answer pairs: one split to learn from
# ("train") and one split to check ourselves on ("validation").
# A single entry looks like this:
# {"instruction": "refund question", "input": "Can I get a refund?", "output": "Yes, within 14 days."}
split_files = {
    "train": "train.json",
    "validation": "valid.json",
}
dataset = load_dataset("json", data_files=split_files)

# 🏗️ Step 7: Convert text to Lego blocks (tokens)
def format_example(example, max_length=512):
    """Turn one question-answer record into token ids.

    The training text is built as ``instruction``, then ``input`` (skipped
    when it is empty or missing), then ``output``, each on its own line, and
    finished with the tokenizer's end-of-sequence token when it has one.

    Args:
        example: Mapping with the string fields ``"instruction"`` and
            ``"output"``, and an optional ``"input"`` field.
        max_length: Upper bound on the number of tokens; longer texts are
            truncated to this length.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the built text.

    Raises:
        KeyError: If ``"instruction"`` or ``"output"`` is missing.
    """
    # We join instruction + input into a single question.
    # The input is optional, so skip it when it is empty or null instead of
    # crashing on None or leaving a dangling newline.
    question = example["instruction"]
    extra_context = example.get("input")
    if extra_context:
        question = question + "\n" + extra_context

    # Answer is the output
    answer = example["output"]

    # Keep a newline between question and answer so they do not run together,
    # and close the text with the end-of-sequence token (if one is defined) to
    # mark where the answer stops.
    end_marker = tokenizer.eos_token or ""
    text = question + "\n" + answer + end_marker

    # An explicit max_length makes truncation actually bound the sequence
    # instead of relying on the tokenizer's own (possibly huge) default.
    return tokenizer(text, truncation=True, max_length=max_length)

# Apply this function to the whole dataset (every split).
# After tokenizing we drop the raw text columns: the trainer only needs the
# token columns, and plain strings cannot be packed into tensors.
# (Assumes the validation file has the same fields as the training file.)
tokenized_dataset = dataset.map(
    format_example,
    remove_columns=dataset["train"].column_names,
)

# 🏫 Step 8: Set classroom rules (training arguments)
# The rules are collected in one dictionary first and then handed to
# TrainingArguments in a single call.
hyperparameters = {
    # where to save the trained brain
    "output_dir": "./llama-finetuned",
    # how many questions to read at once (training / test set)
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    # pretend we have a bigger batch
    "gradient_accumulation_steps": 4,
    # how fast to learn (too high = forgetful, too low = lazy)
    "learning_rate": 2e-4,
    # do some warmup before running fast
    "warmup_steps": 50,
    # number of training steps (like study sessions)
    "max_steps": 500,
    # half precision = faster training
    "fp16": True,
    # how often to show progress
    "logging_steps": 10,
}
training_args = TrainingArguments(**hyperparameters)

# 📖 Step 9: Create a trainer (teacher for the brain)

# Padding needs a pad token. If the tokenizer does not define one, reuse the
# end-of-sequence token so sentences of different lengths can share a batch.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# The tokenized examples have different lengths and carry no "labels" column.
# This collator pads every batch to a common length and copies the input ids
# into "labels" (mlm=False = plain next-token prediction), so the model can
# compute a loss. Without it the trainer has nothing to learn from.
# NOTE: padding positions are ignored in the loss; when the pad token is the
# end-of-sequence token, end-of-sequence positions are ignored as well.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
)

# 🏃 Step 10: Start training!
trainer.train()

# 📝 Quiz time: the validation set was handed to the trainer, but no evaluation
# schedule is configured, so it would never be used. Run it once explicitly
# and show the resulting metrics.
eval_metrics = trainer.evaluate()
print(eval_metrics)

# 💾 Step 11: Save the trained brain
# For a LoRA model this stores the small adapter patches; the tokenizer is
# saved alongside so both can be reloaded from the same folder.
model.save_pretrained("llama-3b-lora")
tokenizer.save_pretrained("llama-3b-lora")

# 🎉 Done! Now our brain knows company rules (like refund policy)
