# Install Dependencies

In [8]:
!pip install --upgrade trl > /dev/null
!pip show trl


Name: trl
Version: 0.16.1
Summary: Train transformer language models with reinforcement learning.
Home-page: https://github.com/huggingface/trl
Author: Leandro von Werra
Author-email: leandro.vonwerra@gmail.com
License: Apache 2.0
Location: /usr/local/lib/python3.11/dist-packages
Requires: accelerate, datasets, rich, transformers
Required-by: 


In [1]:
!pip install -q transformers datasets accelerate peft trl bitsandbytes

In [2]:
from huggingface_hub import login
from google.colab import userdata

# Make sure you set HF_TOKEN in Colab secrets first
login(token=userdata.get("HF_TOKEN"))

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "meta-llama/Llama-3.2-3B"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

KeyboardInterrupt: 

In [4]:
import json
from datasets import Dataset

with open("train_data.json") as f:
    raw = json.load(f)

formatted = []
for r in raw:
    msgs = r["messages"]
    system_msg = next((m["content"] for m in msgs if m["role"]=="system"),
                      "You are a marketing strategist specialized in creating market segmentation analyses for startups.")
    user_msg   = next(m["content"] for m in msgs if m["role"]=="user")
    bot_msg    = next(m["content"] for m in msgs if m["role"]=="assistant")
    text = (
        "<|start_of_turn|>system\n"  + system_msg + "<|end_of_turn|>\n"
      + "<|start_of_turn|>user\n"    + user_msg   + "<|end_of_turn|>\n"
      + "<|start_of_turn|>assistant\n" + bot_msg    + "<|end_of_turn|>"
    )
    formatted.append({
        "prompt": user_msg,
        "response": bot_msg,
        "text": text
    })

dataset = Dataset.from_list(formatted)
assert "<|start_of_turn|>" in dataset[0]["text"]

In [5]:
from transformers import DataCollatorForLanguageModeling

# 1) Add pad + turn markers
tokenizer.add_special_tokens({
    "pad_token": "[PAD]",
    "additional_special_tokens": ["<|start_of_turn|>", "<|end_of_turn|>"]
})
model.resize_token_embeddings(len(tokenizer))

# 2) Tokenize + set labels
def tokenize_fn(batch):
    tok = tokenizer(
        batch["text"],
        truncation=True,
        max_length=4096,
        padding="max_length",
        return_tensors="pt",
    )
    tok["labels"] = tok.input_ids.clone()
    return {k: v.tolist() for k,v in tok.items()}

tokenized = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=dataset.column_names
)

# 3) Instantiate a collator for causal LM
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

In [6]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prep the base for k-bit training
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
    task_type="CAUSAL_LM",
    lora_dropout=0.05,
    bias="none"
)

model = get_peft_model(model, lora_config)

In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="llama3-finetuned",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    bf16=True,
    optim="paged_adamw_8bit",
    logging_steps=1,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    max_steps=90,       # train longer for actual learning
    report_to="none"
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    processing_class=tokenizer,
    data_collator=data_collator,
    peft_config=lora_config,
)

Truncating train dataset:   0%|          | 0/20 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [10]:
from trl import SFTTrainer
print(SFTTrainer.__module__)
print(SFTTrainer.__init__)


trl.trainer.sft_trainer
<function SFTTrainer.__init__ at 0x7cc3dbd4ade0>


In [12]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
1,1.009
2,0.9139
3,0.8464
4,0.7554
5,0.7597
6,0.5963
7,0.5896
8,0.4659
9,0.4118
10,0.4447




TrainOutput(global_step=90, training_loss=0.18588637221190665, metrics={'train_runtime': 4014.9712, 'train_samples_per_second': 0.09, 'train_steps_per_second': 0.022, 'total_flos': 6261507411148800.0, 'train_loss': 0.18588637221190665})

In [3]:
import torch
from transformers import AutoTokenizer

model_id = "meta-llama/Llama-3.2-3B"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Add special tokens back to the tokenizer
tokenizer.add_special_tokens({
    "pad_token": "[PAD]",
    "additional_special_tokens": ["<|start_of_turn|>", "<|end_of_turn|>"]
})


3

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Step 1: Check if merged model already exists
import os
if os.path.exists("llama3-finetuned-merged"):
    print("Merged model already exists, skipping merge step.")
else:
    print("Loading tokenizer first...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Add the same special tokens that were used during fine-tuning
    print("Adding special tokens...")
    tokenizer.add_special_tokens({
        "pad_token": "[PAD]",
        "additional_special_tokens": ["<|start_of_turn|>", "<|end_of_turn|>"]
    })

    print("Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True
    )

    # Resize token embeddings to match the fine-tuned model's vocabulary size
    print("Resizing token embeddings...")
    base_model.resize_token_embeddings(len(tokenizer))

    print("Loading adapter...")
    merged = PeftModel.from_pretrained(base_model, "llama3-lora-adapter")

    print("Merging models...")
    merged = merged.merge_and_unload()

    print("Saving merged model...")
    merged.save_pretrained("llama3-finetuned-merged", safe_serialization=True)
    tokenizer.save_pretrained("llama3-finetuned-merged")

    # Free up memory
    del base_model
    del merged
    torch.cuda.empty_cache()


Loading tokenizer first...
Adding special tokens...
Loading base model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Resizing token embeddings...
Loading adapter...
Merging models...
Saving merged model...


In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_dir = "llama3-finetuned-merged"
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Add system message to get better results
system_msg = "You are a marketing strategist specialized in creating market segmentation analyses for startups."
prompt = "Create a market segmentation table with 16 points for this startup idea: AI based leetcode platform"

formatted_prompt = f"<|start_of_turn|>system\n{system_msg}<|end_of_turn|>\n<|start_of_turn|>user\n{prompt}<|end_of_turn|>\n<|start_of_turn|>assistant\n"

inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

output = model.generate(
    **inputs,
    max_new_tokens=2000,  # Increased to allow for complete market segmentation
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id
)

print(tokenizer.decode(output[0], skip_special_tokens=True))


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system
You are a marketing strategist specialized in creating market segmentation analyses for startups.
user
Create a market segmentation table with 16 points for this startup idea: AI based leetcode platform
assistant
[
  {
    "Title/Description": "End User",
    "Educational Institutions": "Students, Faculty, Administrative Staff",
    "Corporate Training Providers": "Training Managers, Instructors",
    "Individuals Seeking Career Change": "Job Seekers",
    "Online Learning Platforms": "Teachers, Tutors"
  },
  {
    "Title/Description": "Task",
    "Educational Institutions": "Facilitating practice sessions based on user score and feedback",
    "Corporate Training Providers": "Tracking progress and providing personalized feedback",
    "Individuals Seeking Career Change": "Simulating real-world scenarios for skill development",
    "Online Learning Platforms": "Offering AI-tailored quizzes and assignments"
  },
  {
    "Title/Description": "Benefit",
    "Educational Institutio

In [5]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Save LoRA adapter
trainer.model.save_pretrained("llama3-lora-adapter")

# Load the tokenizer first
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Add the same special tokens used during training
tokenizer.add_special_tokens({
    "pad_token": "[PAD]",
    "additional_special_tokens": ["<|start_of_turn|>", "<|end_of_turn|>"]
})

# Now load the base model
base = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)

# Resize the token embeddings to match the tokenizer's vocab size
base.resize_token_embeddings(len(tokenizer))

# Now load the LoRA adapter and merge
merged = PeftModel.from_pretrained(base, "llama3-lora-adapter")
merged = merged.merge_and_unload()

# Save the merged model and tokenizer
merged.save_pretrained("llama3-finetuned-merged", safe_serialization=True)
tokenizer.save_pretrained("llama3-finetuned-merged")


NameError: name 'trainer' is not defined

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "llama3-finetuned-merged", device_map="auto", torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained("llama3-finetuned-merged")

system_msg = "You are a marketing strategist specialized in creating market segmentation analyses for startups."
user_prompt = "Create a market segmentation table with 16 points for this startup idea: AI based leetcode platform"

text_in = (
    "<|start_of_turn|>system\n"  + system_msg + "<|end_of_turn|>\n"
  + "<|start_of_turn|>user\n"    + user_prompt + "<|end_of_turn|>\n"
  + "<|start_of_turn|>assistant\n"
)
inputs = tokenizer(text_in, return_tensors="pt").to(model.device)

out = model.generate(
    **inputs,
    max_new_tokens=2000,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id
)

# Print full chat + then clean assistant reply
full = tokenizer.decode(out[0], skip_special_tokens=True)
print(full)

# Extract just the assistant segment
if "<|start_of_turn|>assistant\n" in full:
    clean = full.split("<|start_of_turn|>assistant\n")[1]
    print("\n\n— Assistant:\n", clean)

In [6]:
# Install zip and zip the directories using shell commands
# (This may take a moment if zip isn't already installed)
!apt-get update -qq && apt-get install -qq zip

# Zip the fine-tuned Gemma3 LoRA adapters
!zip -r /mnt/data/fine_tuned_gemma3.zip fine_tuned_gemma3

# Zip the merged full model
!zip -r /mnt/data/fine_tuned_science_gemma3.zip fine_tuned_science_gemma3

# Zip the outputs (checkpoints and logs)
!zip -r /mnt/data/outputs.zip outputs

print("✅ Created zip archives:")
print(" - /mnt/data/fine_tuned_gemma3.zip")
print(" - /mnt/data/fine_tuned_science_gemma3.zip")
print(" - /mnt/data/outputs.zip")


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)

zip error: Nothing to do! (try: zip -r /mnt/data/fine_tuned_gemma3.zip . -i fine_tuned_gemma3)

zip error: Nothing to do! (try: zip -r /mnt/data/fine_tuned_science_gemma3.zip . -i fine_tuned_science_gemma3)

zip error: Nothing to do! (try: zip -r /mnt/data/outputs.zip . -i outputs)
✅ Created zip archives:
 - /mnt/data/fine_tuned_gemma3.zip
 - /mnt/data/fine_tuned_science_gemma3.zip
 - /mnt/data/outputs.zip


In [7]:
!zip -r llama3_all.zip llama3-finetuned llama3-finetuned-merged llama3-lora-adapter


  adding: llama3-finetuned/ (stored 0%)
  adding: llama3-finetuned/checkpoint-90/ (stored 0%)
  adding: llama3-finetuned/checkpoint-90/tokenizer_config.json (deflated 96%)
  adding: llama3-finetuned/checkpoint-90/rng_state.pth (deflated 25%)
  adding: llama3-finetuned/checkpoint-90/special_tokens_map.json (deflated 76%)
  adding: llama3-finetuned/checkpoint-90/adapter_config.json (deflated 56%)
  adding: llama3-finetuned/checkpoint-90/training_args.bin (deflated 51%)
  adding: llama3-finetuned/checkpoint-90/README.md (deflated 66%)
  adding: llama3-finetuned/checkpoint-90/adapter_model.safetensors (deflated 52%)
  adding: llama3-finetuned/checkpoint-90/scheduler.pt (deflated 56%)
  adding: llama3-finetuned/checkpoint-90/optimizer.pt (deflated 10%)
  adding: llama3-finetuned/checkpoint-90/trainer_state.json (deflated 83%)
  adding: llama3-finetuned/checkpoint-90/tokenizer.json (deflated 85%)
  adding: llama3-finetuned-merged/ (stored 0%)
  adding: llama3-finetuned-merged/tokenizer_confi