# LAMP Fine-Tuning with Unsloth

Fine-tune small LLMs (Llama 3.2 3B, Gemma 3 4B, Phi-4 Mini) on the LAMP dataset
to generate valid JSON light programs from natural language.

**Requirements:** Google Colab with GPU runtime (T4 or A100)

In [None]:
# Step 0: Install dependencies
# Unsloth is installed normally (with its own dependencies). The remaining
# training libraries are installed with --no-deps so that pip does not install
# or replace any of their dependencies — presumably to keep the package
# versions that the Unsloth install selected.
!pip install unsloth
!pip install --no-deps trl peft accelerate bitsandbytes

In [None]:
# Step 1: Clone the repo and load data
# Clones the project and changes into its finetuning directory; the relative
# paths used by later cells (data/, outputs/, exports/) resolve from there.
# NOTE(review): this cell is not idempotent — re-running it makes git clone
# fail if lampAI already exists, and %cd is relative to the current directory.
!git clone https://github.com/ArianMoeini/lampAI.git
%cd lampAI/finetuning

In [None]:
# Step 2: Choose your model
# Options: "llama", "gemma", "phi"
MODEL_CHOICE = "llama"  # <-- CHANGE THIS

# Per-model settings:
#   model_name     - identifier passed to FastLanguageModel.from_pretrained
#   output_name    - base name used for the outputs/ and exports/ paths
#   max_seq_length - sequence length passed to the model loader and trainer
MODEL_CONFIGS = {
    "llama": {
        "model_name": "unsloth/Llama-3.2-3B-Instruct",
        "output_name": "lamp-llama-3b",
        "max_seq_length": 4096,
    },
    "gemma": {
        "model_name": "unsloth/gemma-3-4b-it",
        "output_name": "lamp-gemma-4b",
        "max_seq_length": 4096,
    },
    "phi": {
        "model_name": "unsloth/Phi-4-mini-instruct",
        "output_name": "lamp-phi-mini",
        "max_seq_length": 4096,
    },
}

# Fail with a readable message (listing the valid options) rather than a bare
# KeyError when MODEL_CHOICE contains a typo.
if MODEL_CHOICE not in MODEL_CONFIGS:
    raise ValueError(
        f"Unknown MODEL_CHOICE {MODEL_CHOICE!r}; "
        f"expected one of {sorted(MODEL_CONFIGS)}"
    )

config = MODEL_CONFIGS[MODEL_CHOICE]
print(f"Selected: {config['model_name']}")

In [None]:
# Step 3: Load model with 4-bit quantization
from unsloth import FastLanguageModel

# Loader arguments for the selected model. dtype=None leaves the precision
# choice to Unsloth; load_in_4bit requests 4-bit quantized weights.
load_kwargs = dict(
    model_name=config["model_name"],
    max_seq_length=config["max_seq_length"],
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

print(f"Model loaded: {config['model_name']}")

In [None]:
# Step 4: Configure LoRA adapters
# Names of the modules that receive LoRA adapters.
lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=lora_targets,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

# Count parameters in a single pass: everything, and the subset that will
# receive gradients during training.
trainable = 0
total = 0
for param in model.parameters():
    n_elements = param.numel()
    total += n_elements
    if param.requires_grad:
        trainable += n_elements
print(f"Trainable: {trainable:,} / {total:,} ({trainable/total*100:.2f}%)")

In [None]:
# Step 5: Load and format dataset
import json
from datasets import Dataset

def read_jsonl(path):
    """Read a JSON Lines file into a list of parsed objects.

    Blank and whitespace-only lines are skipped, so a file with trailing or
    separating empty lines loads cleanly. The file is always decoded as UTF-8
    rather than the platform default encoding.

    Args:
        path: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the file path and 1-based line number.
    """
    items = []
    with open(path, encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                items.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Keep the exception type, but say which line of which file
                # is broken so bad training records are easy to locate.
                raise json.JSONDecodeError(
                    f"{path}:{line_no}: {err.msg}", err.doc, err.pos
                ) from err
    return items

# Parse each split from disk, then wrap the records as a Dataset.
train_records = read_jsonl("data/train.jsonl")
train_data = Dataset.from_list(train_records)
val_records = read_jsonl("data/val.jsonl")
val_data = Dataset.from_list(val_records)

print(f"Train: {len(train_data)}, Val: {len(val_data)}")

# Apply chat template
def format_convos(examples):
    """Render every conversation in a batch to one chat-templated string.

    Args:
        examples: A batch (as passed by ``Dataset.map(..., batched=True)``)
            whose ``"conversations"`` entry is iterated; each item is handed
            to the tokenizer's chat template.

    Returns:
        A dict with a single ``"text"`` key holding the rendered strings, in
        the same order as the input conversations.
    """
    return {
        "text": [
            # tokenize=False yields plain text; no generation prompt is
            # appended to the rendered conversation.
            tokenizer.apply_chat_template(
                conversation, tokenize=False, add_generation_prompt=False
            )
            for conversation in examples["conversations"]
        ]
    }

# Add the rendered "text" column to both splits (train first, then val).
train_data, val_data = (
    split.map(format_convos, batched=True) for split in (train_data, val_data)
)

# Preview one example
print("\n--- Example ---")
first_text = train_data[0]["text"]
print(first_text[:500])

In [None]:
# Step 6: Train!
from trl import SFTTrainer, SFTConfig
from unsloth import is_bfloat16_supported

# Choose mixed precision to match the GPU instead of hard-coding fp16. The
# model was loaded with dtype=None, so on bf16-capable GPUs (e.g. A100) it is
# loaded in bfloat16 and training with fp16=True is rejected; on GPUs without
# bf16 support (e.g. T4) fp16 is the right choice.
use_bf16 = is_bfloat16_supported()

training_args = SFTConfig(
    # 4 sequences per step x 4 accumulation steps = 16 sequences per
    # optimizer update on each device.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_ratio=0.05,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=42,
    output_dir=f"outputs/{config['output_name']}",
    # Checkpoint and evaluate on the validation split once per epoch.
    save_strategy="epoch",
    eval_strategy="epoch",
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=training_args,
    # Train on the chat-templated "text" column, one example per sequence.
    dataset_text_field="text",
    max_seq_length=config["max_seq_length"],
    packing=False,
)

trainer.train()

In [None]:
# Step 7: Quick test before exporting
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

test_prompts = [
    "warm and cozy",
    "show a heart",
    "countdown from 5",
    "party mode",
    "simulate a thunderstorm",
]

# Reuse a system prompt taken from the training data so the smoke test uses
# the same framing as the fine-tuning examples.
# NOTE(review): assumes the first message of the first training example is the
# system turn — confirm against the dataset.
system_prompt = train_data[0]["conversations"][0]["content"]

for prompt in test_prompts:
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Create a light program for this request.\n\nRequest: {prompt}\n\nRespond with ONLY a JSON program. No text."},
    ]
    inputs = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")

    outputs = model.generate(
        input_ids=inputs, max_new_tokens=2048,
        temperature=0.3, do_sample=True,
    )
    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

    # Check if valid JSON. Only a parse failure counts as INVALID; any other
    # exception (including KeyboardInterrupt) is allowed to propagate instead
    # of being silently reported as a bad response.
    try:
        json.loads(response)
    except json.JSONDecodeError:
        status = "INVALID"
    else:
        status = "VALID JSON"

    print(f"\n[{status}] {prompt}")
    print(f"  {response[:200]}")

In [None]:
# Step 8: Export to GGUF for Ollama
import os
os.makedirs("exports", exist_ok=True)

# One row per export: (directory suffix, quantization method, label used in
# the log line). Q4_K_M is the build recommended for the Pi 5; Q8_0 is written
# as a backup.
gguf_exports = (
    ("", "q4_k_m", "Q4_K_M"),
    ("-q8", "q8_0", "Q8_0"),
)

for dir_suffix, quant_method, quant_label in gguf_exports:
    export_path = f"exports/{config['output_name']}{dir_suffix}"
    model.save_pretrained_gguf(
        export_path,
        tokenizer,
        quantization_method=quant_method,
    )
    print(f"GGUF {quant_label} saved to {export_path}")

In [None]:
# Step 9: Create Ollama Modelfile
import glob
import os

# The Modelfile's FROM line must name the GGUF file exactly as it will sit
# next to the Modelfile on the Pi. Rather than hard-coding a file name, use
# the name of the Q4_K_M file that actually exists under exports/ for this
# model; fall back to the previous hard-coded name if none is found (e.g. the
# export cell has not been run yet).
default_gguf_name = f"{config['output_name']}-unsloth.Q4_K_M.gguf"
q4_candidates = sorted(
    path
    for path in glob.glob("exports/**/*.gguf", recursive=True)
    if config["output_name"] in path
    and "q4_k_m" in os.path.basename(path).lower()
)
if q4_candidates:
    gguf_name = os.path.basename(q4_candidates[0])
else:
    gguf_name = default_gguf_name
    print(f"Warning: no Q4_K_M GGUF found under exports/; Modelfile will reference {gguf_name}")

# NOTE(review): the stop tokens below look like the Llama 3 and Gemma
# end-of-turn markers; none is listed for the "phi" option — confirm whether
# one is needed.
modelfile_content = f"""FROM ./{gguf_name}
PARAMETER temperature 0.3
PARAMETER num_predict 4096
PARAMETER stop <|eot_id|>
PARAMETER stop <end_of_turn>
"""

with open(f"exports/Modelfile.{config['output_name']}", "w", encoding="utf-8") as f:
    f.write(modelfile_content)

print(f"Modelfile saved!")
print(f"\nTo deploy on your Pi 5:")
print(f"  1. Copy the GGUF file and Modelfile to your Pi")
print(f"  2. Run: ollama create {config['output_name']} -f Modelfile.{config['output_name']}")
print(f"  3. Test: ollama run {config['output_name']} 'warm and cozy'")

In [None]:
# Step 10: Download the GGUF files
# On Colab, this will trigger a browser download
from google.colab import files

import glob

# Every GGUF under exports/, at any directory depth.
gguf_files = glob.glob("exports/**/*.gguf", recursive=True)
for gguf_path in gguf_files:
    size_gb = os.path.getsize(gguf_path) / 1e9
    print(f"Downloading: {gguf_path} ({size_gb:.1f} GB)")
    files.download(gguf_path)

# Also download the Modelfile
modelfiles = glob.glob("exports/Modelfile.*")
for modelfile_path in modelfiles:
    files.download(modelfile_path)