# 03 - First Fine-Tuning: Mini QLoRA Training

This notebook performs a mini fine-tuning run with sample data to validate the full pipeline:
- Load a quantized model and apply LoRA adapters
- Train for 2 epochs on the sample data
- Monitor training loss
- Compare model output before and after fine-tuning

In [None]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import sys
sys.path.insert(0, "..")
import torch
import json
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import Dataset
from src.data_utils import load_jsonl, get_tokenizer, load_yaml_config
from src.eval_metrics import parse_tool_calls_from_output, measure_vram_usage

## Load Model and Apply LoRA

In [None]:
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"

# Quantization settings: 4-bit NF4 weights with double quantization and a
# float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the base model with the quantization config, then prepare it for
# k-bit training before any adapters are attached.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
)
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 8, alpha 16, dropout 0.05, targeting "all-linear"
# modules, with no bias parameters trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Tokenizer comes from the project helper, keyed by the same model name.
tokenizer = get_tokenizer(MODEL_NAME)

## Before Fine-Tuning: Baseline Inference

In [None]:
# Load the tool schemas that are passed to the chat template. The context
# manager closes the file deterministically (the bare `open(...)` left the
# handle to the garbage collector), and the encoding is explicit so non-ASCII
# schema text is decoded the same way on every platform.
with open("../data/samples/gennx_tool_schemas_tier1.json", encoding="utf-8") as schema_file:
    tools = json.load(schema_file)

# Single test conversation reused for the before/after comparison.
test_messages = [
    {"role": "system", "content": "You are a structural engineering assistant for GEN NX."},
    {"role": "user", "content": "절점 1번을 원점에 추가해줘"},
]

# Render the prompt as text (including the tool schemas and the generation
# prompt); `text` is reused by the post-training comparison cell.
text = tokenizer.apply_chat_template(test_messages, tools=tools, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Make inference mode explicit so dropout layers are inactive while generating
# the baseline; training switches the model back to train mode on its own.
model.eval()
with torch.no_grad():
    # Greedy decoding (do_sample=False) keeps the baseline reproducible.
    outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False)

# Strip the prompt tokens and decode only the newly generated continuation.
# Special tokens are kept in the decoded text.
prompt_length = inputs["input_ids"].shape[1]
baseline_response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
print("=== Baseline Response ===")
print(baseline_response)

## Prepare Training Data

In [None]:
# Read the sample conversations from JSONL and wrap them in a `datasets.Dataset`
# so they can be handed to the trainer.
SAMPLES_PATH = "../data/samples/gennx_tool_calling_samples.jsonl"
samples = load_jsonl(SAMPLES_PATH)
train_dataset = Dataset.from_list(samples)

# Quick sanity check on how many examples were loaded.
print(f"Training samples: {len(train_dataset)}")

## Train (2 Epochs)

In [None]:
# Training configuration for the mini run. Keyword arguments are grouped by
# concern; the values are unchanged.
training_args = TrainingArguments(
    # Where checkpoints are written, and how often.
    output_dir="../models/checkpoints/notebook_test",
    save_strategy="epoch",
    # Schedule: 2 epochs, batch size 1 per device with 4 accumulation steps.
    num_train_epochs=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    # Precision and optimizer.
    fp16=True,
    bf16=False,
    optim="paged_adamw_8bit",
    # Gradient checkpointing, non-reentrant variant.
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # Data loading: no worker processes, no pinned memory.
    dataloader_num_workers=0,
    dataloader_pin_memory=False,
    # Log every step (the loss curve is built from these entries); no external
    # experiment tracker.
    logging_steps=1,
    report_to="none",
)

# Supervised fine-tuning on the sample dataset, without sequence packing.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    max_seq_length=2048,
    packing=False,
)

# Run training and report the aggregate training loss.
result = trainer.train()
print(f"Training loss: {result.training_loss:.4f}")

## Loss Curve

In [None]:
import matplotlib.pyplot as plt

# Keep only the log entries that carry a "loss" key, then split them into
# x (step) and y (loss) series.
logs = [entry for entry in trainer.state.log_history if "loss" in entry]
steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]

# Draw the curve with the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(steps, losses, marker="o")
ax.set_xlabel("Step")
ax.set_ylabel("Loss")
ax.set_title("Training Loss")
ax.grid(True)
fig.tight_layout()
plt.show()

## After Fine-Tuning: Inference Comparison

In [None]:
# Re-tokenize the same prompt (`text`) that produced the baseline response so
# the two generations are directly comparable.
inputs = tokenizer(text, return_tensors="pt").to(model.device)

# Training leaves the model in train mode. Switch to eval mode before
# generating: otherwise the LoRA dropout (p=0.05) stays active and perturbs the
# "greedy" output, and with gradient checkpointing enabled the model would run
# generation without the KV cache.
model.eval()
with torch.no_grad():
    # Same decoding settings as the baseline: greedy, up to 512 new tokens.
    outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False)

# Drop the prompt tokens and decode only the generated continuation. Special
# tokens are kept in the decoded text.
prompt_length = inputs["input_ids"].shape[1]
finetuned_response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=False)
print("=== Fine-Tuned Response ===")
print(finetuned_response)

# Compare how many tool calls the project parser extracts from each response.
print("\n=== Comparison ===")
print(f"Baseline tool calls: {len(parse_tool_calls_from_output(baseline_response))}")
print(f"Fine-tuned tool calls: {len(parse_tool_calls_from_output(finetuned_response))}")

## VRAM Usage

In [None]:
# Query the project helper for memory usage and print a one-line summary built
# from its 'used_mb', 'total_mb' and 'percent' entries.
vram = measure_vram_usage()
vram_summary = f"VRAM: {vram['used_mb']:.0f} MB / {vram['total_mb']:.0f} MB ({vram['percent']:.1f}%)"
print(vram_summary)