# Fine-tune Gemma-2-2b for Code Audit (Kaggle T4)

**Production Notebook** | 2026 | ~25 min training

**Workflow:**
1. Run Cell 1 (Install) → RESTART KERNEL
2. Run Cells 2-7 (skip Cell 1)

## 1. Install (then RESTART KERNEL!)

In [None]:
# Kaggle + Unsloth end-to-end install fix (2026)
# Works around errors involving: torch._inductor.config, the numpy binary, and torch.int1 (torchao)

import subprocess

def run(cmd, msg=""):
    """Run a shell command quietly, reporting a failure instead of hiding it.

    Args:
        cmd: Command line handed to the shell.
        msg: Optional progress message printed before the command runs.

    Returns:
        True if the command exited with status 0, otherwise False.
    """
    if msg:
        print(msg)
    # Output is captured so successful steps stay quiet. It is decoded
    # leniently so unexpected bytes in tool output cannot raise here.
    proc = subprocess.run(
        cmd, shell=True, capture_output=True, text=True, errors="replace"
    )
    if proc.returncode != 0:
        # A failed step must be visible: otherwise the notebook carries on
        # and breaks later with an unrelated-looking error.
        print(f"[WARN] Command failed (exit code {proc.returncode}): {cmd}")
        tail = (proc.stderr or proc.stdout or "").strip().splitlines()[-5:]
        for line in tail:
            print(f"       {line}")
    return proc.returncode == 0

BANNER = "=" * 60
STARS = "*" * 60

print(BANNER)
print("KAGGLE UNSLOTH COMPLETE INSTALL")
print(BANNER)

# Ordered install plan as (shell command, progress message) pairs.
# The steps are executed strictly top to bottom.
INSTALL_STEPS = [
    # Step 1: Uninstall conflicting packages
    (
        "pip uninstall -y torchao -q",
        "[1/6] Removing torchao (causes torch.int1 error)...",
    ),
    # Step 2: Upgrade PyTorch
    (
        "pip install -q torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121",
        "[2/6] Installing PyTorch 2.5.1...",
    ),
    # Step 3: Force reinstall numpy
    (
        "pip install -q --force-reinstall numpy",
        "[3/6] Reinstalling numpy...",
    ),
    # Step 4: Install Unsloth (plus unsloth_zoo)
    (
        'pip install -q "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"',
        "[4/6] Installing Unsloth...",
    ),
    (
        "pip install -q unsloth_zoo",
        "     + unsloth_zoo...",
    ),
    # Step 5: Training deps (pin transformers to avoid torchao)
    (
        "pip install -q trl==0.9.6 peft accelerate bitsandbytes datasets transformers==4.44.2",
        "[5/6] Installing training dependencies...",
    ),
    # Step 6: xformers
    (
        "pip install -q xformers==0.0.28.post3",
        "[6/6] Installing xformers...",
    ),
]

for command, label in INSTALL_STEPS:
    run(command, label)

# Verify torchao is gone: a non-zero exit from `pip show` is taken to mean
# the package is no longer installed.
result = subprocess.run("pip show torchao", shell=True, capture_output=True)
if result.returncode != 0:
    torchao_status = "REMOVED"
else:
    torchao_status = "WARNING: Still installed!"

print("\n" + BANNER)
print("[OK] Installation complete!")
print(f"[OK] torchao: {torchao_status}")
print(BANNER)
print("\n" + STARS)
print("***  RESTART KERNEL NOW!  ***")
print("***  Then run Cell 2 (skip this cell)  ***")
print(STARS)

## 2. Verify (run after kernel restart)

In [None]:
# Import order matters here: unsloth has to be imported before transformers
# (and is therefore imported before anything else in this cell).
import unsloth

import torch
import numpy as np

# Query CUDA once and reuse the answer for both the report and the GPU branch.
cuda_ok = torch.cuda.is_available()

for label, value in (
    ("torch", torch.__version__),
    ("numpy", np.__version__),
    ("CUDA", cuda_ok),
):
    print(f"[OK] {label}: {value}")

if cuda_ok:
    # Report the name and total memory of device 0.
    print(f"[OK] GPU: {torch.cuda.get_device_name(0)}")
    total_vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"[OK] VRAM: {total_vram_gb:.1f} GB")

from unsloth import FastLanguageModel
print("\n[OK] Unsloth imported successfully!")

## 3. Load Model

In [None]:
MODEL_NAME = "unsloth/gemma-2-2b-it-bnb-4bit"
MAX_SEQ_LENGTH = 2048

print(f"[INFO] Loading {MODEL_NAME}...")

# Keyword arguments for the loader, collected in one place so the
# configuration can be read at a glance.
load_options = dict(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

print("\n[OK] Model loaded!")
# Memory currently allocated by torch on the GPU, reported in GB.
allocated_gb = torch.cuda.memory_allocated() / 1e9
print(f"[OK] VRAM: {allocated_gb:.2f} GB")

## 4. Add LoRA Adapters

In [None]:
# Names of the modules that receive LoRA adapters.
LORA_TARGET_MODULES = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Adapter hyperparameters, kept together so they are easy to tweak.
lora_options = dict(
    r=16,
    target_modules=LORA_TARGET_MODULES,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

# Wrap the loaded model; `model` now refers to the adapter-equipped model.
model = FastLanguageModel.get_peft_model(model, **lora_options)

print("[OK] LoRA added!")
model.print_trainable_parameters()

## 5. Load Dataset

In [None]:
from datasets import load_dataset
import glob

# Find dataset: try the exact expected path first, then progressively
# broader wildcard patterns under /kaggle/input.
for pattern in ["/kaggle/input/audit-dataset/audit_dataset.jsonl",
                "/kaggle/input/*/audit_dataset.jsonl",
                "/kaggle/input/*/*.jsonl"]:
    # glob makes no ordering guarantee, so sort to pick the same file on
    # every run when a wildcard pattern matches more than one.
    matches = sorted(glob.glob(pattern))
    if matches:
        dataset_path = matches[0]
        if len(matches) > 1:
            # Make an ambiguous match visible rather than silently choosing.
            print(f"[WARN] {len(matches)} files match {pattern}; using {dataset_path}")
        break
else:
    # No pattern matched anything: the dataset has not been attached.
    raise FileNotFoundError("Upload audit_dataset.jsonl!")

print(f"[INFO] Loading: {dataset_path}")
dataset = load_dataset("json", data_files=dataset_path, split="train")

# Prompt template with two positional `{}` slots. The first is filled with
# the instruction text and the second with the response; the second slot is
# filled with an empty string when the template is used to prompt generation.
ALPACA = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
{}"""

# End-of-sequence token string, appended to every rendered training example.
EOS = tokenizer.eos_token


def fmt(ex):
    """Render a batch of rows into prompt-formatted training strings.

    Args:
        ex: Batched mapping holding parallel "instruction" and "output"
            sequences.

    Returns:
        A dict with a single "text" key whose value is a list containing one
        formatted string (template + EOS) per input row.
    """
    pairs = zip(ex["instruction"], ex["output"])
    rendered = [ALPACA.format(instruction, output) + EOS
                for instruction, output in pairs]
    return {"text": rendered}


# Apply the formatter batch-wise, then report the dataset size.
dataset = dataset.map(function=fmt, batched=True)
example_count = len(dataset)
print(f"[OK] {example_count} examples")

## 6. Train (~25 min)

In [None]:
from unsloth import is_bfloat16_supported
from transformers import TrainingArguments
from trl import SFTTrainer

# Decide the mixed-precision mode once so fp16 and bf16 can never disagree.
# Unsloth's helper is used instead of torch.cuda.is_bf16_supported(), which
# on recent PyTorch releases can report (emulated) bf16 support on
# pre-Ampere GPUs such as the T4 this notebook targets.
use_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # column holding the rendered prompts
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        output_dir="./outputs",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # effective batch size of 2 * 4 = 8
        warmup_steps=5,
        max_steps=100,
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        report_to="none",  # no external experiment tracker
    ),
)

print("[INFO] Training...")
stats = trainer.train()
print(f"\n[OK] Done in {stats.metrics['train_runtime']:.0f}s")

## 7. Test & Save Model

In [None]:
# --- Smoke test: generate a response for one sample instruction ---
FastLanguageModel.for_inference(model)

test = "Analyze test coverage: 330 files, 5 executable, 0% coverage"
# The response slot is left empty so the model produces it.
prompt = ALPACA.format(test, "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
out = model.generate(**inputs, max_new_tokens=256, use_cache=True)
decoded = tokenizer.decode(out[0])
print("[TEST]\n", decoded)

# --- Save: model first, then tokenizer, into the same directory ---
for artifact in (model, tokenizer):
    artifact.save_pretrained("audit-gemma-v1")
print("\n[OK] Saved to audit-gemma-v1/")