# Fine-tune Gemma-2-2b for Code Audit (Kaggle T4)

**Production Notebook** | Amit Rosen | Ashkelon, Israel | 2026

| Setting | Value |
|---------|-------|
| Model | `unsloth/gemma-2-2b-it-bnb-4bit` |
| GPU | Tesla T4 (16GB VRAM) |
| Train Time | ~25 min |
| Dataset | 100 audit examples |

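**Setup:** Upload `audit_dataset.jsonl` as a Kaggle Input dataset (the notebook searches `/kaggle/input/` for it). Each JSONL record must contain an `instruction` field and an `output` field.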

## 1. Install Dependencies

**FIX:** Upgrade torch 2.4.0 → 2.5.1 (required for `torch._inductor.config`)

In [None]:
# KAGGLE UNSLOTH INSTALL - TORCH 2.5.1 FIX
# Fixes: AttributeError: module 'torch._inductor' has no attribute 'config'

import subprocess
import sys

def pip_install(packages, msg):
    """Run a quiet ``pip install`` and report the outcome.

    Args:
        packages: Requirement specifiers and pip options as one shell-style
            string; quoted items are kept together as a single argument.
        msg: Status line printed before the install starts.

    Returns:
        True when pip exits with status 0, otherwise False.
    """
    import shlex

    print(f"{msg}")
    # Run pip through this kernel's interpreter so packages land in the
    # environment the notebook imports from; an argument list avoids any
    # shell interpretation of the requirement strings.
    cmd = [sys.executable, "-m", "pip", "install", "-q", *shlex.split(packages)]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Report every failed install, not only those whose stderr happens
        # to contain the word "error".
        detail = result.stderr[-300:] or f"pip exited with status {result.returncode}"
        print(f"  [WARN] {detail}")
        return False
    return True

# Install sequence: each step prints its own progress line via pip_install.

# STEP 1: Upgrade PyTorch to 2.5.1 (has torch._inductor.config)
pip_install(
    "torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121",
    "[1/5] Upgrading PyTorch 2.4.0 → 2.5.1 (fixes _inductor.config)..."
)

# STEP 2: Install numpy 1.26.4 (compatibility)
pip_install("numpy==1.26.4", "[2/5] Installing numpy 1.26.4...")

# STEP 3: Install Unsloth + unsloth_zoo
pip_install(
    '"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"',
    "[3/5] Installing Unsloth..."
)
pip_install("unsloth_zoo", "     + unsloth_zoo...")

# STEP 4: Install training dependencies
pip_install(
    "trl==0.9.6 peft accelerate bitsandbytes datasets",
    "[4/5] Installing training dependencies..."
)

# STEP 5: Install xformers (compatible with torch 2.5.1)
pip_install("xformers==0.0.28.post3", "[5/5] Installing xformers...")

print("\n" + "="*50)
print("[OK] Installation complete!")
print("="*50)

# Verify torch upgrade: report the version and GPU this kernel actually sees.
import torch
print(f"\n[OK] torch: {torch.__version__}")
print(f"[OK] CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"[OK] GPU: {torch.cuda.get_device_name(0)}")
    print(f"[OK] VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# Verify _inductor.config exists.
# A missing submodule raises ImportError (ModuleNotFoundError), while a
# half-initialised torch can surface as AttributeError; catch both so the
# restart hint is printed instead of the cell crashing.
try:
    import torch._inductor.config
    print("[OK] torch._inductor.config: Available")
except (ImportError, AttributeError):
    print("[FAIL] torch._inductor.config: Missing - restart kernel!")

## 2. Load Model

Loading `gemma-2-2b-it` in 4-bit quantization (~5GB VRAM)

In [None]:
import torch
from unsloth import FastLanguageModel

# Model-loading settings (kept as module-level constants for later cells)
MODEL_NAME = "unsloth/gemma-2-2b-it-bnb-4bit"
MAX_SEQ_LENGTH = 2048
DTYPE = None  # None = auto-detect (float16 on T4)
LOAD_IN_4BIT = True

print(f"[INFO] Loading {MODEL_NAME}...")
print(f"[INFO] Sequence length: {MAX_SEQ_LENGTH}")

# Collect the loader arguments in one place, then unpack them into the call.
load_options = {
    "model_name": MODEL_NAME,
    "max_seq_length": MAX_SEQ_LENGTH,
    "dtype": DTYPE,
    "load_in_4bit": LOAD_IN_4BIT,
}
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

# Report the CUDA memory currently allocated by torch, in GB.
vram_used_gb = torch.cuda.memory_allocated() / 1e9
print(f"\n[OK] Model loaded successfully!")
print(f"[OK] VRAM used: {vram_used_gb:.2f} GB")

## 3. Add LoRA Adapters

LoRA config: r=16, targeting all attention + MLP layers

In [None]:
# LoRA hyperparameters
LORA_R = 16
LORA_ALPHA = 16
LORA_DROPOUT = 0

# Projection layers that receive adapters: attention first, then MLP.
ATTENTION_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj"]
MLP_MODULES = ["gate_proj", "up_proj", "down_proj"]
TARGET_MODULES = ATTENTION_MODULES + MLP_MODULES

print(f"[INFO] Adding LoRA adapters (r={LORA_R})...")

# Gather the adapter settings, then wrap the base model with them.
peft_options = {
    "r": LORA_R,
    "target_modules": TARGET_MODULES,
    "lora_alpha": LORA_ALPHA,
    "lora_dropout": LORA_DROPOUT,
    "bias": "none",
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
}
model = FastLanguageModel.get_peft_model(model, **peft_options)

print("\n[OK] LoRA adapters added!")
model.print_trainable_parameters()

## 4. Load Audit Dataset

Loading `audit_dataset.jsonl` with Alpaca prompt format

In [None]:
from datasets import load_dataset
import glob

# Find dataset in Kaggle input folders.
# Patterns are tried in order, from the exact expected path to the broadest
# wildcard; the first pattern that matches anything wins.
DATASET_PATHS = [
    "/kaggle/input/audit-dataset/audit_dataset.jsonl",
    "/kaggle/input/*/audit_dataset.jsonl",
    "/kaggle/input/*/*.jsonl",
]

dataset_path = None
for pattern in DATASET_PATHS:
    # glob returns matches in arbitrary order; sort them so the same file is
    # selected on every run when a wildcard matches several files.
    matches = sorted(glob.glob(pattern))
    if matches:
        dataset_path = matches[0]
        break

if not dataset_path:
    raise FileNotFoundError(
        "Dataset not found! Upload audit_dataset.jsonl to Kaggle Input."
    )

print(f"[INFO] Loading dataset from: {dataset_path}")
# Load the JSON Lines file as a single "train" split.
dataset = load_dataset("json", data_files=dataset_path, split="train")

# Alpaca prompt template.
# The two `{}` slots are filled positionally with str.format: the first takes
# the instruction text, the second the response (left empty at inference time
# so the model continues from "### Response:").
ALPACA_PROMPT = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
{}"""

# End-of-sequence token string of the loaded tokenizer; appended to every
# training example when the prompts are formatted.
EOS_TOKEN = tokenizer.eos_token

def format_prompt(examples):
    """Render a batch of examples into Alpaca-formatted training strings.

    Args:
        examples: Batch mapping with parallel "instruction" and "output"
            sequences.

    Returns:
        A dict with one key, "text", holding one formatted string per
        instruction/output pair, each terminated with EOS_TOKEN.
    """
    pairs = zip(examples["instruction"], examples["output"])
    rendered = [
        ALPACA_PROMPT.format(instruction, output) + EOS_TOKEN
        for instruction, output in pairs
    ]
    return {"text": rendered}

print("[INFO] Formatting dataset...")
# Apply the prompt formatter batch-wise; this adds a "text" column.
dataset = dataset.map(format_prompt, batched=True)

example_count = len(dataset)
print(f"\n[OK] Loaded {example_count} examples")

# Preview the first 200 characters of the first formatted example.
sample_text = dataset[0]["text"]
print(f"[SAMPLE] {sample_text[:200]}...")

## 5. Train Model

SFTTrainer with Unsloth optimization (~25 min on T4)

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Training configuration (optimized for T4 GPU)

# Pick the mixed-precision mode from Unsloth's own capability check.
# torch.cuda.is_bf16_supported() can report True on pre-Ampere GPUs such as
# the T4 because it counts emulated bfloat16 support, which would select bf16
# even though the model was loaded with an auto-detected dtype.
from unsloth import is_bfloat16_supported

use_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        output_dir="./outputs",
        # Effective batch size = 2 per device x 4 accumulation steps = 8.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=100,
        learning_rate=2e-4,
        # Exactly one of fp16 / bf16 is enabled.
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        report_to="none",
    ),
)

print("[INFO] Starting training...")
print("[INFO] Expected time: ~25 minutes")
print("="*50)

trainer_stats = trainer.train()

# Summarise the run from the metrics returned by trainer.train().
print("\n" + "="*50)
print("[OK] Training complete!")
print(f"[OK] Time: {trainer_stats.metrics['train_runtime']:.1f}s")
print(f"[OK] Samples/sec: {trainer_stats.metrics['train_samples_per_second']:.2f}")

## 6. Save Model

Run a quick inference test, save the fine-tuned model locally, and optionally push it to the Hugging Face Hub.

In [None]:
# Smoke-test generation before saving anything
print("[INFO] Testing inference...")
FastLanguageModel.for_inference(model)

test_prompt = "Analyze test coverage: 330 files found, 5 executable, 0% coverage"

# Fill the template with an empty response so the model writes the answer.
prompt_text = ALPACA_PROMPT.format(test_prompt, "")
inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens=256, use_cache=True)

# Only one prompt was submitted, so take the first decoded sequence.
decoded_batch = tokenizer.batch_decode(outputs)
result = decoded_batch[0]

print("\n[INFERENCE TEST]")
print(result)

# Save locally: model first, then tokenizer, into the same directory
SEPARATOR = "="*50

print("\n" + SEPARATOR)
print("[INFO] Saving model...")
for artifact in (model, tokenizer):
    artifact.save_pretrained("audit-gemma-v1")
print("[OK] Model saved to 'audit-gemma-v1/'")

# Optional: Push to HuggingFace
# The push code below is disabled by being wrapped in a string literal.
# Uncomment to push (requires HF_TOKEN in Kaggle secrets)
'''
from huggingface_hub import login
import os

HF_TOKEN = os.environ.get("HF_TOKEN") or "YOUR_TOKEN_HERE"
login(token=HF_TOKEN)

model.push_to_hub("amitrosen/audit-gemma-v1", token=HF_TOKEN)
tokenizer.push_to_hub("amitrosen/audit-gemma-v1", token=HF_TOKEN)
print("[OK] Pushed to HuggingFace!")
'''

# Closing banner
print("\n" + SEPARATOR)
print("[DONE] Fine-tuning complete!")
print("Download 'audit-gemma-v1/' from Kaggle Output")
print(SEPARATOR)