# VAZHI Gemma-2B Tamil Fine-tuning (Government Module)

**Goal:** Fine-tune the working Gemma-2B Tamil model with government scheme data

**Key Differences from Previous Attempts:**
- Starting from a model that ALREADY produces coherent Tamil
- Using 16-bit (bf16) training, NOT 4-bit quantization!
- Very conservative LoRA settings
- Small focused dataset (452 items)

**If this works:** Expand to other VAZHI modules

In [None]:
# Install dependencies (quiet mode; versions are not pinned, so the latest
# release of each package is pulled in).
# NOTE(review): bitsandbytes is installed here, but none of the cells shown
# load a quantized model - confirm whether it is still needed.
!pip install -q transformers datasets peft accelerate bitsandbytes trl huggingface_hub

In [None]:
import torch

# Report the PyTorch build and, when CUDA is usable, the name and total
# memory of device 0.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU: {gpu_name}")
    print(f"Memory: {gpu_total_bytes / 1e9:.1f} GB")

## Step 1: Load Base Model

Load the Gemma-2B Tamil model that we know works.

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

MODEL_ID = "abhinand/gemma-2b-it-tamil-v0.1-alpha"

print(f"Loading {MODEL_ID}...")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Only fall back to EOS-as-padding when the tokenizer ships without a pad
# token. Overwriting an existing pad token with EOS makes padding and
# end-of-sequence share one id, so a collator that masks padding in the labels
# can end up masking EOS too.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load the weights in bfloat16 (16-bit, no 4-bit quantization).
# The original sizing note assumed ~5GB of 16-bit weights on a 16GB T4.
# NOTE(review): T4-class GPUs reportedly lack native bf16 support - confirm
# the target GPU before relying on bfloat16 here and bf16=True in training.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

print("Model loaded!")
print(f"Parameters: {model.num_parameters() / 1e9:.2f}B")

## Step 2: Verify Base Model Works

In [None]:
def test_model(model, tokenizer, prompt, max_new_tokens=100):
    """Generate one sampled answer for ``prompt`` and return it as text.

    The prompt is wrapped in the "### Instruction / ### Response" template
    used throughout this notebook, tokenized, and moved to the model's device.

    Args:
        model: Causal LM exposing ``device``, ``generate``, ``eval`` and
            ``train`` (a plain or PEFT-wrapped model).
        tokenizer: Tokenizer matching ``model``.
        prompt: The user question to insert into the template.
        max_new_tokens: Upper bound on generated tokens (default 100, the
            value previously hard-coded).

    Returns:
        The decoded, whitespace-stripped continuation produced by the model,
        without the prompt text.
    """
    formatted = f"### Instruction:\n{prompt}\n\n### Response:\n"
    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Generate in eval mode so dropout (e.g. LoRA dropout after fine-tuning)
    # cannot perturb the output, then restore whatever mode the caller had.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )
    finally:
        model.train(was_training)

    # Decode only the newly generated tokens; splitting the full text on the
    # "### Response:" marker breaks when the marker appears in the prompt or
    # is repeated by the model.
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

# Baseline run: record what the untouched base model answers so it can be
# compared with the fine-tuned model later on.
banner = "=" * 50
print(banner)
print("BEFORE FINE-TUNING:")
print(banner)

test_prompts = [
    "PM-KISAN திட்டம் என்ன?",
    "அரசு மருத்துவமனையில் free treatment கிடைக்குமா?",
    "தமிழ்நாட்டின் தலைநகரம் எது?"
]

for question in test_prompts:
    # Print the question first so it is visible while generation runs.
    print(f"\nQ: {question}")
    answer = test_model(model, tokenizer, question)
    print(f"A: {answer}")

## Step 3: Load Government Training Data

In [None]:
import json
from datasets import Dataset

# Download govt data from GitHub or load locally
!wget -q https://raw.githubusercontent.com/CryptoYogiLLC/vazhi/main/data/v04/sources/vazhi_arasu/vazhi_arasu_all.json -O govt_data.json

with open("govt_data.json") as f:
    govt_data = json.load(f)

print(f"Loaded {len(govt_data)} government training samples")
print(f"\nSample:")
print(f"Q: {govt_data[0]['instruction'][:100]}...")
print(f"A: {govt_data[0]['output'][:200]}...")

In [None]:
def format_for_training(example):
    """Render one record as a single instruction/response training string.

    Args:
        example: Mapping with 'instruction' and 'output' entries.

    Returns:
        A dict with one key, "text", holding the filled-in template.

    Raises:
        KeyError: If either entry is missing from ``example``.
    """
    template = "### Instruction:\n{instruction}\n\n### Response:\n{output}"
    rendered = template.format(
        instruction=example["instruction"],
        output=example["output"],
    )
    return {"text": rendered}

# Turn every raw record into its {"text": ...} form and wrap the result in a
# Dataset.
formatted_data = list(map(format_for_training, govt_data))
dataset = Dataset.from_list(formatted_data)

# Hold out 10% for validation; the fixed seed keeps the split repeatable.
split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, val_dataset = split["train"], split["test"]

for label, subset in (("Train", train_dataset), ("Val", val_dataset)):
    print(f"{label}: {len(subset)} samples")

## Step 4: Configure LoRA (Very Conservative)

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# VERY conservative LoRA config
lora_config = LoraConfig(
    r=4,                    # Very low rank (was 8, 16, 32 before)
    lora_alpha=8,           # Low alpha
    target_modules=["q_proj", "v_proj"],  # Only 2 modules
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# This notebook trains with gradient checkpointing while every base weight is
# frozen. In that setup the embedding outputs must require grad, otherwise the
# backward pass can fail with "element 0 of tensors does not require grad".
# Some trainer versions do this themselves; doing it here is harmless.
if hasattr(model, "enable_input_require_grads"):
    model.enable_input_require_grads()

# Apply LoRA
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

## Step 5: Training Configuration

In [None]:
from trl import SFTTrainer, SFTConfig

training_args = SFTConfig(
    output_dir="./vazhi-gemma-govt",

    # Very conservative learning
    learning_rate=5e-6,         # Very low (was 1e-5, 2e-4 before)
    num_train_epochs=1,         # Single epoch only

    # Batch settings
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,

    # Stability
    max_grad_norm=0.3,          # Gradient clipping
    warmup_ratio=0.1,

    # Precision - NOT 4-bit!
    # NOTE(review): bf16 needs GPU support; confirm on the target hardware.
    bf16=True,

    # Memory optimization
    gradient_checkpointing=True,
    optim="adamw_torch",

    # Logging
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,

    # Data
    # NOTE(review): `max_seq_length` here and `tokenizer=` below are spelled
    # differently in some TRL releases - confirm against the installed version.
    max_seq_length=512,
    dataset_text_field="text",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
)

print("Trainer configured!")

# Derive the estimate from the configured values instead of a hard-coded 8 so
# it stays correct when batch size, accumulation or epochs change.
# Assumes a single training device.
samples_per_step = (
    training_args.per_device_train_batch_size
    * training_args.gradient_accumulation_steps
)
approx_total_steps = (len(train_dataset) // samples_per_step) * int(
    training_args.num_train_epochs
)
print(f"Total steps: ~{approx_total_steps}")

## Step 6: Train!

In [None]:
# Announce the run and remind the reader what a healthy loss curve looks like.
for status_line in (
    "Starting training...",
    "Watch for loss stability - should decrease slowly without spikes",
):
    print(status_line)

# Left as the final expression so the notebook displays the training result.
trainer.train()

## Step 7: Test Fine-tuned Model

In [None]:
def _print_answers(prompts):
    """Print each question followed by the current model's answer."""
    for question in prompts:
        print(f"\nQ: {question}")
        print(f"A: {test_model(model, tokenizer, question)}")


divider = "=" * 50

print(divider)
print("AFTER FINE-TUNING:")
print(divider)

# Re-run the baseline prompts so before/after answers can be compared.
_print_answers(test_prompts)

# Extra questions aimed at the government-scheme material.
govt_tests = [
    "Cyber fraud-க்கு complaint எப்படி போடுவது?",
    "RBI விதிகளின்படி வங்கி பொறுப்பு என்ன?",
]

print("\n" + divider)
print("GOVERNMENT-SPECIFIC TESTS:")
print(divider)

_print_answers(govt_tests)

## Step 8: Save & Merge Model

In [None]:
# Write the LoRA-wrapped model and the tokenizer into the same directory.
adapter_dir = "./vazhi-gemma-govt-lora"
for artifact in (model, tokenizer):
    artifact.save_pretrained(adapter_dir)

print("LoRA adapter saved!")

In [None]:
# Fold the LoRA weights into the base model and save the result, together
# with the tokenizer, for deployment.
merged_dir = "./vazhi-gemma-govt-merged"
merged_model = model.merge_and_unload()
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)

print("Merged model saved!")

## Step 9: Convert to GGUF

In [None]:
# Clone llama.cpp if needed (shallow clone, depth 1).
# Note: stderr is discarded and ANY non-zero exit from git prints
# "Already cloned", not only the case where the directory already exists.
!git clone --depth 1 https://github.com/ggerganov/llama.cpp.git 2>/dev/null || echo "Already cloned"

# Build
# Configures with the CUDA backend switched off, then runs a Release build with
# 4 parallel jobs. The trailing "-- llama-quantize" is forwarded to the
# underlying build tool (presumably selecting just that target - confirm).
!cd llama.cpp && mkdir -p build && cd build && cmake .. -DGGML_CUDA=OFF && cmake --build . --config Release -j4 -- llama-quantize

# NOTE(review): this message is printed even if the build command failed.
print("llama.cpp built!")

In [None]:
import subprocess
import os

import sys

# Convert to GGUF F16
# Run the converter with the interpreter that is executing this notebook, so
# it sees the same installed packages; a bare "python" is whatever is first on
# PATH and may be a different environment.
print("Converting to GGUF F16...")
subprocess.run([
    sys.executable, "llama.cpp/convert_hf_to_gguf.py",
    "./vazhi-gemma-govt-merged",
    "--outfile", "./vazhi-gemma-govt-f16.gguf",
    "--outtype", "f16"
], check=True)

# Quantize to Q4_K_M
# check=True makes a failed conversion or quantization raise CalledProcessError
# instead of continuing silently.
print("Quantizing to Q4_K_M...")
subprocess.run([
    "./llama.cpp/build/bin/llama-quantize",
    "./vazhi-gemma-govt-f16.gguf",
    "./vazhi-gemma-govt-q4_k_m.gguf",
    "Q4_K_M"
], check=True)

# Check sizes
for gguf_name in ["vazhi-gemma-govt-f16.gguf", "vazhi-gemma-govt-q4_k_m.gguf"]:
    if os.path.exists(gguf_name):
        size_gb = os.path.getsize(gguf_name) / 1e9
        print(f"{gguf_name}: {size_gb:.2f} GB")

## Step 10: Test GGUF Quality

In [None]:
# Install the package that provides the `llama_cpp` module imported in the
# next cell to load the quantized GGUF file.
!pip install -q llama-cpp-python

In [None]:
from llama_cpp import Llama

# Load the quantized GGUF file with a 512-token context and 4 threads.
gguf_settings = {
    "model_path": "./vazhi-gemma-govt-q4_k_m.gguf",
    "n_ctx": 512,
    "n_threads": 4,
    "verbose": False,
}
llm = Llama(**gguf_settings)

rule = "=" * 50
print(rule)
print("GGUF Q4_K_M TEST:")
print(rule)

# Run the general prompts and the government-specific prompts together.
all_tests = test_prompts + govt_tests

for question in all_tests:
    print(f"\nQ: {question}")
    # Generation stops at the next "###" marker or at the first blank line.
    completion = llm(
        f"### Instruction:\n{question}\n\n### Response:\n",
        max_tokens=150,
        stop=["###", "\n\n"],
        echo=False,
    )
    answer = completion['choices'][0]['text'].strip()
    print(f"A: {answer}")

## Results Summary

Compare before/after fine-tuning:

| Test | Before | After |
|------|--------|-------|
| PM-KISAN | ? | ? |
| Free treatment | Wrong (said No) | ? |
| Cyber fraud | ? | ? |

If the GGUF model produces coherent Tamil with improved government-scheme knowledge, the fine-tuning worked!