# Tax-LLaMA-Ind: Online Training Notebook

This notebook lets you fine-tune an 8B-parameter Llama Instruct model with QLoRA on the Indian Income Tax dataset, using free or low-cost cloud GPUs (Google Colab or Kaggle).

## Prerequisites
1.  **Hugging Face Token**: You need a token with write access. [Get it here](https://huggingface.co/settings/tokens).
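2.  **Access to the base model**: Request access on Hugging Face to the gated repository named in `MODEL_NAME` (configuration cell below). Note that Llama 3.2 text models are published only in 1B and 3B sizes; the 8B Instruct checkpoint belongs to the Llama 3.1 family (`meta-llama/Llama-3.1-8B-Instruct`).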
3.  **GPU Runtime**: Ensure you are connected to a GPU runtime (T4 is minimum, A100 is recommended).

In [None]:
# Install Dependencies
!pip install -q -U torch transformers peft bitsandbytes trl datasets scipy

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer
from datasets import load_dataset
from huggingface_hub import login

# Log in to Hugging Face (required for the gated LLaMA weights).
# Do not paste a token into the notebook: it would be stored in every saved or
# shared copy. Export it as the HF_TOKEN environment variable instead (for
# example from a Colab/Kaggle secret). When HF_TOKEN is unset or empty, token
# is None and login() falls back to its interactive prompt.
import os

login(token=os.environ.get("HF_TOKEN") or None)

In [None]:
# Configuration
# Base checkpoint to fine-tune. Llama 3.2 text models exist only in 1B and 3B
# sizes, so "meta-llama/Llama-3.2-8B-Instruct" cannot be resolved on the Hub;
# the 8B Instruct checkpoint is published under the Llama 3.1 family.
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
# Local directory name the fine-tuned model is saved under at the end.
NEW_MODEL_NAME = "Tax-LLaMA-Ind-v1"

# QLoRA quantization settings: 4-bit NF4 weights, float16 compute dtype,
# double quantization left switched off.
quantization_settings = {
    "load_in_4bit": True,
    "bnb_4bit_use_double_quant": False,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quantization_settings)

# Load the base model, quantized according to bnb_config; device_map="auto"
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, device_map="auto", quantization_config=bnb_config
)
# Config tweaks applied before training: pretraining_tp set to 1 and the
# use_cache flag turned off.
model.config.pretraining_tp = 1
model.config.use_cache = False

# Load Tokenizer
# The Llama tokenizer ships with transformers, so there is no need to allow
# code from the model repository to run (trust_remote_code); the model load
# above does not pass that flag either.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the end-of-sequence token for padding and pad on the right.
# NOTE(review): with pad == EOS some collators mask the end-of-turn token out
# of the loss — confirm against the installed TRL version.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# Load Dataset
# 'instruction_tuning.jsonl' must already be uploaded to the Colab/Kaggle
# session; the whole file is read as the "train" split.
dataset = load_dataset(
    "json",
    split="train",
    data_files="instruction_tuning.jsonl",
)

# Format dataset for LLaMA 3 Instruct format
def format_instruction(sample):
    """Render one record as a single-turn Llama 3 Instruct conversation.

    Args:
        sample: Mapping with an ``instruction`` entry (the user turn) and an
            ``output`` entry (the assistant turn).

    Returns:
        The user turn followed by the assistant turn, each introduced by its
        role header and closed with ``<|eot_id|>``.

    The text intentionally does not start with ``<|begin_of_text|>``: the
    Llama 3 tokenizer prepends the BOS token itself when it encodes with its
    default ``add_special_tokens=True``, so a literal BOS here would yield
    two BOS tokens per training example.
    """
    user_turn = (
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{sample['instruction']}<|eot_id|>"
    )
    assistant_turn = (
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
        f"{sample['output']}<|eot_id|>"
    )
    return user_turn + assistant_turn

def _with_text_column(sample):
    """Return the new "text" column value: the formatted prompt for *sample*."""
    return {"text": format_instruction(sample)}


# Add a "text" column to every record.
dataset = dataset.map(_with_text_column)

In [None]:
# LoRA adapter settings for a causal-LM task: rank 64, alpha 16,
# 10% dropout, bias="none".
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Training Arguments
# One epoch (max_steps=-1 leaves the length to num_train_epochs), batches of 4
# per device without gradient accumulation, checkpoints and log lines every
# 25 steps under ./results, metrics reported to TensorBoard.
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    # float16 mixed precision, in line with the float16 compute dtype of bnb_config.
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    # The plain "constant" schedule never warms up, so warmup_ratio above was
    # silently ignored; "constant_with_warmup" ramps up over the first 3% of
    # steps and then holds the learning rate flat.
    lr_scheduler_type="constant_with_warmup",
    report_to="tensorboard",
)

# Supervised fine-tuning trainer: wraps the quantized base model with the LoRA
# settings and trains on the "text" column, capped at 512 tokens per example,
# without packing.
# NOTE(review): dataset_text_field / max_seq_length / tokenizer are passed to
# SFTTrainer directly, which presumably targets an older TRL API — confirm
# against the installed TRL version.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=512,
    packing=False,
    peft_config=peft_config,
)

In [None]:
# Start Training
# Runs the fine-tuning loop set up by `trainer`; per `training_arguments`,
# checkpoints are written under ./results every 25 steps.
trainer.train()

In [None]:
# Save Model
# Writes the trainer's model to the local directory named by NEW_MODEL_NAME.
# NOTE(review): because a peft_config was given to the trainer, this presumably
# stores the LoRA adapter only; the tokenizer is not saved here — confirm
# before sharing the folder.
trainer.model.save_pretrained(NEW_MODEL_NAME)