# Training

In [None]:
from huggingface_hub import login

# Authenticate this session with the Hugging Face Hub.
# NOTE(review): `hf_token` is not defined in the visible cells - presumably it
# is set earlier (e.g. from a notebook secret); confirm before running.
login(token=hf_token)

## Import Libraries

In [None]:
import os
import json
import torch
import gc
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)


## Configuration

In [None]:
# Hub checkpoint to fine-tune, the JSONL training file, and the output folder.
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
DATASET_PATH = "dataset2.jsonl"
OUTPUT_DIR = "./llama-roman-urdu-lora"

# Run the garbage collector first, then release cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Pad on the right and reuse the EOS token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Load the weights in float16 with the whole model mapped to device 0.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)


## Import and Tokenize Dataset

In [None]:
from datasets import Dataset

# Fallback: make sure a padding token exists before padding="max_length" is
# requested during tokenization.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # LLaMA often uses eos_token as pad

def preprocess_function(example):
    """
    Tokenize one chat-style record into fixed-length training features.

    The conversation is rendered with the tokenizer's chat template, i.e. the
    same prompt format that ``test_model`` builds at inference time, so the
    model is fine-tuned on the format it is later queried with.

    Args:
        example: A dataset row whose ``"message"`` entry is a list of dicts
            with ``"role"`` and ``"content"`` keys (a missing key is treated
            as an empty string).

    Returns:
        A dict with ``input_ids``, ``attention_mask`` and ``labels``, each
        padded/truncated to 512 tokens. ``labels`` equals ``input_ids``
        except that padding positions are set to -100.
    """
    # Normalise every turn so that a missing role/content cannot break the
    # template rendering.
    messages = [
        {"role": msg.get("role", ""), "content": msg.get("content", "")}
        for msg in example["message"]
    ]
    full_text = tokenizer.apply_chat_template(messages, tokenize=False)

    tokenized = tokenizer(
        full_text,
        # The rendered template already contains the special tokens
        # (including BOS); adding them again would duplicate them.
        add_special_tokens=False,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors=None,
    )

    # Mask padding via the attention mask rather than by comparing against
    # pad_token_id: the pad token is aliased to the EOS token, so an id
    # comparison would also mask every genuine EOS token in the text.
    labels = [
        token_id if keep else -100
        for token_id, keep in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]

    return {
        "input_ids": tokenized["input_ids"],
        "attention_mask": tokenized["attention_mask"],
        "labels": labels,
    }

# Read the JSONL training file: one JSON object per non-empty line.
try:
    with open(DATASET_PATH, "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. an extra empty line at the end of the file),
        # which json.loads would otherwise reject with a JSONDecodeError.
        data = [json.loads(line) for line in f if line.strip()]
except FileNotFoundError as err:
    raise FileNotFoundError(f"Upload {DATASET_PATH} to Colab.") from err

raw_dataset = Dataset.from_list(data)

# Tokenize every record and drop the raw columns so that only the features
# returned by preprocess_function remain.
tokenized_dataset = raw_dataset.map(
    preprocess_function,
    remove_columns=raw_dataset.column_names,
)

# Have the dataset return torch tensors.
tokenized_dataset.set_format("torch")


## Training Config

In [None]:

from transformers import default_data_collator

# NOTE(review): the model is loaded in plain float16 rather than k-bit
# quantized - confirm that prepare_model_for_kbit_training is still wanted.
model = prepare_model_for_kbit_training(model)

# LoRA adapters on the attention and MLP projection layers.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = 8 sequences per optimizer step per device
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=5,
    fp16=True,
    optim="paged_adamw_32bit",
    save_strategy="no",  # no intermediate checkpoints are written
    report_to="none",
    remove_unused_columns=False,
)

# The tokenized dataset already carries a "labels" column.
# DataCollatorForLanguageModeling(mlm=False) would overwrite it with a copy of
# input_ids in which every token equal to pad_token_id is masked - and the pad
# token is the EOS token here - so batch the existing columns unchanged with
# the default collator instead.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=default_data_collator,
)


## Start Training

In [None]:

# Run the fine-tuning loop with the trainer built in the previous cell.
print("Starting training...")
trainer.train()

## Model Inference

In [None]:
# Write the model first and then the tokenizer into OUTPUT_DIR.
for artifact in (model, tokenizer):
    artifact.save_pretrained(OUTPUT_DIR)

print(f"Model saved to {OUTPUT_DIR}")

def test_model(user_query):
    """
    Generate and print the model's reply to a single user message.

    Args:
        user_query: Text of the user turn, sent after the fixed system prompt.

    Returns:
        None. The reply is printed to stdout.
    """
    messages = [
        {"role": "system", "content": "You are a Pakistani Boy. You speak Roman English. Explain a bit much but under 30 words."},
        {"role": "user", "content": user_query}
    ]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    # The rendered template already contains the special tokens, so do not let
    # the tokenizer add them (e.g. BOS) a second time.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # The model may still be in training mode after trainer.train(), which
    # would keep LoRA dropout active; switch to eval mode for inference.
    model.eval()
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.7, do_sample=True)

    # Decode only the newly generated tokens. Splitting the full transcript on
    # the word "assistant" would cut the reply whenever the reply itself
    # contains that word.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    print("\nModel Response:\n", response.strip())

# Smoke-test the fine-tuned model with a sample prompt.
test_model("Kia chal raha hai?")