# LLaMA 3.1-8B Bengali Empathetic Fine-Tuning 




In [None]:
# ============================ CELL 1 ============================
# Install dependencies (Run once, then restart kernel)

!pip install -U transformers>=4.44.0
!pip install -U accelerate>=0.27.0
!pip install -U peft>=0.7.0
!pip install -U bitsandbytes>=0.43.0
!pip install sentencepiece tqdm evaluate sacrebleu rouge-score

print("\n" + "="*80)
print("INSTALL COMPLETE")
print(" RESTART KERNEL NOW: Kernel ‚Üí Restart & Clear Output")
print("   Then run from CELL 2 (skip CELL 1)")
print("="*80)



‚úÖ INSTALL COMPLETE
‚ö†Ô∏è  RESTART KERNEL NOW: Kernel ‚Üí Restart & Clear Output
   Then run from CELL 2 (skip CELL 1)


In [None]:
# ============================ CELL 2 ============================
# Imports and environment checks

import os
import json
import sqlite3
import warnings
from dataclasses import dataclass
from datetime import datetime
from typing import List, Dict, Tuple, Optional

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

warnings.filterwarnings("ignore")

import transformers
print(f"Transformers version: {transformers.__version__}")

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    get_linear_schedule_with_warmup,
    BitsAndBytesConfig,
)

import accelerate
print(f"Accelerate version: {accelerate.__version__}")

# Check BitsAndBytes
QUANTIZATION_AVAILABLE = False
try:
    import bitsandbytes as bnb
    QUANTIZATION_AVAILABLE = True
    print("BitsAndBytes available")
except Exception as e:
    print(f"BitsAndBytes NOT available: {e}")

from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

import evaluate
bleu_metric = evaluate.load("sacrebleu")
rouge_metric = evaluate.load("rouge")

print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f" GPU: {torch.cuda.get_device_name(0)}")
    total_mem = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f" GPU Memory: {total_mem:.1f} GB")


‚úÖ Transformers version: 4.57.3
‚úÖ Accelerate version: 1.12.0
‚úÖ BitsAndBytes available


2026-01-05 03:20:53.702048: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767583253.723387     447 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767583253.730100     447 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767583253.747275     447 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767583253.747295     447 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767583253.747297     447 computation_placer.cc:177] computation placer alr


‚úÖ PyTorch: 2.8.0+cu126
‚úÖ CUDA available: True
‚úÖ GPU: Tesla T4
‚úÖ GPU Memory: 14.7 GB


In [3]:
# ============================ CELL 3 ============================
# HuggingFace Authentication

from huggingface_hub import login

hf_token = None
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    hf_token = user_secrets.get_secret("HF_TOKEN")
    print("‚úÖ HF token loaded from Kaggle Secrets")
except Exception as e:
    print(f"‚ö†Ô∏è Kaggle Secrets error: {e}")

if not hf_token:
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        print("‚úÖ HF token from env")

if hf_token:
    login(token=hf_token, new_session=False)
    print("‚úÖ Logged in to HuggingFace")
else:
    raise ValueError("HF_TOKEN not found. Add it to Kaggle Secrets.")


‚úÖ HF token loaded from Kaggle Secrets
‚úÖ Logged in to HuggingFace


In [4]:
# ============================ CELL 4 ============================
# ‚ö° FAST Configuration - Will finish in ~2-3 hours

class Config:
    MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
    
    # ‚ö° SPEED OPTIMIZED SETTINGS
    MAX_LENGTH = 256              # ‚ö° Reduced from 2048
    NUM_EPOCHS = 1                # ‚ö° Reduced from 3
    MAX_SAMPLES = 3000            # ‚ö° Limit data samples
    
    # LoRA settings (smaller for speed)
    LORA_R = 8                    # ‚ö° Reduced from 16
    LORA_ALPHA = 16               # ‚ö° Reduced from 32
    LORA_DROPOUT = 0.05
    TARGET_MODULES = ["q_proj", "v_proj"]  # ‚ö° Reduced from 4 modules

    BATCH_SIZE = 1
    GRADIENT_ACCUMULATION_STEPS = 4  # ‚ö° Reduced from 8
    LEARNING_RATE = 3e-4          # ‚ö° Slightly higher for faster learning
    WARMUP_STEPS = 10
    WEIGHT_DECAY = 0.01
    MAX_GRAD_NORM = 1.0

    TRAIN_SPLIT = 0.9             # 90% train, 10% val
    SEED = 42

    DATASET_PATH_PRIMARY = "/kaggle/input/bengalitext/BengaliEmpatheticConversationsCorpus.csv"
    DATASET_PATH_FALLBACK = "/kaggle/input/bengalitext/BengaliEmpatheticConversationsCorpus .csv"

    OUTPUT_DIR = "./outputs"
    DB_PATH = "./llama_logs.db"
    HUMAN_EVAL_CSV = "./human_eval_sheet.csv"

config = Config()
torch.manual_seed(config.SEED)
np.random.seed(config.SEED)

print("‚ö° FAST CONFIG READY")
print(f"   Model: {config.MODEL_NAME}")
print(f"   Max samples: {config.MAX_SAMPLES}")
print(f"   Max length: {config.MAX_LENGTH}")
print(f"   Epochs: {config.NUM_EPOCHS}")
print(f"   LoRA rank: {config.LORA_R}")
print(f"   Target modules: {config.TARGET_MODULES}")
print("\n‚è±Ô∏è Estimated time: ~2-3 hours")


‚ö° FAST CONFIG READY
   Model: meta-llama/Llama-3.1-8B-Instruct
   Max samples: 3000
   Max length: 256
   Epochs: 1
   LoRA rank: 8
   Target modules: ['q_proj', 'v_proj']

‚è±Ô∏è Estimated time: ~2-3 hours


In [5]:
# ============================ CELL 5 ============================
# Data structures + Dataset Processor

@dataclass
class ConversationPair:
    topic: str
    question_title: str
    question: str
    answer: str

class DatasetProcessor:
    REQUIRED_COLS = ["Topics", "Question-Title", "Questions", "Answers"]

    def __init__(self, dataset_path: Optional[str] = None):
        self.dataset_path = dataset_path

    def resolve_path(self) -> str:
        if self.dataset_path and os.path.exists(self.dataset_path):
            return self.dataset_path
        if os.path.exists(config.DATASET_PATH_PRIMARY):
            return config.DATASET_PATH_PRIMARY
        if os.path.exists(config.DATASET_PATH_FALLBACK):
            return config.DATASET_PATH_FALLBACK
        
        input_dir = "/kaggle/input/bengalitext"
        if os.path.exists(input_dir):
            print(f"Available files: {os.listdir(input_dir)}")
        raise FileNotFoundError("Dataset not found")

    def load(self) -> List[ConversationPair]:
        path = self.resolve_path()
        print(f"üì• Loading dataset from: {path}")
        df = pd.read_csv(path, encoding="utf-8")

        missing = [c for c in self.REQUIRED_COLS if c not in df.columns]
        if missing:
            raise ValueError(f"Missing columns: {missing}")

        conversations = [
            ConversationPair(
                topic=str(row["Topics"]),
                question_title=str(row["Question-Title"]),
                question=str(row["Questions"]),
                answer=str(row["Answers"]),
            )
            for _, row in df.iterrows()
        ]
        
        # ‚ö° LIMIT SAMPLES FOR SPEED
        conversations = conversations[:config.MAX_SAMPLES]
        print(f"‚ö° Limited to {len(conversations)} samples for speed")
        return conversations

    def split(self, conversations: List[ConversationPair]) -> Tuple[List[ConversationPair], List[ConversationPair]]:
        split_idx = int(len(conversations) * config.TRAIN_SPLIT)
        train_convs = conversations[:split_idx]
        val_convs = conversations[split_idx:]
        print(f"‚úÖ Train: {len(train_convs)} | Val: {len(val_convs)}")
        return train_convs, val_convs


In [6]:
# ============================ CELL 6 ============================
# Dataset class

class BengaliEmpatheticDataset(Dataset):
    def __init__(self, conversations: List[ConversationPair], tokenizer: AutoTokenizer, max_length: int):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length

    @staticmethod
    def build_prompt(topic: str, question: str) -> str:
        system_prompt = "‡¶Ü‡¶™‡¶®‡¶ø ‡¶è‡¶ï‡¶ú‡¶® ‡¶∏‡¶π‡¶æ‡¶®‡ßÅ‡¶≠‡ßÇ‡¶§‡¶ø‡¶∂‡ßÄ‡¶≤ ‡¶™‡¶∞‡¶æ‡¶Æ‡¶∞‡ßç‡¶∂‡¶¶‡¶æ‡¶§‡¶æ ‡¶Ø‡¶ø‡¶®‡¶ø ‡¶¨‡¶æ‡¶Ç‡¶≤‡¶æ‡¶Ø‡¶º ‡¶∏‡¶π‡¶æ‡¶Ø‡¶º‡¶ï ‡¶â‡¶§‡ßç‡¶§‡¶∞ ‡¶™‡ßç‡¶∞‡¶¶‡¶æ‡¶® ‡¶ï‡¶∞‡ßá‡¶®‡•§"
        return (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
            f"{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
            f"‡¶¨‡¶ø‡¶∑‡¶Ø‡¶º: {topic}\n"
            f"‡¶™‡ßç‡¶∞‡¶∂‡ßç‡¶®: {question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        )

    def __len__(self) -> int:
        return len(self.conversations)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        conv = self.conversations[idx]
        prompt = self.build_prompt(conv.topic, conv.question)
        answer = f"{conv.answer}<|eot_id|>"

        prompt_ids = self.tokenizer(prompt, add_special_tokens=False).input_ids
        answer_ids = self.tokenizer(answer, add_special_tokens=False).input_ids

        input_ids = (prompt_ids + answer_ids)[: self.max_length]
        labels = ([-100] * len(prompt_ids) + answer_ids)[: self.max_length]
        attn_mask = [1] * len(input_ids)

        pad_id = self.tokenizer.pad_token_id
        pad_len = self.max_length - len(input_ids)
        if pad_len > 0:
            input_ids += [pad_id] * pad_len
            labels += [-100] * pad_len
            attn_mask += [0] * pad_len

        return {
            "input_ids": torch.tensor(input_ids, dtype=torch.long),
            "attention_mask": torch.tensor(attn_mask, dtype=torch.long),
            "labels": torch.tensor(labels, dtype=torch.long),
        }


In [None]:
# ============================ CELL 7 ============================
# Model Loading with proper gradient setup

def load_model_and_apply_lora():
    print("="*80)
    print("LOADING MODEL")
    print("="*80)

    model = None
    
    if QUANTIZATION_AVAILABLE:
        try:
            print("Attempting 4-bit quantization...")
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
            )
            model = AutoModelForCausalLM.from_pretrained(
                config.MODEL_NAME,
                quantization_config=bnb_config,
                device_map="auto",
                trust_remote_code=True,
            )
            print(" Loaded with 4-bit quantization")
            
            print("Preparing model for k-bit training...")
            model = prepare_model_for_kbit_training(
                model,
                use_gradient_checkpointing=True,
                gradient_checkpointing_kwargs={"use_reentrant": False}
            )
            print("Model prepared for training")
            
        except Exception as e:
            print(f"4-bit load failed: {e}")
            model = None
    
    if model is None:
        print("Loading with FP16...")
        model = AutoModelForCausalLM.from_pretrained(
            config.MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False})
        print("Loaded with FP16")

    # Apply LoRA
    print("\nApplying LoRA adapter...")
    lora_config = LoraConfig(
        r=config.LORA_R,
        lora_alpha=config.LORA_ALPHA,
        target_modules=config.TARGET_MODULES,
        lora_dropout=config.LORA_DROPOUT,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )
    model = get_peft_model(model, lora_config)

    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())

    print("\n" + "="*80)
    print("LoRA APPLIED")
    print(f"   Trainable: {trainable:,} ({100*trainable/total:.2f}%)")
    print(f"   Total: {total:,}")
    print("="*80 + "\n")
    
    return model


In [8]:
# ============================ CELL 8 ============================
# Evaluator

class Evaluator:
    def __init__(self, tokenizer: AutoTokenizer):
        self.tokenizer = tokenizer

    @staticmethod
    def compute_perplexity(val_loss: float) -> float:
        return float(np.exp(val_loss))

    def generate_one(self, model, topic: str, question: str, max_new_tokens: int = 128) -> str:
        prompt = BengaliEmpatheticDataset.build_prompt(topic, question)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(model.device)

        model.eval()
        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=self.tokenizer.eos_token_id,
            )

        decoded = self.tokenizer.decode(out[0], skip_special_tokens=True)
        if prompt in decoded:
            decoded = decoded.split(prompt, 1)[-1].strip()
        return decoded.strip()

    def compute_bleu_rouge(self, model, val_convs: List[ConversationPair], n: int = 20) -> Dict[str, float]:
        sample = val_convs[:min(n, len(val_convs))]
        preds, refs = [], []

        for c in tqdm(sample, desc="Generating for BLEU/ROUGE"):
            pred = self.generate_one(model, c.topic, c.question, max_new_tokens=128)
            preds.append(pred)
            refs.append(c.answer)

        bleu = bleu_metric.compute(predictions=preds, references=[[r] for r in refs])["score"]
        rouge = rouge_metric.compute(predictions=preds, references=refs)

        return {
            "BLEU": float(bleu),
            "ROUGE-1": float(rouge["rouge1"]),
            "ROUGE-2": float(rouge["rouge2"]),
            "ROUGE-L": float(rouge["rougeL"]),
        }

    def export_human_eval_sheet(self, model, val_convs: List[ConversationPair], out_csv: str, n: int = 15) -> str:
        sample = val_convs[:min(n, len(val_convs))]
        rows = []
        for c in tqdm(sample, desc="Human eval sheet"):
            pred = self.generate_one(model, c.topic, c.question, max_new_tokens=128)
            rows.append({
                "topic": c.topic,
                "question": c.question,
                "reference_answer": c.answer,
                "generated_answer": pred,
                "empathy_score_1to5": "",
                "helpfulness_score_1to5": "",
                "safety_score_1to5": "",
                "notes": "",
            })
        pd.DataFrame(rows).to_csv(out_csv, index=False, encoding="utf-8")
        return out_csv


In [9]:
# ============================ CELL 9 ============================
# Experiment Logger

class ExperimentLogger:
    def __init__(self, db_path: str):
        self.conn = sqlite3.connect(db_path)
        self._init_tables()

    def _init_tables(self):
        cur = self.conn.cursor()
        cur.execute('''CREATE TABLE IF NOT EXISTS LLAMAExperiments (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            model_name TEXT, lora_config TEXT, train_loss REAL,
            val_loss REAL, metrics TEXT, timestamp TEXT
        )''')
        cur.execute('''CREATE TABLE IF NOT EXISTS GeneratedResponses (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            experiment_id INTEGER, input_text TEXT, response_text TEXT, timestamp TEXT,
            FOREIGN KEY(experiment_id) REFERENCES LLAMAExperiments(id)
        )''')
        self.conn.commit()

    def log_experiment(self, model_name: str, lora_config: Dict, train_loss: float, val_loss: float, metrics: Dict) -> int:
        cur = self.conn.cursor()
        cur.execute('''INSERT INTO LLAMAExperiments (model_name, lora_config, train_loss, val_loss, metrics, timestamp)
            VALUES (?, ?, ?, ?, ?, ?)''',
            (model_name, json.dumps(lora_config), float(train_loss), float(val_loss),
             json.dumps(metrics), datetime.utcnow().isoformat()))
        self.conn.commit()
        return int(cur.lastrowid)

    def log_response(self, experiment_id: int, input_text: str, response_text: str):
        cur = self.conn.cursor()
        cur.execute('''INSERT INTO GeneratedResponses (experiment_id, input_text, response_text, timestamp)
            VALUES (?, ?, ?, ?)''', (experiment_id, input_text, response_text, datetime.utcnow().isoformat()))
        self.conn.commit()

    def close(self):
        self.conn.close()


In [None]:
# ============================ CELL 10 ============================
# FineTuner

class LLAMAFineTuner:
    def __init__(self, model, tokenizer, train_loader, val_loader):
        self.model = model
        self.tokenizer = tokenizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.optimizer = None
        self.scheduler = None

    def setup(self):
        trainable_params = [p for p in self.model.parameters() if p.requires_grad]
        print(f"Number of trainable parameter groups: {len(trainable_params)}")
        
        self.optimizer = torch.optim.AdamW(
            trainable_params,
            lr=config.LEARNING_RATE,
            weight_decay=config.WEIGHT_DECAY,
        )

        total_steps = (len(self.train_loader) * config.NUM_EPOCHS) // config.GRADIENT_ACCUMULATION_STEPS
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=config.WARMUP_STEPS,
            num_training_steps=total_steps,
        )
        print(f"‚úÖ Optimizer ready. Total steps: {total_steps}")

    def train_one_epoch(self, epoch: int) -> float:
        self.model.train()
        total_loss = 0.0
        self.optimizer.zero_grad()

        bar = tqdm(self.train_loader, desc=f"Train Epoch {epoch}")
        for step, batch in enumerate(bar):
            input_ids = batch["input_ids"].to(self.model.device)
            attention_mask = batch["attention_mask"].to(self.model.device)
            labels = batch["labels"].to(self.model.device)

            outputs = self.model(
                input_ids=input_ids, 
                attention_mask=attention_mask, 
                labels=labels,
                use_cache=False
            )
            loss = outputs.loss / config.GRADIENT_ACCUMULATION_STEPS
            total_loss += loss.item() * config.GRADIENT_ACCUMULATION_STEPS
            loss.backward()

            if (step + 1) % config.GRADIENT_ACCUMULATION_STEPS == 0:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), config.MAX_GRAD_NORM)
                self.optimizer.step()
                self.scheduler.step()
                self.optimizer.zero_grad()

            bar.set_postfix({"loss": f"{loss.item() * config.GRADIENT_ACCUMULATION_STEPS:.4f}"})

        return total_loss / max(1, len(self.train_loader))

    def validate(self) -> float:
        self.model.eval()
        total_loss = 0.0
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc="Validate"):
                input_ids = batch["input_ids"].to(self.model.device)
                attention_mask = batch["attention_mask"].to(self.model.device)
                labels = batch["labels"].to(self.model.device)
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                total_loss += outputs.loss.item()
        return total_loss / max(1, len(self.val_loader))

    def train(self) -> Dict[str, List[float]]:
        history = {"train_loss": [], "val_loss": []}
        for epoch in range(1, config.NUM_EPOCHS + 1):
            print(f"\n{'='*80}\nEPOCH {epoch}/{config.NUM_EPOCHS}\n{'='*80}")
            tr = self.train_one_epoch(epoch)
            vl = self.validate()
            history["train_loss"].append(float(tr))
            history["val_loss"].append(float(vl))
            print(f"\nEpoch {epoch}: Train={tr:.4f}, Val={vl:.4f}")
        return history

    def save(self, out_dir: str):
        os.makedirs(out_dir, exist_ok=True)
        self.model.save_pretrained(out_dir)
        self.tokenizer.save_pretrained(out_dir)
        print(f" Saved to: {out_dir}")


In [None]:
# ============================ CELL 11 ============================
# Main pipeline

def main():
    # Load data
    processor = DatasetProcessor()
    conversations = processor.load()
    train_convs, val_convs = processor.split(conversations)

    # Load tokenizer
    print("\nLoading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id
    print(f" Tokenizer loaded (vocab: {len(tokenizer):,})")

    # Create datasets
    train_ds = BengaliEmpatheticDataset(train_convs, tokenizer, config.MAX_LENGTH)
    val_ds = BengaliEmpatheticDataset(val_convs, tokenizer, config.MAX_LENGTH)
    train_loader = DataLoader(train_ds, batch_size=config.BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_ds, batch_size=config.BATCH_SIZE, shuffle=False, num_workers=0)

    # Load model with LoRA
    model = load_model_and_apply_lora()

    # Setup and train
    finetuner = LLAMAFineTuner(model, tokenizer, train_loader, val_loader)
    finetuner.setup()
    history = finetuner.train()

    # Evaluate
    evaluator = Evaluator(tokenizer)
    final_train_loss = history["train_loss"][-1]
    final_val_loss = history["val_loss"][-1]
    perplexity = evaluator.compute_perplexity(final_val_loss)

    print(f"\n{'='*80}\nEVALUATION\n{'='*80}")
    print(f"Val loss: {final_val_loss:.4f}")
    print(f"Perplexity: {perplexity:.2f}")

    # ‚ö° FAST: Only 20 samples for BLEU/ROUGE
    metrics_text = evaluator.compute_bleu_rouge(model, val_convs, n=20)
    metrics = {"perplexity": perplexity, **metrics_text}
    print("\n Text Metrics:")
    for k, v in metrics.items():
        print(f"   {k}: {v:.4f}")

    # ‚ö° FAST: Only 15 samples for human eval
    human_csv = evaluator.export_human_eval_sheet(model, val_convs, config.HUMAN_EVAL_CSV, n=15)
    print(f"\nHuman eval sheet: {human_csv}")

    # Log experiment
    logger = ExperimentLogger(config.DB_PATH)
    lora_payload = {
        "r": config.LORA_R, "alpha": config.LORA_ALPHA,
        "dropout": config.LORA_DROPOUT, "target_modules": config.TARGET_MODULES,
        "max_length": config.MAX_LENGTH, "batch_size": config.BATCH_SIZE,
        "grad_accum": config.GRADIENT_ACCUMULATION_STEPS,
        "lr": config.LEARNING_RATE, "epochs": config.NUM_EPOCHS,
    }
    experiment_id = logger.log_experiment(config.MODEL_NAME, lora_payload, final_train_loss, final_val_loss, metrics)
    print(f"\n Logged experiment_id: {experiment_id}")

    # Sample generations
    tests = [
        {"topic": "‡¶™‡¶æ‡¶∞‡¶ø‡¶¨‡¶æ‡¶∞‡¶ø‡¶ï ‡¶¶‡ßç‡¶¨‡¶®‡ßç‡¶¶‡ßç‡¶¨", "question": "‡¶Ü‡¶Æ‡¶æ‡¶∞ ‡¶∏‡ßç‡¶§‡ßç‡¶∞‡ßÄ ‡¶è‡¶¨‡¶Ç ‡¶Æ‡¶æ‡¶Ø‡¶º‡ßá‡¶∞ ‡¶Æ‡¶ß‡ßç‡¶Ø‡ßá ‡¶∏‡¶¨‡¶∏‡¶Æ‡¶Ø‡¶º ‡¶ù‡¶ó‡¶°‡¶º‡¶æ ‡¶π‡¶Ø‡¶º‡•§"},
        {"topic": "‡¶â‡¶¶‡ßç‡¶¨‡ßá‡¶ó", "question": "‡¶Ü‡¶Æ‡¶ø ‡¶∏‡¶¨‡¶∏‡¶Æ‡¶Ø‡¶º ‡¶ö‡¶ø‡¶®‡ßç‡¶§‡¶ø‡¶§ ‡¶•‡¶æ‡¶ï‡¶ø ‡¶è‡¶¨‡¶Ç ‡¶ò‡ßÅ‡¶Æ‡¶æ‡¶§‡ßá ‡¶™‡¶æ‡¶∞‡¶ø ‡¶®‡¶æ‡•§"},
        {"topic": "‡¶∏‡¶Æ‡ßç‡¶™‡¶∞‡ßç‡¶ï", "question": "‡¶Ü‡¶Æ‡¶æ‡¶∞ ‡¶¨‡¶®‡ßç‡¶ß‡ßÅ‡¶∞‡¶æ ‡¶Ü‡¶Æ‡¶æ‡¶ï‡ßá ‡¶¨‡ßÅ‡¶ù‡¶§‡ßá ‡¶™‡¶æ‡¶∞‡ßá ‡¶®‡¶æ‡•§"},
    ]

    print(f"\n{'='*80}\nSAMPLE GENERATIONS\n{'='*80}")
    for i, t in enumerate(tests, 1):
        prompt = BengaliEmpatheticDataset.build_prompt(t["topic"], t["question"])
        response = evaluator.generate_one(model, t["topic"], t["question"], max_new_tokens=128)
        print(f"\n--- TEST {i} ---")
        print(f"Topic: {t['topic']}")
        print(f"Question: {t['question']}")
        print(f"Response:\n{response}")
        logger.log_response(experiment_id, prompt, response)

    logger.close()

    # Save
    os.makedirs(config.OUTPUT_DIR, exist_ok=True)
    finetuner.save(os.path.join(config.OUTPUT_DIR, "final_model"))
    with open(os.path.join(config.OUTPUT_DIR, "history.json"), "w") as f:
        json.dump({"history": history, "metrics": metrics, "experiment_id": experiment_id}, f, indent=2)

    # Zip for download
    !zip -r /kaggle/working/llama_bengali_submission.zip {config.OUTPUT_DIR} {config.DB_PATH} {config.HUMAN_EVAL_CSV}

    print(f"\n{'='*80}\nDONE\n{'='*80}")
    print("Download: Output ‚Üí llama_bengali_submission.zip")

main()


üì• Loading dataset from: /kaggle/input/bengalitext/BengaliEmpatheticConversationsCorpus .csv
‚ö° Limited to 3000 samples for speed
‚úÖ Train: 2700 | Val: 300

üî§ Loading tokenizer...
‚úÖ Tokenizer loaded (vocab: 128,256)
LOADING MODEL
Attempting 4-bit quantization...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

‚úÖ Loaded with 4-bit quantization
Preparing model for k-bit training...
‚úÖ Model prepared for training

Applying LoRA adapter...

‚úÖ LoRA APPLIED
   Trainable: 3,407,872 (0.07%)
   Total: 4,544,008,192

Number of trainable parameter groups: 128
‚úÖ Optimizer ready. Total steps: 675

EPOCH 1/1


Train Epoch 1:  30%|‚ñà‚ñà‚ñà       | 814/2700 [16:36<38:15,  1.22s/it, loss=nan]   