# **installation**

In [1]:
!pip install -U transformers accelerate peft datasets evaluate bitsandbytes sentencepiece trl

Collecting datasets
  Downloading datasets-4.5.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.49.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting trl
  Downloading trl-0.28.0-py3-none-any.whl.metadata (11 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.1 kB)
Downloading datasets-4.5.0-py3-none-any.whl (515 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.2/515.2 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.49.2-py3-none-manylinux_2_24_x86_64.whl (60.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.

# **libraries**

In [2]:
import os
import torch
import pandas as pd
from datasets import Dataset
import random
import evaluate
import math

torch.backends.cuda.matmul.fp32_precision = "tf32"
torch.backends.cudnn.conv.fp32_precision = "tf32"

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# **log **

In [3]:
import json
import uuid
from datetime import datetime

def log_experiment(model_name, lora_config, train_loss, val_loss, metrics):

    experiment = {
        "id": str(uuid.uuid4()),
        "model_name": model_name,
        "lora_config": lora_config,
        "train_loss": train_loss,
        "val_loss": val_loss,
        "metrics": metrics,
        "timestamp": str(datetime.now())
    }

    with open("/kaggle/working/LLAMAExperiments.json", "a") as f:
        f.write(json.dumps(experiment) + "\n")

    return experiment["id"]


def log_generated_response(experiment_id, input_text, response_text):

    record = {
        "id": str(uuid.uuid4()),
        "experiment_id": experiment_id,
        "input_text": input_text,
        "response_text": response_text,
        "timestamp": str(datetime.now())
    }

    with open("/kaggle/working/GeneratedResponses.json", "a") as f:
        f.write(json.dumps(record) + "\n")

In [4]:
from huggingface_hub import login
login()  # token of your hugging face 

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [10]:
# ==============================
# Strategy Pattern - Interface
# ==============================

class FineTuningStrategy:
    def apply(self, base_model):
        raise NotImplementedError("Strategy must implement apply() method.")

In [11]:
# ==============================
# LoRA Strategy
# ==============================

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

class LoRAStrategy(FineTuningStrategy):
    def __init__(self, r=8, alpha=16, dropout=0.05):
        self.r = r
        self.alpha = alpha
        self.dropout = dropout

    def apply(self, base_model):

        # Required for 4-bit training
        base_model = prepare_model_for_kbit_training(base_model)

        lora_config = LoraConfig(
            r=self.r,
            lora_alpha=self.alpha,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_dropout=self.dropout,
            bias="none",
            task_type="CAUSAL_LM"
        )

        model = get_peft_model(base_model, lora_config)

        print("LoRA Strategy Applied")
        model.print_trainable_parameters()

        return model

In [12]:
# ==============================
# Unsloth Strategy (Skeleton)
# ==============================

class UnslothStrategy(FineTuningStrategy):
    def apply(self, base_model):
        raise NotImplementedError("Unsloth strategy not implemented yet.")

# **Data Preprocessor and LLama fine tuner Class**

In [13]:
import re
from datasets import Dataset

UNSAFE_PATTERNS = [
    r"ড্রিঙ্ক",
    r"চা খা",
    r"চলে আস",
    r"দেখা কর",
    r"ঈশ্বর",
    r"আল্লাহ",
]

REFLECTIVE_PREFIXES = [
    "শুনে মনে হচ্ছে আপনার জন্য বিষয়টি সত্যিই কঠিন লাগছে। ",
    "এই অনুভূতিটা খুব ভারী হতে পারে, এবং তা বোঝা যায়। ",
    "আপনি যা অনুভব করছেন, তা অনেকের জীবনে আসতে পারে। ",
    "এই মুহূর্তে আপনার ভেতরে অনেক চাপ কাজ করছে বলে মনে হচ্ছে। ",
]


class DatasetProcessor:
    def __init__(self, path, sample_size=None):
        self.df = pd.read_csv(path)[["Questions", "Answers"]]
        self.sample_size = sample_size

    def normalize_answer(self, answer: str) -> str:
        answer = str(answer)

        for pat in UNSAFE_PATTERNS:
            answer = re.sub(pat, "", answer)

        sentences = re.split(r'(?<=[।!?])', answer)
        sentences = [s.strip() for s in sentences if len(s.strip()) > 3]

        return " ".join(sentences)

    def add_reflection(self, answer: str) -> str:
        if any(answer.startswith(p) for p in REFLECTIVE_PREFIXES):
            return answer
        return random.choice(REFLECTIVE_PREFIXES) + answer

    def build_text(self, row):
        question = str(row["Questions"]).strip()
        answer = self.normalize_answer(row["Answers"])
        answer = self.add_reflection(answer)

        return (
            "নির্দেশনা:\n"
            "ব্যবহারকারীর কথার প্রতি সহানুভূতি প্রকাশ করে উত্তর দিন। "
            "প্রথমে অনুভূতি স্বীকার করুন, বিচার না করে সমর্থনমূলক ভাষা ব্যবহার করুন, "
            "এবং কোমলভাবে সাহায্য করার চেষ্টা করুন。\n\n"
            "ব্যবহারকারীর কথা:\n"
            f"{question}\n\n"
            "উত্তর:\n"
            f"{answer}"
        )

    def process(self):
        self.df["text"] = self.df.apply(self.build_text, axis=1)
        dataset = Dataset.from_pandas(self.df[["text"]])

        if self.sample_size:
            dataset = dataset.shuffle(seed=42).select(range(self.sample_size))

        return dataset


class LLAMAFineTuner:
    def __init__(self, base_model, tokenizer, strategy: FineTuningStrategy):
        self.base_model = base_model
        self.tokenizer = tokenizer
        self.strategy = strategy
        self.model = None

    def load_model(self):

        # QLoRA 4-bit config
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
        )

        base_model = AutoModelForCausalLM.from_pretrained(
            self.base_model,
            quantization_config=bnb_config,
            device_map="auto",
            torch_dtype=torch.float16,
        )

        # Gradient checkpointing
        base_model.gradient_checkpointing_enable()
        base_model.config.use_cache = False

        # Apply selected strategy (LoRA / Unsloth)
        self.model = self.strategy.apply(base_model)

        return self.model

    def train(self, tokenized_train, tokenized_val,
              epochs=1, batch_size=1, gradient_accumulation_steps=8):

        data_collator = DataCollatorForLanguageModeling(
            tokenizer=self.tokenizer,
            mlm=False
        )

        training_args = TrainingArguments(
            output_dir="./llama_bengali",
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            num_train_epochs=epochs,
            learning_rate=2e-4,
            fp16=True,
            logging_steps=10,
            eval_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            metric_for_best_model="loss",
            report_to="none",
            remove_unused_columns=False,
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_val,
            data_collator=data_collator
        )

        trainer.train()
        return trainer

# **Dataset & Model Load**

In [21]:
DATA_PATH = "/kaggle/input/datasets/raseluddin/bengali-empathetic-conversations-corpus/BengaliEmpatheticConversationsCorpus .csv"
processor = DatasetProcessor(DATA_PATH, sample_size=600)

dataset = processor.process()
dataset = dataset.train_test_split(test_size=0.2, seed=42)

train_dataset = dataset["train"]
val_dataset   = dataset["test"]

MODEL_NAME = "meta-llama/Llama-3.1-8b-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# **Train and Test split**

In [22]:
def tokenize_fn(batch):
    enc = tokenizer(
        batch["text"],
        truncation=True,
        max_length=1024,
        padding="max_length"
    )

    # Important: convert to list before copying
    enc["labels"] = enc["input_ids"].copy()

    return enc


tokenized_train = train_dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"]
)

tokenized_val = val_dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"]
)



print("Tokenized train examples:", len(tokenized_train))
print("Tokenized val examples:", len(tokenized_val))

Map:   0%|          | 0/480 [00:00<?, ? examples/s]

Map:   0%|          | 0/120 [00:00<?, ? examples/s]

Tokenized train examples: 480
Tokenized val examples: 120


In [20]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

# **Model Training**

In [24]:
# ==============================
# Initialize Strategy + Tuner
# ==============================

MODEL_NAME = "meta-llama/Llama-3.1-8b-Instruct"

strategy = LoRAStrategy(r=8, alpha=16)

tuner = LLAMAFineTuner(
    base_model=MODEL_NAME,
    tokenizer=tokenizer,
    strategy=strategy
)

model = tuner.load_model()
print("Model loaded using Strategy Pattern.")

trainer = tuner.train(
    tokenized_train,
    tokenized_val,
    epochs=2,
    batch_size=1,
    gradient_accumulation_steps=2
)

print("Training finished.")

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

LoRA Strategy Applied
trainable params: 6,815,744 || all params: 8,037,076,992 || trainable%: 0.0848
Model loaded using Strategy Pattern.


  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.91 GiB. GPU 1 has a total capacity of 14.56 GiB of which 369.81 MiB is free. Including non-PyTorch memory, this process has 14.20 GiB memory in use. Of the allocated memory 11.21 GiB is allocated by PyTorch, and 2.85 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# After trainer.train() finishes
train_loss = trainer.state.log_history[-1]["loss"]  # approximate
val_loss = trainer.state.best_metric          # if available

lora_config = {
    "r": 8,
    "alpha": 16,
    "dropout": 0.05
}

experiment_id = log_experiment(
    model_name=MODEL_NAME,
    lora_config=lora_config,
    train_loss=train_loss,
    val_loss=val_loss,
    metrics={"BLEU": 0, "ROUGE-L": 0}  # placeholder if metrics not computed yet
)
print("Experiment logged with ID:", experiment_id)

# **Save model**

In [4]:
from huggingface_hub import logout, login, whoami

#model save 
logout()
login()

whoami()

Not logged in!


{'type': 'user',
 'id': '67c81ad55993d755c95e4558',
 'name': 'Fariha1999',
 'fullname': 'Fariha Tasnim Chowdhury',
 'email': 'farihatasnimchowdhury2024@gmail.com',
 'emailVerified': True,
 'canPay': False,
 'billingMode': 'prepaid',
 'periodEnd': 1772323200,
 'isPro': False,
 'avatarUrl': 'https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/oqIcbDW8u03qOgHsN9fRD.png',
 'orgs': [],
 'auth': {'type': 'access_token',
  'accessToken': {'displayName': 'fariha',
   'role': 'write',
   'createdAt': '2026-02-21T15:57:21.879Z'}}}

In [26]:
from huggingface_hub import create_repo

HF_REPO = "your hugging face repo"

create_repo(
    repo_id=HF_REPO,
    repo_type="model",
    private=True,      # recommended
    exist_ok=True
)


RepoUrl('https://huggingface.co/Fariha1999/llama-3.1-8b-bengali-empathetic-lora7', endpoint='https://huggingface.co', repo_type='model', repo_id='Fariha1999/llama-3.1-8b-bengali-empathetic-lora7')

In [27]:
model.push_to_hub(HF_REPO)
tokenizer.push_to_hub(HF_REPO)


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

CommitInfo(commit_url='https://huggingface.co/Fariha1999/llama-3.1-8b-bengali-empathetic-lora7/commit/ffb55867f612bfed6d69980867363cbb8c169312', commit_message='Upload tokenizer', commit_description='', oid='ffb55867f612bfed6d69980867363cbb8c169312', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Fariha1999/llama-3.1-8b-bengali-empathetic-lora7', endpoint='https://huggingface.co', repo_type='model', repo_id='Fariha1999/llama-3.1-8b-bengali-empathetic-lora7'), pr_revision=None, pr_num=None)

# **Evaluation**

In [29]:
# -------------------------------
# 2️⃣ Define Evaluator class
# -------------------------------
class Evaluator:
    def __init__(self, model, tokenizer, val_dataset, experiment_id):
        self.model = model
        self.tokenizer = tokenizer
        self.val_dataset = val_dataset
        self.experiment_id = experiment_id
        self.bleu = evaluate.load("sacrebleu")
        self.rouge = evaluate.load("rouge")

    def compute_perplexity(self):
        self.model.eval()
        losses = []

        for batch in self.val_dataset:
            input_ids = torch.tensor(batch["input_ids"]).unsqueeze(0).to(self.model.device)
            labels = input_ids.clone()

            with torch.no_grad():
                outputs = self.model(input_ids=input_ids, labels=labels)
                loss = outputs.loss

            losses.append(loss.item())

        avg_loss = sum(losses) / len(losses)
        return math.exp(avg_loss)

    def compute_generation_metrics(self, max_samples=50):
        predictions = []
        references = []

        for i in range(min(max_samples, len(self.val_dataset))):
            input_ids = self.val_dataset[i]["input_ids"]
            text = self.tokenizer.decode(input_ids, skip_special_tokens=True)

            # Split question and reference
            question = text.split("উত্তর:")[0]
            reference = text.split("উত্তর:")[-1]

            # Generate response
            generated = generate_response(self.model, self.tokenizer, question)

            # ---- Logging generated response ----
            log_generated_response(
                experiment_id=self.experiment_id,
                input_text=question,
                response_text=generated
            )

            predictions.append(generated)
            references.append([reference])

        bleu_score = self.bleu.compute(predictions=predictions, references=references)
        rouge_score = self.rouge.compute(predictions=predictions, references=[r[0] for r in references])

        return {
            "BLEU": bleu_score["score"],
            "ROUGE-L": rouge_score["rougeL"],
        }

In [30]:
# -------------------------------
# 3️⃣ Log experiment (without trainer)
# -------------------------------
# Since the training session is gone, we pass None for train/val losses
experiment_id = log_experiment(
    model_name=model_path,
    lora_config={"r": 8, "alpha": 16},
    train_loss=None,
    val_loss=None,
    metrics={}  # will update after evaluation
)

# -------------------------------
# 4️⃣ Create evaluator instance
# -------------------------------
evaluator = Evaluator(model, tokenizer, tokenized_val, experiment_id)

# -------------------------------
# 5️⃣ Compute perplexity
# -------------------------------
perplexity = evaluator.compute_perplexity()
print("Perplexity:", perplexity)

# -------------------------------
# 6️⃣ Compute BLEU / ROUGE and log generated responses
# -------------------------------
metrics = evaluator.compute_generation_metrics(max_samples=50)
print("Metrics:", metrics)

# -------------------------------
# 7️⃣ Optional: Update experiment JSON with metrics
# -------------------------------
update_experiment_metrics(experiment_id, metrics)

NameError: name 'tokenized_val' is not defined

# **Fine tuned model load**

In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "meta-llama/Llama-3.1-8b-Instruct"
LORA_REPO = "Fariha1999/llama-3.1-8b-bengali-empathetic-lora7"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)

model = PeftModel.from_pretrained(
    base_model,
    LORA_REPO,
    is_trainable=False
)

model.eval()

print(model.config.model_type)
print(tokenizer.name_or_path)

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/27.3M [00:00<?, ?B/s]

llama
meta-llama/Llama-3.1-8b-Instruct


# **Response Generation**

In [6]:
import pandas as pd

questions = [
    "আমি সবসময় অনলাইনে সফল হলেও ভেতরে একা অনুভব করি। এর মানে কি আমার জীবনে কিছু ভুল আছে?",
    "আমার বন্ধুরা আমাকে অবমূল্যায়ন করে। আমি কেমন প্রতিক্রিয়া দেখাই?",
    "আমি অনেক চাপ অনুভব করছি এবং ঘুমও ঠিকমতো হচ্ছে না। আমি কি করব?",
    "আমি যে কাজগুলো করি তাতে কোনো আনন্দ পাই না। এটি কি স্বাভাবিক?",
    "আমি অপরাধবোধ বোধ করি যখন অন্যদের সাহায্য করতে পারি না। আমি কি ভুল করছি?"
]

df = pd.DataFrame({"question": questions})
df.to_csv("/kaggle/working/questions.csv", index=False)
print("questions.csv saved!")

questions.csv saved!


In [13]:
import torch
import pandas as pd
from tqdm import tqdm

# -------- CONFIG --------
INPUT_PATH = "/kaggle/working/questions.csv"
OUTPUT_PATH = "/kaggle/working/model_generations.csv"

N_SAMPLES = 3          # Reduced from 5
MAX_NEW_TOKENS = 300   # Reduced from 300
# ------------------------

# Make sure model is in eval mode and cache is used
model.eval()
model.config.use_cache = True
torch.set_grad_enabled(False)

def generate_responses(question, n_samples=N_SAMPLES):
    prompt = (
        "নির্দেশনা:\n"
        "ব্যবহারকারীর কথার প্রতি সহানুভূতি প্রকাশ করে উত্তর দিন। "
        "প্রথমে অনুভূতি স্বীকার করুন, বিচার না করে সমর্থনমূলক ভাষা ব্যবহার করুন, "
        "এবং কোমলভাবে সাহায্য করার চেষ্টা করুন。\n\n"
        f"ব্যবহারকারীর কথা:\n{question}\n\nউত্তর:\n"
    )

    # Tokenize and send to GPU
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    responses = []

    for _ in range(n_samples):
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                temperature=0.35,
                top_p=0.8,
                repetition_penalty=1.2,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
            )

        decoded = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        response = decoded[len(prompt):].strip()
        responses.append(response)

    return responses


# -------- LOAD QUESTIONS --------
df = pd.read_csv(INPUT_PATH)
assert "question" in df.columns, "CSV must contain a 'question' column"
questions = df["question"].tolist()

results = []

# -------- GENERATE RESPONSES --------
for idx, question in enumerate(tqdm(questions, desc="Generating responses")):
    outputs = generate_responses(question, N_SAMPLES)

    for i, resp in enumerate(outputs):
        results.append({
            "question": question,
            "sample_id": i + 1,
            "response": resp
        })

# -------- SAVE RESULTS --------
pd.DataFrame(results).to_csv(OUTPUT_PATH, index=False)
print(f"Saved to {OUTPUT_PATH}")

Generating responses: 100%|██████████| 5/5 [06:13<00:00, 74.66s/it]

Saved to /kaggle/working/model_generations.csv





In [14]:
import pandas as pd

# Save with UTF-8 encoding
df = pd.read_csv("/kaggle/working/model_generations.csv")  # your existing results
df.to_csv("/kaggle/working/model_generations_utf8.csv", index=False, encoding="utf-8-sig")

print("Saved CSV with UTF-8 encoding!")

Saved CSV with UTF-8 encoding!
