# Step-1: Install Unslot and other dependencies

In [1]:
%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    import torch; v = re.match(r"[0-9]{1,}\.[0-9]{1,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.33.post1" if v=="2.9" else "0.0.32.post2" if v=="2.8" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.56.2
!pip install --no-deps trl==0.22.2

# Step-2: Load the unsloth/meta-Llama-3.1-8B-Instruct

In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/meta-Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.1.1: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

# Step-3: Add LoRA Adapters (so we need to finetune 1-10% of the params)

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2026.1.1 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


# Step-4: Load the Bangla Empathic Dataset in Alpaca Format and Clean Dataset & Structure it according to prompt
**Don't forget to add `EOS` token. Otherwise finetuned model won't learn to predict the eos token and text generation won't stop**

In [4]:
# ============================================================
# BENGALI EMPATHETIC CONVERSATIONS - DATA PREPARATION (ENHANCED)
# With 85% train, 15% test split, shuffling, and EOS token verification
# ============================================================

import pandas as pd
import re
import unicodedata
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
import numpy as np

# ===== STEP 0: TEXT NORMALIZATION FUNCTIONS =====
def normalize_bengali_text(text):
    """
    Comprehensive Bengali text normalization
    """
    if not isinstance(text, str):
        return ""

    # 1. Unicode normalization (NFC form - canonical composition)
    text = unicodedata.normalize('NFC', text)

    # 2. Remove zero-width characters and invisible characters
    text = re.sub(r'[\u200b-\u200f\u202a-\u202e\u2060-\u206f\ufeff]', '', text)

    # 3. Normalize whitespace
    text = re.sub(r'\s+', ' ', text)  # Multiple spaces to single
    text = re.sub(r'\n+', '\n', text)  # Multiple newlines to single

    # 4. Remove leading/trailing whitespace
    text = text.strip()

    # 5. Fix common Bengali punctuation issues
    text = re.sub(r'\s+([।,!?;:])', r'\1', text)  # Remove space before punctuation
    text = re.sub(r'([।,!?;:])\s*', r'\1 ', text)  # Ensure space after punctuation

    # 6. Remove extra dots/ellipsis (keep max 3)
    text = re.sub(r'\.{4,}', '...', text)

    # 7. Remove any control characters
    text = ''.join(char for char in text if unicodedata.category(char)[0] != 'C' or char in '\n\t')

    return text.strip()

def verify_bengali_script(text):
    """
    Check if text contains Bengali characters
    """
    bengali_range = re.compile(r'[\u0980-\u09FF]')
    return bool(bengali_range.search(text))

# ===== STEP 1: Configuration =====
CSV_PATH = '/content/BengaliEmpatheticConversationsCorpus .csv'
QUESTION_COL = 'Questions'
ANSWER_COL = 'Answers'
TOPIC_COL = 'Topics'

# Split ratios
TRAIN_RATIO = 0.85  # 85% for training
TEST_RATIO = 0.15   # 15% for testing
RANDOM_SEED = 42    # For reproducibility

# ===== STEP 2: Load and enhanced cleaning =====
print("=" * 60)
print("LOADING AND NORMALIZING DATASET")
print("=" * 60)

df = pd.read_csv(CSV_PATH, encoding='utf-8')
print(f"✓ Original rows: {len(df)}")

# Drop NaN values
df = df.dropna(subset=[QUESTION_COL, ANSWER_COL])
print(f"✓ After dropping NaN: {len(df)}")

# Normalize all text columns
print("\nNormalizing text columns...")
df[QUESTION_COL] = df[QUESTION_COL].apply(normalize_bengali_text)
df[ANSWER_COL] = df[ANSWER_COL].apply(normalize_bengali_text)
if TOPIC_COL in df.columns:
    df[TOPIC_COL] = df[TOPIC_COL].apply(normalize_bengali_text)

# Remove empty after normalization
df = df[(df[QUESTION_COL] != '') & (df[ANSWER_COL] != '')]
print(f"✓ After normalization: {len(df)}")

# Filter very short answers (<=10 chars)
df = df[df[ANSWER_COL].str.len() > 10]
print(f"✓ After removing very short answers: {len(df)}")

# Verify Bengali content
bengali_questions = df[QUESTION_COL].apply(verify_bengali_script).sum()
bengali_answers = df[ANSWER_COL].apply(verify_bengali_script).sum()
print(f"\n✓ Bengali content verification:")
print(f"  - Questions with Bengali: {bengali_questions}/{len(df)}")
print(f"  - Answers with Bengali: {bengali_answers}/{len(df)}")

# ===== STEP 3: SHUFFLE THE DATAFRAME =====
print("\n" + "=" * 60)
print("SHUFFLING DATASET")
print("=" * 60)

df = df.sample(frac=1, random_state=RANDOM_SEED).reset_index(drop=True)
print(f"✓ Dataset shuffled with random_state={RANDOM_SEED}")

# ===== STEP 4: TRAIN/TEST SPLIT (85/15) =====
print("\n" + "=" * 60)
print("CREATING TRAIN/TEST SPLITS")
print("=" * 60)

train_df, test_df = train_test_split(
    df,
    test_size=TEST_RATIO,
    random_state=RANDOM_SEED,
    shuffle=False  # Already shuffled above
)

print(f"✓ Train set: {len(train_df)} samples ({len(train_df)/len(df)*100:.1f}%)")
print(f"✓ Test set: {len(test_df)} samples ({len(test_df)/len(df)*100:.1f}%)")
print(f"✓ Total: {len(train_df) + len(test_df)} samples")

# ===== STEP 5: Enhanced instruction =====
BENGALI_INSTRUCTION = (
    "আপনি একজন অভিজ্ঞ, সহানুভূতিশীল এবং দয়ালু পরামর্শদাতা। "
    "ব্যবহারকারীর প্রশ্ন শুনে তাদের অনুভূতি গভীরভাবে বোঝার চেষ্টা করুন। "
    "উত্তরটি সহানুভূতিপূর্ণ, বিস্তারিত এবং সহায়ক হোক। "
    "তাদের অভিজ্ঞতা স্বীকার করুন, প্রয়োজনে ব্যাখ্যা দিন এবং উপদেশ দিন। "
    "উত্তর স্বাভাবিক, মানবিক এবং যতটা সম্ভব দীর্ঘ ও গভীর হোক।"
)

# ===== STEP 6: Prepare datasets =====
print("\n" + "=" * 60)
print("PREPARING DATASETS")
print("=" * 60)

def prepare_dataset(dataframe, split_name):
    """Convert dataframe to HuggingFace Dataset format"""
    data_dict = {
        'instruction': [],
        'input': [],
        'output': []
    }

    for _, row in dataframe.iterrows():
        topic = row[TOPIC_COL] if TOPIC_COL in dataframe.columns and pd.notna(row[TOPIC_COL]) else "সাধারণ আলোচনা"
        instruction = f"{BENGALI_INSTRUCTION}\n\nপ্রসঙ্গ: {topic}"

        data_dict['instruction'].append(instruction)
        data_dict['input'].append(row[QUESTION_COL])
        data_dict['output'].append(row[ANSWER_COL])

    dataset = Dataset.from_dict(data_dict)
    print(f"✓ {split_name} dataset: {len(dataset)} samples")
    return dataset

train_dataset = prepare_dataset(train_df, "Train")
test_dataset = prepare_dataset(test_df, "Test")

# ===== STEP 7: EOS TOKEN VERIFICATION =====
print("\n" + "=" * 60)
print("EOS TOKEN VERIFICATION")
print("=" * 60)

# Check if tokenizer is defined
try:
    EOS_TOKEN = tokenizer.eos_token
    print(f"✓ EOS Token found: '{EOS_TOKEN}'")
    print(f"  Token ID: {tokenizer.eos_token_id}")
    print(f"  Token length: {len(EOS_TOKEN)} characters")

    # Additional tokenizer info
    print(f"\n✓ Tokenizer info:")
    print(f"  Model: {tokenizer.name_or_path if hasattr(tokenizer, 'name_or_path') else 'Unknown'}")
    print(f"  Vocab size: {len(tokenizer)}")
    print(f"  PAD token: {tokenizer.pad_token}")
    print(f"  BOS token: {tokenizer.bos_token}")

except NameError:
    print("⚠ WARNING: 'tokenizer' is not defined!")
    print("  Please load your tokenizer first. Example:")
    print("  from transformers import AutoTokenizer")
    print("  tokenizer = AutoTokenizer.from_pretrained('your-model-name')")
    print("\n  Using placeholder EOS token for now...")
    EOS_TOKEN = "</s>"  # Common default for many models

# ===== STEP 8: Formatting with proper EOS token =====
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

def formatting_prompts_func(examples):
    """
    Format examples with Alpaca template and EOS token
    """
    texts = []
    for inst, inp, out in zip(examples["instruction"], examples["input"], examples["output"]):
        # Ensure output ends with EOS token
        text = alpaca_prompt.format(inst, inp, out) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

print("\n" + "=" * 60)
print("FORMATTING DATASETS")
print("=" * 60)

train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
test_dataset = test_dataset.map(formatting_prompts_func, batched=True)

print("✓ All datasets formatted with Alpaca template + EOS token")

# ===== STEP 9: Assign train dataset to 'dataset' variable =====
dataset = train_dataset  # This is what will be used for training
print("\n✓ Training dataset assigned to 'dataset' variable")

# ===== STEP 10: VERIFICATION SAMPLES =====
print("\n" + "=" * 60)
print("SAMPLE VERIFICATION")
print("=" * 60)

print("\n[TRAIN SET - Sample 1]")
print("-" * 60)
sample_text = dataset[0]['text']
print(sample_text[:800] + "..." if len(sample_text) > 800 else sample_text)

# Check EOS token at the end
if sample_text.endswith(EOS_TOKEN):
    print(f"\n✓ EOS token correctly appended: '{EOS_TOKEN}'")
else:
    print(f"\n⚠ WARNING: EOS token NOT found at end!")
    print(f"  Last 20 chars: {repr(sample_text[-20:])}")

print("\n[TEST SET - Sample 1]")
print("-" * 60)
test_sample = test_dataset[0]['text']
print(f"Input length: {len(test_dataset[0]['input'])} chars")
print(f"Output length: {len(test_dataset[0]['output'])} chars")
print(f"Full text length: {len(test_sample)} chars")
print(f"Ends with EOS: {test_sample.endswith(EOS_TOKEN)}")

# ===== STEP 11: DETAILED STATISTICS =====
print("\n" + "=" * 60)
print("DATASET STATISTICS")
print("=" * 60)

def print_split_stats(dataset_obj, split_name):
    print(f"\n{split_name} Set:")
    print(f"  Total samples: {len(dataset_obj)}")
    print(f"  Avg input length: {sum(len(x) for x in dataset_obj['input']) / len(dataset_obj):.1f} chars")
    print(f"  Avg output length: {sum(len(x) for x in dataset_obj['output']) / len(dataset_obj):.1f} chars")
    print(f"  Avg total length: {sum(len(x) for x in dataset_obj['text']) / len(dataset_obj):.1f} chars")

    output_lengths = [len(x) for x in dataset_obj['output']]
    print(f"  Output length distribution:")
    print(f"    Min: {min(output_lengths)} chars")
    print(f"    Max: {max(output_lengths)} chars")
    print(f"    Median: {sorted(output_lengths)[len(output_lengths)//2]} chars")
    print(f"    Samples >200 chars: {sum(1 for x in output_lengths if x > 200)}")

print_split_stats(dataset, "TRAIN")
print_split_stats(test_dataset, "TEST")

# ===== STEP 12: READY FOR TRAINING =====
print("\n" + "=" * 60)
print("✓ DATASETS READY FOR TRAINING!")
print("=" * 60)
print("\nVariables ready:")
print(f"  'dataset' (for training): {len(dataset)} samples")
print(f"  'test_dataset' (for testing after training): {len(test_dataset)} samples")
print("\nUsage in training:")
print("  trainer = SFTTrainer(")
print("      model=model,")
print("      train_dataset=dataset,  # Use this for training")
print("      ...)")
print("\nAfter training, evaluate with:")
print("  test_results = trainer.evaluate(eval_dataset=test_dataset)")
print("\n" + "=" * 60)

LOADING AND NORMALIZING DATASET
✓ Original rows: 38233
✓ After dropping NaN: 38210

Normalizing text columns...
✓ After normalization: 38210
✓ After removing very short answers: 37395

✓ Bengali content verification:
  - Questions with Bengali: 37391/37395
  - Answers with Bengali: 37395/37395

SHUFFLING DATASET
✓ Dataset shuffled with random_state=42

CREATING TRAIN/TEST SPLITS
✓ Train set: 31785 samples (85.0%)
✓ Test set: 5610 samples (15.0%)
✓ Total: 37395 samples

PREPARING DATASETS
✓ Train dataset: 31785 samples
✓ Test dataset: 5610 samples

EOS TOKEN VERIFICATION
✓ EOS Token found: '<|eot_id|>'
  Token ID: 128009
  Token length: 10 characters

✓ Tokenizer info:
  Model: unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit
  Vocab size: 128256
  PAD token: <|finetune_right_pad_id|>
  BOS token: <|begin_of_text|>

FORMATTING DATASETS


Map:   0%|          | 0/31785 [00:00<?, ? examples/s]

Map:   0%|          | 0/5610 [00:00<?, ? examples/s]

✓ All datasets formatted with Alpaca template + EOS token

✓ Training dataset assigned to 'dataset' variable

SAMPLE VERIFICATION

[TRAIN SET - Sample 1]
------------------------------------------------------------
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
আপনি একজন অভিজ্ঞ, সহানুভূতিশীল এবং দয়ালু পরামর্শদাতা। ব্যবহারকারীর প্রশ্ন শুনে তাদের অনুভূতি গভীরভাবে বোঝার চেষ্টা করুন। উত্তরটি সহানুভূতিপূর্ণ, বিস্তারিত এবং সহায়ক হোক। তাদের অভিজ্ঞতা স্বীকার করুন, প্রয়োজনে ব্যাখ্যা দিন এবং উপদেশ দিন। উত্তর স্বাভাবিক, মানবিক এবং যতটা সম্ভব দীর্ঘ ও গভীর হোক।

প্রসঙ্গ: কৃতজ্ঞ

### Input:
এটি সাধারণ। নিজেকে খুব ভাগ্যবান মনে হচ্ছে

### Response:
হ্যাঁ, আপনি খুব ভাগ্যবান হতে হবে.<|eot_id|>

✓ EOS token correctly appended: '<|eot_id|>'

[TEST SET - Sample 1]
------------------------------------------------------------
Input length: 40 chars
Output length: 18 chars
Full t

# Step-5: Finetune the Model for 1st Iteration


> As the dataset is too large 1 full epoch won't run at a time, so we finetuned the whole dataset in multiple steps



In [None]:
from trl import SFTConfig, SFTTrainer

# Configure trainer
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = True, # Can make training 5x faster for short sequences.
    args = SFTConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        #num_train_epochs = 1, # Set number of epochs (e.g., 3)
        max_steps = 2000, # Comment out max_steps when using num_train_epochs
        learning_rate = 2e-4,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.001,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use TrackIO/WandB etc
        save_strategy = "steps", # Save at the end of each epoch
        save_total_limit = 2, # Keep only the last 2 checkpoints
        save_steps=100,
    ),
)

In [None]:
trainer_stats = trainer.train()

# Step-6: Upload the checkpoint to huggingface

In [None]:
# Install if needed
!pip install huggingface_hub

# Login with your token
from huggingface_hub import login

# Get token from: https://huggingface.co/settings/tokens
login(token="Your_hf_token")

In [None]:
import shutil
from google.colab import files

# Zip the folder
shutil.make_archive('/content/checkpoint-3000', 'zip', '/content/outputs/checkpoint-3000')

# Download the zip file
files.download('/content/checkpoint-3000.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Step-7. Finetune the Model for <code>N<sup>th</sup></code> Iteration

- **Step-8.1: repeat `step 1-4` and skip `step 5-6`**
- **Step-8.2: Download the last `checkpoint` from huggingface.**
- **Step-8.3: Structure the checkpoint so that training can be resumed**
- **Step-8.4: Start finetuning for n<sup>th</sup> iteartions using `resume_from_checkpont =True`**


In [5]:
import shutil
import os
from huggingface_hub import snapshot_download
print("Downloading checkpoint...")
local_checkpoint = snapshot_download(
    repo_id="Sam3000/bangla_empathic_model",
    allow_patterns=["checkpoint-3000/**"],
    local_dir="./downloaded_checkpoint"
)

checkpoint_path = "./downloaded_checkpoint/checkpoint-3000"
# Copy the downloaded checkpoint to outputs folder
checkpoint_path = "./downloaded_checkpoint/checkpoint-3000"
output_checkpoint_path = "./outputs/checkpoint-3000"

# Create outputs directory if it doesn't exist
os.makedirs("./outputs", exist_ok=True)

# Copy checkpoint folder
if os.path.exists(checkpoint_path):
    shutil.copytree(checkpoint_path, output_checkpoint_path, dirs_exist_ok=True)
    print(f"✓ Copied checkpoint to {output_checkpoint_path}")

Downloading checkpoint...


README.md: 0.00B [00:00, ?B/s]

adapter_config.json: 0.00B [00:00, ?B/s]

checkpoint-3000/adapter_model.safetensor(…):   0%|          | 0.00/168M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

checkpoint-3000/optimizer.pt:   0%|          | 0.00/86.9M [00:00<?, ?B/s]

checkpoint-3000/rng_state.pth:   0%|          | 0.00/14.6k [00:00<?, ?B/s]

checkpoint-3000/scaler.pt:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

checkpoint-3000/scheduler.pt:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

checkpoint-3000/tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

trainer_state.json: 0.00B [00:00, ?B/s]

checkpoint-3000/training_args.bin:   0%|          | 0.00/6.22k [00:00<?, ?B/s]

✓ Copied checkpoint to ./outputs/checkpoint-3000


- change `max_steps` to **(previous max steps + new nth max step)**. i.e: previouse max steps = 2000 and new nth max steps = 1000, then `max_steps=3000`.


In [6]:
from trl import SFTConfig, SFTTrainer

# Configure trainer
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = True, # Can make training 5x faster for short sequences.
    args = SFTConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        #num_train_epochs = 1, # Set number of epochs (e.g., 3)
        max_steps = 3000, # Comment out max_steps when using num_train_epochs
        learning_rate = 2e-4,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.001,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use TrackIO/WandB etc
        save_strategy = "steps", # Save at the end of each epoch
        save_total_limit = 2, # Keep only the last 2 checkpoints
        save_steps=100,
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/31785 [00:00<?, ? examples/s]

🦥 Unsloth: Padding-free auto-enabled, enabling faster training.


**Start training the trainer using `resume_from_checkpoint =True`**

In [7]:
trainer_stats = trainer.train(resume_from_checkpoint = True)
model.save_pretrained("final_model")
tokenizer.save_pretrained("final_model")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 31,785 | Num Epochs = 1 | Total steps = 3,000
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
3001,0.198


('final_model/tokenizer_config.json',
 'final_model/special_tokens_map.json',
 'final_model/chat_template.jinja',
 'final_model/tokenizer.json')

# Step-8: Loading & Infercing with Unsloth `FastLanguageModel`

**Infercing**

In [17]:
# English instruction with Bangla input/output
alpaca_prompt = """Below is an instruction that describes a task, paired with an input in Bangla that provides further context. Write a response in Bangla that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# GENERAL INSTRUCTION IN ENGLISH
general_instruction = """You are a compassionate and caring friend, NOT a professional therapist or medical doctor.

Your role:
- Talk like a trusted friend
- Provide empathy and support
- Listen without judgment
- Use warm, sincere, and human language
- Share your thoughts and feelings naturally
- Be conversational and relatable

What NOT to do:
- Do NOT give professional medical or mental health advice
- Do NOT use clinical or technical language
- Do NOT diagnose conditions
- Do NOT recommend medications or treatments
- Do NOT act like a therapist or counselor

For serious situations:
- If someone mentions suicide or self-harm, gently encourage them to seek professional help
- Suggest mental health helplines or services when appropriate

Tone: Friendly, casual, empathetic, and supportive - like talking to a close friend over coffee.

Respond in Bangla (Bengali language) with this friendly, supportive approach."""

# Usage
FastLanguageModel.for_inference(model)

inputs = tokenizer(
[
    alpaca_prompt.format(
        general_instruction,
        "কাউন্সেলিং কি সত্যিই এমন কিছু করে যা মানুষকে সাহায্য করতে পারে?",  # Bangla input
        ""  # Bangla output will be generated
    )
], return_tensors = "pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens = 1024, eos_token_id = tokenizer.eos_token_id,
    temperature = 0.8,  # More natural/conversational
    top_p = 0.4)
tokenizer.batch_decode(outputs)

['<|begin_of_text|>Below is an instruction that describes a task, paired with an input in Bangla that provides further context. Write a response in Bangla that appropriately completes the request.\n\n### Instruction:\nYou are a compassionate and caring friend, NOT a professional therapist or medical doctor.\n\nYour role:\n- Talk like a trusted friend\n- Provide empathy and support\n- Listen without judgment\n- Use warm, sincere, and human language\n- Share your thoughts and feelings naturally\n- Be conversational and relatable\n\nWhat NOT to do:\n- Do NOT give professional medical or mental health advice\n- Do NOT use clinical or technical language\n- Do NOT diagnose conditions\n- Do NOT recommend medications or treatments\n- Do NOT act like a therapist or counselor\n\nFor serious situations:\n- If someone mentions suicide or self-harm, gently encourage them to seek professional help\n- Suggest mental health helplines or services when appropriate\n\nTone: Friendly, casual, empathetic, 

**Infercing using `TextStreamer`**

In [None]:
# English instruction with Bangla input/output
alpaca_prompt = """Below is an instruction that describes a task, paired with an input in Bangla that provides further context. Write a response in Bangla that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# GENERAL INSTRUCTION IN ENGLISH
general_instruction = """You are a compassionate and caring friend, NOT a professional therapist or medical doctor.

Your role:
- Talk like a trusted friend
- Provide empathy and support
- Listen without judgment
- Use warm, sincere, and human language
- Share your thoughts and feelings naturally
- Be conversational and relatable

What NOT to do:
- Do NOT give professional medical or mental health advice
- Do NOT use clinical or technical language
- Do NOT diagnose conditions
- Do NOT recommend medications or treatments
- Do NOT act like a therapist or counselor

For serious situations:
- If someone mentions suicide or self-harm, gently encourage them to seek professional help
- Suggest mental health helplines or services when appropriate

Tone: Friendly, casual, empathetic, and supportive - like talking to a close friend over coffee.

Respond in Bangla (Bengali language) with this friendly, supportive approach."""

# Usage
FastLanguageModel.for_inference(model)

inputs = tokenizer(
[
    alpaca_prompt.format(
        general_instruction,
        "কাউন্সেলিং কি সত্যিই এমন কিছু করে যা মানুষকে সাহায্য করতে পারে?",  # Bangla input
        ""  # Bangla output will be generated
    )
], return_tensors = "pt").to("cuda")
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(
    **inputs,
    streamer = text_streamer,
    max_new_tokens = 1024,
    eos_token_id = tokenizer.eos_token_id,
    temperature = 0.8,  # More natural/conversational
    top_p = 0.4,
    # repetition_penalty = 1.1
)



<|begin_of_text|>Below is an instruction that describes a task, paired with an input in Bangla that provides further context. Write a response in Bangla that appropriately completes the request.

### Instruction:
You are a compassionate and caring friend, NOT a professional therapist or medical doctor.

Your role:
- Talk like a trusted friend
- Provide empathy and support
- Listen without judgment
- Use warm, sincere, and human language
- Share your thoughts and feelings naturally
- Be conversational and relatable

What NOT to do:
- Do NOT give professional medical or mental health advice
- Do NOT use clinical or technical language
- Do NOT diagnose conditions
- Do NOT recommend medications or treatments
- Do NOT act like a therapist or counselor

For serious situations:
- If someone mentions suicide or self-harm, gently encourage them to seek professional help
- Suggest mental health helplines or services when appropriate

Tone: Friendly, casual, empathetic, and supportive - like talk

# Step-19: Benchmarking

## Perplexity

In [14]:
import torch
import numpy as np
from tqdm import tqdm
import gc
import math
from typing import Tuple, Optional


alpaca_prompt = """Below is an instruction that describes a task, paired with an input in Bangla that provides further context. Write a response in Bangla that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

general_instruction = """You are a compassionate and caring friend, NOT a professional therapist or medical doctor.

Your role:
- Talk like a trusted friend
- Provide empathy and support
- Listen without judgment
- Use warm, sincere, and human language
- Share your thoughts and feelings naturally
- Be conversational and relatable

What NOT to do:
- Do NOT give professional medical or mental health advice
- Do NOT use clinical or technical language
- Do NOT diagnose conditions
- Do NOT recommend medications or treatments
- Do NOT act like a therapist or counselor

For serious situations:
- If someone mentions suicide or self-harm, gently encourage them to seek professional help
- Suggest mental health helplines or services when appropriate

Tone: Friendly, casual, empathetic, and supportive - like talking to a close friend over coffee.

Respond in Bangla (Bengali language) with this friendly, supportive approach."""

def calculate_perplexity(
    model,
    tokenizer,
    test_dataset,
    max_samples: int = 50,
    max_length: int = 1024,
    use_prompt_template: bool = True,
    input_field: str = 'input',
    output_field: str = 'output',
    temperature: float = 0.8,
    top_p: float = 0.4
) -> Tuple[float, float, dict]:
    """Calculate perplexity on test set"""

    print("Calculating perplexity...")
    print(f"Samples: {max_samples}, Max length: {max_length}")

    model.eval()
    total_loss = 0
    total_tokens = 0
    valid_samples = 0
    sample_perplexities = []
    sample_losses = []

    with torch.no_grad():
        for i in tqdm(range(max_samples), desc="Processing"):
            try:
                sample = test_dataset[i]

                if use_prompt_template:
                    bangla_input = sample.get(input_field, sample.get('text', ''))
                    bangla_output = sample.get(output_field, '')

                    full_text = alpaca_prompt.format(
                        general_instruction,
                        bangla_input,
                        bangla_output
                    )
                else:
                    full_text = sample.get('text', '')

                inputs = tokenizer(
                    full_text,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=max_length
                ).to(model.device)

                outputs = model(**inputs, labels=inputs['input_ids'])
                num_tokens = (inputs['attention_mask'] == 1).sum().item()

                sample_loss = outputs.loss.item()
                sample_perplexity = math.exp(sample_loss)

                total_loss += sample_loss * num_tokens
                total_tokens += num_tokens
                valid_samples += 1

                sample_perplexities.append(sample_perplexity)
                sample_losses.append(sample_loss)

                del inputs, outputs




            except KeyError as e:
                print(f"\nMissing field in sample {i}: {e}")
                print(f"Available fields: {list(test_dataset[i].keys())}")
                continue
            except RuntimeError as e:
                if "out of memory" in str(e):
                    print(f"\nOOM at sample {i}, skipping...")

                    continue
                else:
                    print(f"\nError at sample {i}: {e}")
                    continue
            except Exception as e:
                print(f"\nError at sample {i}: {e}")

                continue

    if total_tokens == 0:
        print("\nError: No tokens processed")
        return float('inf'), float('inf'), {}

    avg_loss = total_loss / total_tokens
    perplexity = math.exp(avg_loss)

    stats = {
        'perplexity': perplexity,
        'avg_loss': avg_loss,
        'total_tokens': total_tokens,
        'valid_samples': valid_samples,
        'requested_samples': max_samples,
        'avg_tokens_per_sample': total_tokens / valid_samples if valid_samples > 0 else 0,
        'sample_perplexities': sample_perplexities,
        'sample_losses': sample_losses,
        'median_perplexity': np.median(sample_perplexities) if sample_perplexities else float('inf'),
        'std_perplexity': np.std(sample_perplexities) if sample_perplexities else 0,
        'min_perplexity': min(sample_perplexities) if sample_perplexities else float('inf'),
        'max_perplexity': max(sample_perplexities) if sample_perplexities else float('inf'),
    }

    print(f"\nResults:")
    print(f"Perplexity: {perplexity:.4f}")
    print(f"Median: {stats['median_perplexity']:.4f}")
    print(f"Range: {stats['min_perplexity']:.4f} - {stats['max_perplexity']:.4f}")
    print(f"Std Dev: {stats['std_perplexity']:.4f}")
    print(f"Avg Loss: {avg_loss:.4f}")
    print(f"Total Tokens: {total_tokens:,}")
    print(f"Valid Samples: {valid_samples}/{max_samples}")
    print(f"Avg Tokens/Sample: {stats['avg_tokens_per_sample']:.1f}")


    return perplexity, avg_loss, stats


# Run calculation
perplexity, avg_loss, stats = calculate_perplexity(
    model=model,
    tokenizer=tokenizer,
    test_dataset=test_dataset,
    max_samples=100,
    use_prompt_template=True,
    input_field='input',
    output_field='output',
    temperature=0.8,
    top_p=0.4
)

# Sample-level breakdown
print(f"\nBest samples (lowest perplexity):")
sorted_indices = np.argsort(stats['sample_perplexities'])[:5]
for idx in sorted_indices:
    print(f"Sample {idx}: {stats['sample_perplexities'][idx]:.4f}")

print(f"\nWorst samples (highest perplexity):")
sorted_indices = np.argsort(stats['sample_perplexities'])[-5:][::-1]
for idx in sorted_indices:
    print(f"Sample {idx}: {stats['sample_perplexities'][idx]:.4f}")

Calculating perplexity...
Samples: 100, Max length: 1024


Processing: 100%|██████████| 100/100 [00:51<00:00,  1.94it/s]


Results:
Perplexity: 5.1538
Median: 5.8335
Range: 2.5726 - 9.3587
Std Dev: 1.4986
Avg Loss: 1.6397
Total Tokens: 43,655
Valid Samples: 100/100
Avg Tokens/Sample: 436.6

Best samples (lowest perplexity):
Sample 92: 2.5726
Sample 28: 2.7040
Sample 58: 2.7315
Sample 38: 2.7582
Sample 30: 2.7719

Worst samples (highest perplexity):
Sample 10: 9.3587
Sample 71: 8.9673
Sample 76: 8.8271
Sample 23: 8.5998
Sample 61: 8.5722





## Generate Predictions and Blue Score

In [15]:
import torch
import pandas as pd
from tqdm import tqdm

# Install evaluate library
!pip install evaluate -q
import evaluate

alpaca_prompt = """Below is an instruction that describes a task, paired with an input in Bangla that provides further context. Write a response in Bangla that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

general_instruction = """You are a compassionate and caring friend, NOT a professional therapist or medical doctor.

Your role:
- Talk like a trusted friend
- Provide empathy and support
- Listen without judgment
- Use warm, sincere, and human language
- Share your thoughts and feelings naturally
- Be conversational and relatable

What NOT to do:
- Do NOT give professional medical or mental health advice
- Do NOT use clinical or technical language
- Do NOT diagnose conditions
- Do NOT recommend medications or treatments
- Do NOT act like a therapist or counselor

For serious situations:
- If someone mentions suicide or self-harm, gently encourage them to seek professional help
- Suggest mental health helplines or services when appropriate

Tone: Friendly, casual, empathetic, and supportive - like talking to a close friend over coffee.

Respond in Bangla (Bengali language) with this friendly, supportive approach."""

def generate_predictions(model, tokenizer, test_dataset, num_samples=100, input_field='input', output_field='output'):
    """Generate predictions from model"""
    print(f"Generating predictions for {num_samples} samples...")

    try:
        FastLanguageModel.for_inference(model)
    except:
        model.eval()

    predictions = []
    references = []
    inputs_list = []

    for i in tqdm(range(num_samples), desc="Generating"):
        try:
            sample = test_dataset[i]
            bangla_input = sample.get(input_field, sample.get('text', ''))
            bangla_output = sample.get(output_field, '')

            prompt = alpaca_prompt.format(general_instruction, bangla_input, "")

            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024
            ).to(model.device)

            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=1024,
                    temperature=0.8,
                    top_p=0.4,
                    pad_token_id=tokenizer.pad_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                )

            generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
            prediction = generated.split("### Response:")[-1].strip() if "### Response:" in generated else generated.strip()

            predictions.append(prediction)
            references.append(bangla_output)
            inputs_list.append(bangla_input)

            del inputs, outputs, generated

        except Exception as e:
            print(f"\nError at {i}: {e}")
            predictions.append("")
            references.append(sample.get(output_field, ''))
            inputs_list.append(sample.get(input_field, ''))

    print(f"Generated {len(predictions)} predictions")

    # Save predictions
    pred_df = pd.DataFrame({
        'input': inputs_list,
        'reference': references,
        'prediction': predictions
    })
    pred_df.to_csv('predictions.csv', index=False, encoding='utf-8-sig')
    print(f"Saved to predictions.csv")

    return predictions, references, inputs_list

def calculate_bleu(predictions, references):
    """Calculate BLEU score"""
    print("\nCalculating BLEU score...")

    # Filter out empty predictions
    valid_pairs = [(p, r) for p, r in zip(predictions, references) if p and len(p.strip()) > 0]
    print(f"Valid pairs: {len(valid_pairs)}/{len(predictions)}")

    preds = [p for p, _ in valid_pairs]
    refs = [[r] for _, r in valid_pairs]

    bleu_metric = evaluate.load("bleu")
    bleu_results = bleu_metric.compute(predictions=preds, references=refs)

    print(f"\nBLEU Score: {bleu_results['bleu'] * 100:.2f}")
    print(f"Precisions:")
    for i, prec in enumerate(bleu_results['precisions'], 1):
        print(f"  BLEU-{i}: {prec * 100:.2f}")

    return bleu_results

# Generate predictions
predictions, references, inputs_list = generate_predictions(
    model=model,
    tokenizer=tokenizer,
    test_dataset=test_dataset,
    num_samples=100,
    input_field='input',
    output_field='output'
)

# Calculate BLEU
bleu_results = calculate_bleu(predictions, references)

# Show sample predictions
print(f"\nSample predictions:")
for i in range(min(3, len(predictions))):
    print(f"\n--- Sample {i+1} ---")
    print(f"Input: {inputs_list[i][:80]}...")
    print(f"\nPrediction ({len(predictions[i])} chars):")
    print(predictions[i][:200] + ("..." if len(predictions[i]) > 200 else ""))
    print(f"\nReference ({len(references[i])} chars):")
    print(references[i][:200] + ("..." if len(references[i]) > 200 else ""))

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hGenerating predictions for 100 samples...


Generating: 100%|██████████| 100/100 [13:55<00:00,  8.36s/it]


Generated 100 predictions
Saved to predictions.csv

Calculating BLEU score...
Valid pairs: 100/100


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]


BLEU Score: 2.42
Precisions:
  BLEU-1: 16.38
  BLEU-2: 3.57
  BLEU-3: 1.24
  BLEU-4: 0.48

Sample predictions:

--- Sample 1 ---
Input: আমার দাদা আমাকে তার পকেট ঘড়ি দিয়েছিলেন...

Prediction (36 chars):
এটা চমৎকার! আপনি কি তাকে পছন্দ করেন?

Reference (18 chars):
বাহ, কোথায় পেলেন?

--- Sample 2 ---
Input: আমার দাঁতের ডাক্তার আমার দাঁতে একটি আশ্চর্যজনক কাজ করেছেন, এবং আমি তাদের ভালবাসি...

Prediction (73 chars):
আমি বাজি ধরেছি আপনি তাদের ভালবাসেন। আমি আশা করি আপনি সবসময় সুস্থ থাকবেন।

Reference (50 chars):
আপনি আপনার নতুন সেট দাঁতের জন্য খুব গর্বিত হতে হবে

--- Sample 3 ---
Input: হ্যাঁ, সৌভাগ্যবশত আমি সপ্তাহান্তে তাদের সাহায্য করার জন্য আমার গাড়ি ছেড়ে যেতে ...

Prediction (78 chars):
আমি খুশি যে আপনি এটি করতে পেরেছেন। আমি আশা করি তারা এটি সম্পর্কে ভাল বোধ করবে।

Reference (41 chars):
আমি যদি আপনার মত একটি চমৎকার বন্ধু পেতাম!


## Rouge Score





In [16]:
# ============================================================
# ROUGE SCORE CALCULATION (MANUAL - CORRECT FOR BENGALI)
# ============================================================

import pandas as pd
import numpy as np

# Load predictions
pred_df = pd.read_csv('predictions.csv', encoding='utf-8-sig')
predictions = pred_df['prediction'].tolist()
references = pred_df['reference'].tolist()

print("="*60)
print("ROUGE SCORE CALCULATION")
print("="*60)

def calculate_manual_rouge(predictions, references):
    """Manual ROUGE calculation that works for Bengali"""
    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []

    valid_count = 0

    for pred, ref in zip(predictions, references):
        if not pred or not ref or len(pred.strip()) == 0:
            continue

        valid_count += 1

        pred_words = pred.lower().split()
        ref_words = ref.lower().split()

        # ROUGE-1 (unigram overlap)
        pred_unigrams = set(pred_words)
        ref_unigrams = set(ref_words)
        overlap = len(pred_unigrams & ref_unigrams)

        precision = overlap / len(pred_unigrams) if pred_unigrams else 0
        recall = overlap / len(ref_unigrams) if ref_unigrams else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
        rouge1_scores.append(f1)

        # ROUGE-2 (bigram overlap)
        pred_bigrams = set([' '.join(pred_words[i:i+2]) for i in range(len(pred_words)-1)])
        ref_bigrams = set([' '.join(ref_words[i:i+2]) for i in range(len(ref_words)-1)])
        overlap_2 = len(pred_bigrams & ref_bigrams)

        precision_2 = overlap_2 / len(pred_bigrams) if pred_bigrams else 0
        recall_2 = overlap_2 / len(ref_bigrams) if ref_bigrams else 0
        f1_2 = 2 * precision_2 * recall_2 / (precision_2 + recall_2) if (precision_2 + recall_2) > 0 else 0
        rouge2_scores.append(f1_2)

        # ROUGE-L (longest common subsequence - simplified as unigram)
        rougeL_scores.append(f1)

    print(f"Valid pairs evaluated: {valid_count}/{len(predictions)}")

    return {
        'rouge1': np.mean(rouge1_scores) if rouge1_scores else 0,
        'rouge2': np.mean(rouge2_scores) if rouge2_scores else 0,
        'rougeL': np.mean(rougeL_scores) if rougeL_scores else 0
    }

# Calculate ROUGE
rouge_scores = calculate_manual_rouge(predictions, references)

print("\n" + "="*60)
print("ROUGE RESULTS")
print("="*60)
print(f"✓ ROUGE-1: {rouge_scores['rouge1']:.4f}")
print(f"✓ ROUGE-2: {rouge_scores['rouge2']:.4f}")
print(f"✓ ROUGE-L: {rouge_scores['rougeL']:.4f}")

# Interpretation
print("\n" + "="*60)
print("INTERPRETATION")
print("="*60)


# Save results
rouge_df = pd.DataFrame([rouge_scores])
rouge_df.to_csv('rouge_scores.csv', index=False)

print(f"\n✅ ROUGE scores saved to: rouge_scores.csv")

ROUGE SCORE CALCULATION
Valid pairs evaluated: 100/100

ROUGE RESULTS
✓ ROUGE-1: 0.1452
✓ ROUGE-2: 0.0367
✓ ROUGE-L: 0.1452

INTERPRETATION

✅ ROUGE scores saved to: rouge_scores.csv


In [None]:
# ============================================================
# LOAD MODEL FROM LOCAL CHECKPOINT
# ============================================================

from unsloth import FastLanguageModel
import torch

# Configuration
max_seq_length = 2048
dtype = None  # Auto-detect
load_in_4bit = True  # Use 4-bit quantization

# Path to your saved model
MODEL_PATH = "./final_model"  # Change this to your model path

print("="*60)
print("LOADING MODEL FROM LOCAL CHECKPOINT")
print("="*60)
print(f"Model path: {MODEL_PATH}")

# Load model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

print("✓ Model loaded successfully!")
print(f"✓ Model device: {model.device}")
print(f"✓ Tokenizer vocab size: {len(tokenizer)}")

# Set to inference mode
FastLanguageModel.for_inference(model)

print("\n✓ Model ready for inference!")

LOADING MODEL FROM LOCAL CHECKPOINT
Model path: ./final_model
==((====))==  Unsloth 2025.12.10: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
✓ Model loaded successfully!
✓ Model device: cuda:0
✓ Tokenizer vocab size: 128256

✓ Model ready for inference!


In [None]:
# ============================================================

# Save model to HuggingFace Hub
HF_MODEL_NAME = "Sam3000/bangla_empathic_chat_model"  # Change this

print("="*60)
print("UPLOADING MODEL TO HUGGINGFACE HUB")
print("="*60)
print(f"Repository: {HF_MODEL_NAME}")

# Push model to hub
model.push_to_hub(
    HF_MODEL_NAME,
    private=False  # Set True for private repo
)

# Push tokenizer to hub
tokenizer.push_to_hub(
    HF_MODEL_NAME,
    private=False
)

print(f"\n✓ Model uploaded successfully!")
print(f"✓ View at: https://huggingface.co/{HF_MODEL_NAME}")

UPLOADING MODEL TO HUGGINGFACE HUB
Repository: Sam3000/bangla_empathic_chat_model


README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:  15%|#4        | 25.1MB /  168MB            

Saved model to https://huggingface.co/Sam3000/bangla_empathic_chat_model


README.md:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...mpnm1u5sjc/tokenizer.json: 100%|##########| 17.2MB / 17.2MB            


✓ Model uploaded successfully!
✓ View at: https://huggingface.co/Sam3000/bangla_empathic_chat_model


In [None]:
# ============================================================
# LOAD MODEL FROM HUGGINGFACE HUB
# ============================================================

from unsloth import FastLanguageModel
import torch

# Configuration
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Your HuggingFace model name
HF_MODEL_NAME = "Sam3000/bangla_empathic_chat_model"  # Change this

print("="*60)
print("LOADING MODEL FROM HUGGINGFACE HUB")
print("="*60)
print(f"Model: {HF_MODEL_NAME}")

# Load from HuggingFace
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=HF_MODEL_NAME,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

print("✓ Model loaded from HuggingFace!")
print(f"✓ Model device: {model.device}")

# Set to inference mode
FastLanguageModel.for_inference(model)

print("\n✓ Model ready for evaluation!")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
LOADING MODEL FROM HUGGINGFACE HUB
Model: Sam3000/bangla_empathic_chat_model
==((====))==  Unsloth 2025.12.10: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Unsloth 2025.12.10 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


✓ Model loaded from HuggingFace!
✓ Model device: cuda:0

✓ Model ready for evaluation!


In [None]:
# ============================================================
# CELL 1: PERPLEXITY CALCULATION
# ============================================================

import torch
import numpy as np
from tqdm import tqdm
import gc
import math

def clear_memory():
    gc.collect()
    torch.cuda.empty_cache()

def calculate_perplexity(model, tokenizer, test_dataset, max_samples=50):
    """Calculate perplexity on test set"""
    print("="*60)
    print("CALCULATING PERPLEXITY")
    print("="*60)

    model.eval()
    total_loss = 0
    total_tokens = 0
    clear_memory()

    with torch.no_grad():
        for i in tqdm(range(max_samples), desc="Perplexity"):
            try:
                inputs = tokenizer(
                    [test_dataset[i]['text']],
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=1024
                ).to(model.device)

                outputs = model(**inputs, labels=inputs['input_ids'])
                num_tokens = (inputs['attention_mask'] == 1).sum().item()

                total_loss += outputs.loss.item() * num_tokens
                total_tokens += num_tokens

                del inputs, outputs

                if i % 10 == 0:
                    clear_memory()

            except Exception as e:
                print(f"\nError at sample {i}: {e}")
                clear_memory()
                continue

    avg_loss = total_loss / total_tokens
    perplexity = math.exp(avg_loss)

    print(f"\n{'='*60}")
    print("PERPLEXITY RESULTS")
    print(f"{'='*60}")
    print(f"✓ Perplexity: {perplexity:.4f}")
    print(f"✓ Average Loss: {avg_loss:.4f}")
    print(f"✓ Total Tokens: {total_tokens:,}")
    print(f"✓ Samples Evaluated: {max_samples}")

    # Interpretation
    if perplexity < 2.0:
        print(f"\n⭐⭐⭐⭐⭐ EXCELLENT - Model is very confident!")
    elif perplexity < 5.0:
        print(f"\n⭐⭐⭐⭐ GOOD - Model performs well")
    elif perplexity < 10.0:
        print(f"\n⭐⭐⭐ AVERAGE - Room for improvement")
    else:
        print(f"\n⭐⭐ NEEDS WORK - Consider more training")

    clear_memory()
    return perplexity, avg_loss

# RUN PERPLEXITY CALCULATION
perplexity, avg_loss = calculate_perplexity(
    model=model,
    tokenizer=tokenizer,
    test_dataset=test_dataset,
    max_samples=100
)

CALCULATING PERPLEXITY


Perplexity: 100%|██████████| 100/100 [01:18<00:00,  1.27it/s]



PERPLEXITY RESULTS
✓ Perplexity: 1.2155
✓ Average Loss: 0.1952
✓ Total Tokens: 62,675
✓ Samples Evaluated: 100

⭐⭐⭐⭐⭐ EXCELLENT - Model is very confident!


# Step-10: Upload and Load model for Huggingface

## Load Model From Local Checkpoint

In [None]:


from unsloth import FastLanguageModel
import torch

# Configuration
max_seq_length = 2048
dtype = None  # Auto-detect
load_in_4bit = True  # Use 4-bit quantization

# Path to your saved model
MODEL_PATH = "./final_model"  # Change this to your model path

print("="*60)
print("LOADING MODEL FROM LOCAL CHECKPOINT")
print("="*60)
print(f"Model path: {MODEL_PATH}")

# Load model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_PATH,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

print("✓ Model loaded successfully!")
print(f"✓ Model device: {model.device}")
print(f"✓ Tokenizer vocab size: {len(tokenizer)}")

# Set to inference mode
FastLanguageModel.for_inference(model)

print("\n✓ Model ready for inference!")

## Save model to HuggingFace Hub

In [None]:

HF_MODEL_NAME = "Sam3000/bangla_empathic_chat_model"  # Change this

print("="*60)
print("UPLOADING MODEL TO HUGGINGFACE HUB")
print("="*60)
print(f"Repository: {HF_MODEL_NAME}")

# Push model to hub
model.push_to_hub(
    HF_MODEL_NAME,
    private=False  # Set True for private repo
)

# Push tokenizer to hub
tokenizer.push_to_hub(
    HF_MODEL_NAME,
    private=False
)

print(f"\n✓ Model uploaded successfully!")
print(f"✓ View at: https://huggingface.co/{HF_MODEL_NAME}")

## Load Model From HuggingFace Hub

In [18]:


from unsloth import FastLanguageModel
import torch

# Configuration
max_seq_length = 2048
dtype = None
load_in_4bit = True

# Your HuggingFace model name
HF_MODEL_NAME = "Sam3000/bangla_empathic_chat_model"  # Change this

print("="*60)
print("LOADING MODEL FROM HUGGINGFACE HUB")
print("="*60)
print(f"Model: {HF_MODEL_NAME}")

# Load from HuggingFace
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=HF_MODEL_NAME,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

print("✓ Model loaded from HuggingFace!")
print(f"✓ Model device: {model.device}")

# Set to inference mode
FastLanguageModel.for_inference(model)

print("\n✓ Model ready for evaluation!")

LOADING MODEL FROM HUGGINGFACE HUB
Model: Sam3000/bangla_empathic_chat_model
==((====))==  Unsloth 2026.1.1: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

✓ Model loaded from HuggingFace!
✓ Model device: cuda:0

✓ Model ready for evaluation!
