In [1]:
# @title üöÄ CELL 1 ‚Äî Project Setup & Paths
# ALLaM-7B ‚Äî Bilingual SFT for MOI RAG (Saudi Tone + English Answers When Asked)
# Requirements: transformers>=4.44, peft>=0.12, accelerate>=0.34, bitsandbytes, trl>=0.9

import os, sys, json, math, random, shutil, logging, warnings
from pathlib import Path

# Base model (your local path)
BASE_MODEL_PATH = "/ibex/user/rashidah/projects/MOI_ChatBot/chatbot_project/3_models/models--ALLaM-AI--ALLaM-7B-Instruct-preview"

# Project dirs
PROJ_DIR = Path("/ibex/user/rashidah/projects/MOI_ChatBot/al-lam_bilingual_sft")
DATA_DIR = PROJ_DIR / "data"
CKPT_DIR = PROJ_DIR / "checkpoints"
OUT_DIR  = PROJ_DIR / "artifacts"
for p in [DATA_DIR, CKPT_DIR, OUT_DIR]: p.mkdir(parents=True, exist_ok=True)

logging.basicConfig(level=logging.INFO)
print("‚úÖ Paths ready.")
print(f"Project Directory: {PROJ_DIR}")

‚úÖ Paths ready.
Project Directory: /ibex/user/rashidah/projects/MOI_ChatBot/al-lam_bilingual_sft


In [2]:
# @title üì¶ CELL 2 ‚Äî Load Raw CSV Data
import pandas as pd
import glob
import os # Ensure os is imported if running standalone

# Use the same sources you already validated
MASTER_DIR = "/ibex/user/rashidah/projects/MOI_ChatBot/chatbot_project/1_data/Data_Master"
CHUNKS_DIR = "/ibex/user/rashidah/projects/MOI_ChatBot/chatbot_project/1_data/Data_chunks"

def load_csvs(d):
    return [pd.read_csv(p) for p in sorted(glob.glob(os.path.join(d, "*.csv")))]

df_master = pd.concat(load_csvs(MASTER_DIR), ignore_index=True)
df_chunks = pd.concat(load_csvs(CHUNKS_DIR), ignore_index=True)

# Keep Arabic text fields used in RAG answers
df_master = df_master.fillna("")
df_chunks = df_chunks.fillna("")

print(f"‚úÖ Master rows loaded: {len(df_master)}")
print(f"‚úÖ Chunk rows loaded: {len(df_chunks)}")

‚úÖ Master rows loaded: 64
‚úÖ Chunk rows loaded: 256


In [3]:
# @title üìä CELL 3 ‚Äî Bilingual Dataset Generation (PADDING_SIDE FIX)
# %%
import re, unicodedata, random, os
from tqdm.notebook import tqdm
import pandas as pd
import torch
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    BitsAndBytesConfig, 
    pipeline
)
import glob # Make sure glob is imported

# --- 1. Load Data (Same as before) ---
print("Loading raw CSV data...")
# Ensure df_master and df_chunks are loaded from CELL 2
if 'df_master' not in globals() or df_master.empty:
    MASTER_DIR = "/ibex/user/rashidah/projects/MOI_ChatBot/chatbot_project/1_data/Data_Master"
    df_master = pd.concat([pd.read_csv(p) for p in sorted(glob.glob(os.path.join(MASTER_DIR, "*.csv")))], ignore_index=True)
    df_master = df_master.fillna("")

if 'df_chunks' not in globals() or df_chunks.empty:
    CHUNKS_DIR = "/ibex/user/rashidah/projects/MOI_ChatBot/chatbot_project/1_data/Data_chunks"
    df_chunks = pd.concat([pd.read_csv(p) for p in sorted(glob.glob(os.path.join(CHUNKS_DIR, "*.csv")))], ignore_index=True)
    df_chunks = df_chunks.fillna("")

print(f"Loaded {len(df_master)} master rows and {len(df_chunks)} chunk rows.")

# --- 2. Arabic Normalization (Same as before) ---
def normalize_ar(s):
    if not isinstance(s, str): return ""
    s = unicodedata.normalize("NFC", s)
    s = re.sub(r"[\u0617-\u061A\u064B-\u0652\u0670\u06D6-\u06ED]", "", s)
    s = re.sub(r"[ÿ£ÿ•ÿ¢Ÿ±]", "ÿß", s).replace("Ÿâ", "Ÿä")
    return re.sub(r"\s+", " ", s).strip()

# --- 3. Templates (Same as before) ---
EN_Q_TEMPLATES = [
    "How can I {x}?", "What are the steps to {x}?",
    "Where is the service for {x} in Absher?", "Requirements/fees for {x}?"
]
AR_Q_TEMPLATES = [
    "ŸÉŸäŸÅ ÿ£ŸÇÿØÿ± {x}ÿü", "ŸÖÿß ŸáŸä ÿÆÿ∑Ÿàÿßÿ™ {x}ÿü",
    "ŸàŸäŸÜ ÿ£ŸÑŸÇŸâ ÿÆÿØŸÖÿ© {x} ŸÅŸä ÿ£ÿ®ÿ¥ÿ±ÿü", "Ÿàÿ¥ ÿßŸÑÿ¥ÿ±Ÿàÿ∑/ÿßŸÑÿ±ÿ≥ŸàŸÖ ŸÑŸÄ {x}ÿü"
]

def ar_action_from_row(r):
    title = normalize_ar(str(r.get("service_title_ar","")))
    return title if len(title) >= 6 else normalize_ar(str(r.get("description_full",""))[:80])

def build_ar_answer_text(r):
    parts = []
    desc = normalize_ar(r.get("description_full", ""))
    cond = normalize_ar(r.get("conditions", ""))
    fees = normalize_ar(r.get("fees", ""))
    path = normalize_ar(r.get("access_path", ""))
    if desc: parts.append(f"ÿßŸÑŸàÿµŸÅ: {desc}")
    if cond: parts.append(f"ÿßŸÑÿ¥ÿ±Ÿàÿ∑: {cond}")
    if fees: parts.append(f"ÿßŸÑÿ±ÿ≥ŸàŸÖ: {fees}")
    if path: parts.append(f"ÿ∑ÿ±ŸäŸÇÿ© ÿßŸÑŸàÿµŸàŸÑ: {path}")
    return " | ".join(parts)

# --- 4. üî¥ NEW: ALLaM-7B Translation Pipeline ---
def load_allam_translator():
    """Loads the 4-bit ALLaM model, identical to CELL 5."""
    print("Loading ALLaM-7B (4-bit) to use as a translator...")
    
    # Path to the *working* model snapshot
    model_path = "/ibex/user/rashidah/projects/MOI_ChatBot/chatbot_project/3_models/models--ALLaM-AI--ALLaM-7B-Instruct-preview/snapshots/a28dd1e67420cde72d3629c8633a974cf7d9c366"
    
    bnb_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    
    # --- üî¥ PADDING_SIDE FIX (As suggested by the warning) üî¥ ---
    tokenizer = AutoTokenizer.from_pretrained(
        model_path, 
        use_fast=False,
        padding_side='left' # <-- This is the fix
    )
    # --- End of Fix ---
    
    model = AutoModelForCausalLM.from_pretrained(
        model_path, 
        quantization_config=bnb_cfg,
        device_map="auto"
    )
    
    # --- BATCHING FIX (As suggested by the *previous* error) ---
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = model.config.eos_token_id
        print("Setting pad_token_id = eos_token_id for batching.")
    # --- End of Fix ---

    translator_pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        dtype=torch.bfloat16, 
        max_new_tokens=512, 
        do_sample=False,    
        pad_token_id=tokenizer.eos_token_id
    )
    print("‚úÖ ALLaM-7B translator loaded on GPU.")
    return translator_pipe

def translate_batch_with_allam(translator, text_list_ar):
    """Translates a list of Arabic texts using the ALLaM pipeline."""
    
    prompts = []
    for text in text_list_ar:
        prompt = f"Translate the following Arabic text to English. Provide *only* the translated English text.\n\nArabic: {text}\n\nEnglish:"
        prompts.append(prompt)
    
    print(f"Translating {len(prompts)} texts with ALLaM...")
    
    # Use batching (batch_size=8 is safe for A100 80GB)
    results_raw = translator(prompts, batch_size=8)
    
    cleaned_results = []
    for result in results_raw:
        generated_text = result[0]['generated_text']
        translated = generated_text.split("English:")[-1].strip()
        cleaned_results.append(translated)
        
    return cleaned_results

# --- 5. Generate Dataset ---

def generate_sft_data(translator):
    if translator is None:
        raise RuntimeError("Translation model failed to load. Cannot proceed.")

    records = []
    texts_to_translate = set()
    
    print("Pass 1: Collecting texts for translation...")
    master_answers = {}
    for _, r in df_master.iterrows() :
        ans_ar = build_ar_answer_text(r)
        if not ans_ar: continue
        master_answers[r['service_id']] = ans_ar
        texts_to_translate.add(ans_ar)

    chunk_texts = {}
    for _, r in df_chunks.iterrows():
        txt = normalize_ar(str(r.get("chunk_text","")))
        if len(txt) < 40: continue
        chunk_texts[r['chunk_id']] = txt
        texts_to_translate.add(txt)

    print(f"Pass 2: Translating {len(texts_to_translate)} unique text snippets...")
    text_list_ar = list(texts_to_translate)
    
    translated_texts = translate_batch_with_allam(translator, text_list_ar)
    
    translation_map = {
        ar_text: en_text
        for ar_text, en_text in zip(text_list_ar, translated_texts)
    }
    print("‚úÖ Translation complete.")

    print("Pass 3: Building final bilingual dataset...")
    random.seed(1337)

    # Master-level pairs
    for _, r in df_master.iterrows():
        sid = r['service_id']
        if sid not in master_answers: continue
        action = ar_action_from_row(r)
        ans_ar = master_answers[sid]
        ans_en = translation_map.get(ans_ar)
        if not action or not ans_en: continue

        q_ar = random.choice(AR_Q_TEMPLATES).format(x=action)
        a_ar = f"ÿ£ŸÉŸäÿØ. ÿ®ŸÜÿßÿ°Ÿã ÿπŸÑŸâ ŸÇÿßÿπÿØÿ© ÿßŸÑŸÖÿπÿ±ŸÅÿ©: {ans_ar}"
        records.append({"instruction": q_ar, "response": a_ar})
        
        q_en = random.choice(EN_Q_TEMPLATES).format(x=action)
        a_en = f"Certainly. Based on the knowledge base: {ans_en}"
        records.append({"instruction": q_en, "response": a_en})

    # Chunk-level pairs
    for _, r in df_chunks.iterrows():
        cid = r['chunk_id']
        if cid not in chunk_texts: continue
        title = normalize_ar(str(r.get("chunk_title","") or "the service"))
        txt_ar = chunk_texts[cid]
        txt_en = translation_map.get(txt_ar)
        if not txt_en: continue

        q_ar = f"ÿπÿ∑ŸÜŸä ŸÖŸÑÿÆÿµ ŸÖŸàÿ¨ÿ≤ ÿπŸÜ: {title}"
        a_ar = f"ÿ®ŸÜÿßÿ°Ÿã ÿπŸÑŸâ ÿßŸÑŸÖÿ≥ÿ™ŸÜÿØÿßÿ™: {txt_ar[:450]}"
        records.append({"instruction": q_ar, "response": a_ar})
        
        q_en = f"Give a concise English summary about: {title}"
        a_en = f"Based on the documents: {txt_en[:450]}"
        records.append({"instruction": q_en, "response": a_en})

    return records

# --- 6. Execute and Save ---
translator_model_pipe = None
# Define the new train_path which will be used by CELL 5
train_path = DATA_DIR / "bilingual_moi_absher_sFT_v2.jsonl"

try:
    torch.cuda.empty_cache() 
    translator_model_pipe = load_allam_translator()
    sft_records = generate_sft_data(translator_model_pipe)
    
    with open(train_path, "w", encoding="utf-8") as f:
        for ex in sft_records:
            f.write(json.dumps(ex, ensure_ascii=False) + "\n")

    print(f"\n‚úÖ‚úÖ‚úÖ New SFT records generated: {len(sft_records)}")
    print(f"‚úÖ New Dataset saved to: {train_path}")
    print("\nüí° NOTE: CELL 5 is now ready to use this new train_path.")

except Exception as e:
    print(f"\n‚ùå An error occurred during dataset generation: {e}")
finally:
    if translator_model_pipe:
        del translator_model_pipe
    torch.cuda.empty_cache()

Loading raw CSV data...
Loaded 64 master rows and 256 chunk rows.
Loading ALLaM-7B (4-bit) to use as a translator...


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Setting pad_token_id = eos_token_id for batching.
‚úÖ ALLaM-7B translator loaded on GPU.
Pass 1: Collecting texts for translation...
Pass 2: Translating 170 unique text snippets...
Translating 170 texts with ALLaM...
‚úÖ Translation complete.
Pass 3: Building final bilingual dataset...

‚úÖ‚úÖ‚úÖ New SFT records generated: 438
‚úÖ New Dataset saved to: /ibex/user/rashidah/projects/MOI_ChatBot/al-lam_bilingual_sft/data/bilingual_moi_absher_sFT_v2.jsonl

üí° NOTE: CELL 5 is now ready to use this new train_path.


In [4]:
# @title üìú CELL 4 ‚Äî System Prompt (Unchanged)
# This prompt is excellent. It explicitly states the bilingual policy.

SYSTEM_RULES = """You are Absher Assistant. Policies:
- Answer in the user's question language: Arabic‚ÜíArabic, English‚ÜíEnglish.
- Saudi polite tone (friendly, concise).
- Prefer factual content grounded in MOI/Absher knowledge.
- If asked for general Saudi information (distance, regions), answer simply.
- If info not in knowledge, say: "ÿßŸÑŸÖÿπŸÑŸàŸÖÿ© ÿ∫Ÿäÿ± ŸÖÿ™ŸàŸÅÿ±ÿ© ŸÅŸä ÿßŸÑŸÖÿ≥ÿ™ŸÜÿØ." / "Information is not available in the documents."
- NEVER confuse National ID (Saudi citizen) with Iqama (resident ID)."""

print("‚úÖ System Rules defined.")

‚úÖ System Rules defined.


In [5]:
# @title ‚öôÔ∏è CELL 5 ‚Äî SFT Trainer Setup (ALL FIXES APPLIED)
# %%
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig

print("Setting up Tokenizer, Model (4-bit), and LoRA Config...")

# --- 1. Path Fix: Point to the correct snapshot directory ---
BASE_MODEL_PATH = "/ibex/user/rashidah/projects/MOI_ChatBot/chatbot_project/3_models/models--ALLaM-AI--ALLaM-7B-Instruct-preview/snapshots/a28dd1e67420cde72d3629c8633a974cf7d9c366"

# --- Tokenizer ---
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, use_fast=False)

if tokenizer.pad_token_id is None:
    print("Setting pad_token_id to eos_token_id")
    tokenizer.pad_token_id = tokenizer.eos_token_id

# --- 4-bit Quantization (BNB) ---
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16, # Use bfloat16 for A100
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# --- Load Base Model ---
print("Loading 4-bit base model (ALLaM-7B-Instruct)...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH, 
    quantization_config=bnb_cfg,
    device_map="auto" 
)

# --- LoRA Config (Syntax Fix) ---
lora_cfg = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05, # <-- Corrected 'a'
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "up_proj", "down_proj", "gate_proj"
    ]
)
print("Applying PEFT (LoRA) to model...")
model = get_peft_model(base_model, lora_cfg)

# --- Dataset Loading and Formatting ---
# üî¥ UPDATED PATH: Point to the new dataset generated by CELL 3
train_path = DATA_DIR / "bilingual_moi_absher_sFT_v2.jsonl"
print(f"Loading dataset from: {train_path}")
# --- End of Update ---

ds = load_dataset("json", data_files=str(train_path))

def format_example(ex):
    prompt = f"<s>[INST] <<SYS>>\n{SYSTEM_RULES}\n<</SYS>>\n{ex['instruction']} [/INST]"
    return {"text": f"{prompt} {ex['response']}"}

print("Formatting dataset...")
ds = ds.map(format_example, remove_columns=ds["train"].column_names)


# --- TRL Version Fix 1: Manual Tokenization ---
MAX_SEQ_LENGTH = 4096 
def tokenize_data(examples):
    return tokenizer(
        examples["text"], 
        truncation=True, 
        padding=False, 
        max_length=MAX_SEQ_LENGTH
    )

print(f"Manually tokenizing dataset (max_length={MAX_SEQ_LENGTH})...")
ds = ds.map(tokenize_data, batched=True, remove_columns=["text"])

# --- TRL Version Fix 2 & W&B Fix ---
train_cfg = SFTConfig(
    output_dir=str(CKPT_DIR),
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    lr_scheduler_type="cosine",
    learning_rate=2e-4,
    logging_steps=25,
    save_strategy="epoch",
    report_to="none", # <-- W&B Fix
    # packing=True, # <-- Removed for old TRL version
    
    # A100-specific optimizations
    bf16=True,
    tf32=True,
)

# --- TRL Version Fix 3: Remove unsupported args ---
trainer = SFTTrainer(
    model=model,
    train_dataset=ds["train"],
    args=train_cfg,
    # tokenizer, dataset_text_field, max_seq_length REMOVED
)

print("‚úÖ Trainer ready.")

  from trl import SFTTrainer, SFTConfig


Setting up Tokenizer, Model (4-bit), and LoRA Config...
Setting pad_token_id to eos_token_id
Loading 4-bit base model (ALLaM-7B-Instruct)...


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Applying PEFT (LoRA) to model...
Loading dataset from: /ibex/user/rashidah/projects/MOI_ChatBot/al-lam_bilingual_sft/data/bilingual_moi_absher_sFT_v2.jsonl


Generating train split: 0 examples [00:00, ? examples/s]

Formatting dataset...


Map:   0%|          | 0/438 [00:00<?, ? examples/s]

Manually tokenizing dataset (max_length=4096)...


Map:   0%|          | 0/438 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/438 [00:00<?, ? examples/s]

‚úÖ Trainer ready.


In [6]:
# @title üöÄ CELL 6 ‚Äî Start Training
# This cell is correct.

print("Starting SFT training...")
trainer.train()

# Save the adapter and tokenizer
adapter_path = CKPT_DIR / "lora_adapter"
trainer.save_model(str(adapter_path))
tokenizer.save_pretrained(str(adapter_path))

print(f"‚úÖ Training complete. LoRA adapter saved to: {adapter_path}")

Starting SFT training...


Step,Training Loss


‚úÖ Training complete. LoRA adapter saved to: /ibex/user/rashidah/projects/MOI_ChatBot/al-lam_bilingual_sft/checkpoints/lora_adapter


In [7]:
# @title üíø CELL 7 ‚Äî Merge Adapter (dtype fix)
# This cell merges the LoRA adapter with the base model.
# %%
from peft import PeftModel
from transformers import AutoModelForCausalLM

print("Merging LoRA adapter into base model...")

merged_dir = OUT_DIR / "ALLaM-7B-MOI-Bilingual-Merged"

# Define adapter path from previous cell
adapter_path = CKPT_DIR / "lora_adapter"

# Load base model in bf16 for merging
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH, 
    dtype=torch.bfloat16, # <-- Fixed 'torch_dtype' to 'dtype'
    device_map="auto"
)

# Load the PEFT model
model = PeftModel.from_pretrained(model, str(adapter_path))

# Merge and unload
print("Performing merge and unload...")
model = model.merge_and_unload()

# Save the merged model
model.save_pretrained(str(merged_dir))
tokenizer.save_pretrained(str(merged_dir))

print(f"‚úÖ Merged standalone model saved to: {merged_dir}")

INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Merging LoRA adapter into base model...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Performing merge and unload...
‚úÖ Merged standalone model saved to: /ibex/user/rashidah/projects/MOI_ChatBot/al-lam_bilingual_sft/artifacts/ALLaM-7B-MOI-Bilingual-Merged


In [8]:
# @title üß™ CELL 8 ‚Äî Test Merged Model (Syntax FIX)
# This cell tests the final merged model.
# %%
from transformers import pipeline
import torch

print("Loading merged model for testing...")

# Define merged_dir path from previous cell
merged_dir = OUT_DIR / "ALLaM-7B-MOI-Bilingual-Merged"

# Ensure any old models are cleared from VRAM if necessary
torch.cuda.empty_cache()

# --- üü¢ EFFICIENCY FIX üü¢ ---
# Load the pipeline ONCE, not inside the function.
print("Loading test pipeline...")
test_pipe = pipeline(
    "text-generation", 
    model=str(merged_dir), 
    tokenizer=tokenizer,
    dtype=torch.bfloat16, # <-- Fixed 'torch_dtype' to 'dtype'
    device_map="auto"
)

def gen(prompt, max_new_tokens=256):
    """Runs generation on the pre-loaded pipeline."""
    out = test_pipe(
        prompt, 
        max_new_tokens=max_new_tokens, 
        do_sample=False, 
        pad_token_id=tokenizer.eos_token_id
    )[0]["generated_text"]
    
    # Clean the output to show only the response
    return out.split("[/INST]",1)[-1].strip() if "[/INST]" in out else out
# --- End of Fix ---

# Test cases
tests = [
    ("AR‚ÜíAR", f"<s>[INST] <<SYS>>\n{SYSTEM_RULES}\n<</SYS>>\nŸÉŸäŸÅ ÿßÿ¨ÿØÿØ ÿ¨Ÿàÿßÿ≤ ÿßŸÑÿ≥ŸÅÿ±ÿü [/INST]"),
    ("EN‚ÜíEN", f"<s>[INST] <<SYS>>\n{SYSTEM_RULES}\n<</SYS>>\nHow can I renew my passport? [/INST]"),
    
    # --- üî¥ SYNTAX FIX: \N was changed to \n ---
    ("NoMix_EN", f"<s>[INST] <<SYS>>\n{SYSTEM_RULES}\n<</SYS>>\nTell me steps to renew National ID. [/INST]"),
    ("CodeSwitch", f"<s>[INST] <<SYS>>\n{SYSTEM_RULES}\n<</SYS>>\nŸÖÿß ŸáŸä ÿßŸÑ requirements ŸÑÿÆÿØŸÖÿ© ÿ™ÿ¨ÿØŸäÿØ ÿßŸÑÿßŸÇÿßŸÖÿ©ÿü [/INST]")
]

for name, p in tests:
    print(f"---- TEST: {name} ----")
    print(f"ü§ñ Response: {gen(p)}")
    print("-------------------------\n")

Loading merged model for testing...


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading test pipeline...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


---- TEST: AR‚ÜíAR ----
ü§ñ Response: ÿ®ŸÜÿßÿ°Ÿã ÿπŸÑŸâ ÿßŸÑŸÖÿ≥ÿ™ŸÜÿØÿßÿ™: ÿßŸÑÿÆÿØŸÖÿßÿ™ > ÿÆÿØŸÖÿßÿ™ > ÿßŸÑÿ¨Ÿàÿßÿ≤ÿßÿ™ > ÿ™ÿ¨ÿØŸäÿØ ÿ¨Ÿàÿßÿ≤ ÿßŸÑÿ≥ŸÅÿ±. ÿßÿØÿÆŸÑ ÿπŸÑŸâ ÿÆÿØŸÖÿßÿ™ ÿßŸÑÿ¨Ÿàÿßÿ≤ÿßÿ™ > ÿ™ÿ¨ÿØŸäÿØ ÿ¨Ÿàÿßÿ≤ ÿßŸÑÿ≥ŸÅÿ±. ÿßŸÑÿ¥ÿ±Ÿàÿ∑: ÿ£ŸÜ ŸäŸÉŸàŸÜ ÿßŸÑŸÖÿ≥ÿ™ŸÅŸäÿØ ÿπŸÑŸâ ŸÇŸäÿØ ÿßŸÑÿ≠Ÿäÿßÿ©. ÿ£ŸÜ ŸÑÿß ŸäŸÇŸÑ ÿπŸÖÿ± ÿßŸÑŸÖÿ≥ÿ™ŸÅŸäÿØ ÿπŸÜ 21 ÿ≥ŸÜÿ©. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖŸÑÿßÿ≠ÿ∏ÿßÿ™ ÿ£ŸÖŸÜŸäÿ© ÿπŸÑŸâ ÿßŸÑŸÖÿ≥ÿ™ŸÅŸäÿØ. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖÿÆÿßŸÑŸÅÿßÿ™ ÿπŸÑŸâ ÿßŸÑŸÖÿ≥ÿ™ŸÅŸäÿØ. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖŸÜÿπ ÿ≥ŸÅÿ± ÿπŸÑŸâ ÿßŸÑŸÖÿ≥ÿ™ŸÅŸäÿØ. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖŸÜÿπ ÿ≥ŸÅÿ± ÿπŸÑŸâ ÿ£ÿ≠ÿØ ÿ£ŸÅÿ±ÿßÿØ ÿßŸÑÿ£ÿ≥ÿ±ÿ©. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖŸÜÿπ ÿÆÿ±Ÿàÿ¨ ŸàÿπŸàÿØÿ© ÿπŸÑŸâ ÿßŸÑŸÖÿ≥ÿ™ŸÅŸäÿØ. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖŸÜÿπ ÿÆÿ±Ÿàÿ¨ ŸÜŸáÿßÿ¶Ÿä ÿπŸÑŸâ ÿßŸÑŸÖÿ≥ÿ™ŸÅŸäÿØ. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖŸÜÿπ ÿÆÿ±Ÿàÿ¨ ŸÜŸáÿßÿ¶Ÿä ÿπŸÑŸâ ÿ£ÿ≠ÿØ ÿ£ŸÅÿ±ÿßÿØ ÿßŸÑÿ£ÿ≥ÿ±ÿ©. ÿ£ŸÜ ŸÑÿß ŸäŸÉŸàŸÜ ŸáŸÜÿßŸÉ ÿ£Ÿä ŸÖŸÜÿπ ÿÆÿ±Ÿàÿ¨ Ÿ