In [1]:
%%capture
# Install required packages
!pip install unsloth transformers torch datasets huggingface_hub

In [2]:
# Imports
from unsloth import FastLanguageModel
import torch
import json
import re

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# Load the fine-tuned model and tokenizer.
# The checkpoint id and sequence length are named constants so they can be
# changed in one place.
MODEL_ID = "OussemaGuerriche/mistral-multilingual-name-generator-collab-pro"
# NOTE(review): generate_variants() truncates its prompt at 1024 tokens and asks
# for up to 500 new tokens, which together exceed this value - confirm that 512
# is the intended setting.
MAX_SEQ_LENGTH = 512

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_ID,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=torch.float16,
    load_in_4bit=True,
)

# Ensure tokenizer has a pad token; fall back to the end-of-sequence token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

==((====))==  Unsloth 2025.5.4: Fast Mistral patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/165k [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.31G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/162 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/177k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/228M [00:00<?, ?B/s]

Unsloth 2025.5.4 patched 40 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [6]:
def generate_variants(primary_name, max_retries=3):
    """
    Ask the fine-tuned model for Latin, Arabic and Cyrillic variants of a name.

    The prompt requests a JSON object with the keys "Latn", "Arab" and "Cyrl"
    and ends with an opening brace to prime JSON output. The decoded sequence
    is returned untouched: it contains the echoed prompt and any special
    tokens, so callers have to extract the JSON part themselves.

    Relies on the notebook-level globals `model` and `tokenizer`.

    Args:
        primary_name (str): Name to generate variants for; inserted verbatim
            into the prompt.
        max_retries (int): Maximum number of generation attempts. A further
            attempt is made only when the previous one raised an exception.

    Returns:
        str: Full decoded model output (prompt + completion) on success.
        dict: {"error": str} when the name is empty or not a string, when
            every attempt raised, or when max_retries is less than 1.
    """
    # Reject unusable input up front instead of spending a generation on it.
    if not isinstance(primary_name, str) or not primary_name.strip():
        return {"error": "primary_name must be a non-empty string"}

    # SYSTEM PROMPT - restricts the output to the three supported scripts.
    # The leading spaces inside the literal are part of the text sent to the
    # model, so the layout below must not be re-indented.
    system_prompt = """You are a specialized Name Variant Generator with expertise in Arabic, Latin, and Cyrillic writing systems. Rules:

    1. Output JSON with these EXACT fields:
    - "Latn": [Latin script variants]
    - "Arab": [Arabic script variants]
    - "Cyrl": [Cyrillic script variants]

    2. Quality Guidelines:
    - Generate ONLY linguistically accurate and culturally appropriate variants in  latn, cyrl and arab
    - Provide up to 3-5 high-quality variants per script - NO MORE
    - NEVER include low-quality or "filler" variants to reach a quota
    - If fewer than 3 legitimate variants exist, return only those valid forms
    - Empty array [] if no valid transliteration is possible

    3. Transliteration Principles:
    - Preserve phonetic integrity across writing systems
    - Include common regional spelling variations when appropriate
    - Apply proper diacritics and character mappings

    4. Output Format:
    - RETURN ONLY VALID JSON with no explanations or commentary"""

    # USER PROMPT - clear task specification
    user_prompt = f"""Generate authentic name variants across  latn, cyrl and arab scripts:
    Name: {primary_name}
    """

    # Structured prompt with JSON priming
    prompt = f"""[INST] {system_prompt}\n{user_prompt}  [/INST]
{{"""  # Intentional opening brace to force JSON

    last_error = None
    for _ in range(max_retries):
        try:
            # Place the inputs on whatever device the model lives on instead
            # of assuming "cuda".
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                max_length=1024,
                truncation=True
            ).to(model.device)

            outputs = model.generate(
                **inputs,
                max_new_tokens=500,
                temperature=0.1,
                top_p=0.9,
                do_sample=True,
                repetition_penalty=1.1,    # discourage repetitive variants
                pad_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2     # can be raised to 3 to avoid repetitive patterns
            )
            # Decode the whole sequence (prompt included), keeping special tokens.
            return tokenizer.decode(outputs[0], skip_special_tokens=False)

        except Exception as e:
            # Best-effort retry: remember the failure and try again; only the
            # last failure is reported to the caller.
            last_error = e

    if last_error is not None:
        return {"error": f"Failed after {max_retries} attempts: {str(last_error)}"}

    # Reached only when no attempt was made at all (max_retries < 1).
    return {"error": "Unknown error occurred during generation"}

# Example usage: print the raw model output for an Arabic-script name.
print(generate_variants("أسامة قريش"))

<s>[INST] You are a specialized Name Variant Generator with expertise in Arabic, Latin, and Cyrillic writing systems. Rules:

    1. Output JSON with these EXACT fields:
    - "Latn": [Latin script variants]
    - "Arab": [Arabic script variants]
    - "Cyrl": [Cyrillic script variants]

    2. Quality Guidelines:
    - Generate ONLY linguistically accurate and culturally appropriate variants in  latn, cyrl and arab
    - Provide up to 3-5 high-quality variants per script - NO MORE
    - NEVER include low-quality or "filler" variants to reach a quota
    - If fewer than 3 legitimate variants exist, return only those valid forms
    - Empty array [] if no valid transliteration is possible

    3. Transliteration Principles:
    - Preserve phonetic integrity across writing systems
    - Include common regional spelling variations when appropriate
    - Apply proper diacritics and character mappings

    4. Output Format:
    - RETURN ONLY VALID JSON with no explanations or commentary
Gen