## Testing Saved Model

In [1]:
# --- Install requirements if not yet done ---
!pip install -r /content/jawirumi_v1_model/requirements.txt --quiet

# --- Import after installation ---
from unsloth import FastLanguageModel

# --- Load once (already trained + saved) ---
def load_model(model_path="lora_model", max_seq_length=2048, dtype=None, load_in_4bit=True):
    """
    Load a saved model and its tokenizer and prepare the model for inference.

    Args:
        model_path: Value forwarded as ``model_name`` to
            ``FastLanguageModel.from_pretrained`` (default: "lora_model").
        max_seq_length: Forwarded unchanged to ``from_pretrained``.
        dtype: Forwarded unchanged to ``from_pretrained``.
        load_in_4bit: Forwarded unchanged to ``from_pretrained``.

    Returns:
        tuple: ``(model, tokenizer)`` as returned by ``from_pretrained``, after
        the model has been passed through ``FastLanguageModel.for_inference``.
    """
    # Collect the loader arguments in one place so the call site stays short.
    loader_kwargs = {
        "model_name": model_path,
        "max_seq_length": max_seq_length,
        "dtype": dtype,
        "load_in_4bit": load_in_4bit,
    }
    model, tokenizer = FastLanguageModel.from_pretrained(**loader_kwargs)

    # Called for its effect on `model`; the return value is not used.
    FastLanguageModel.for_inference(model)

    return model, tokenizer

In [None]:
# Load the saved model and tokenizer once; later cells reuse these two names.
model, tokenizer = load_model(model_path="/content/jawirumi_v1_model")

Example: Jawi → Rumi

In [None]:
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer
import torch

def jawi2rumi_generate(model, tokenizer, prompt: str,
                       chat_template="llama-3.1",
                       max_new_tokens=128,
                       temperature=0.1,
                       min_p=0.1,
                       stream=True):
    """
    Generate transliteration or text output using an Unsloth fine-tuned model.

    The prompt is prefixed with the ``<jawi2rumi>`` tag, wrapped in a single
    user message, rendered through the chat template and passed to
    ``model.generate``.

    Args:
        model: Loaded FastLanguageModel
        tokenizer: Corresponding tokenizer
        prompt (str): User input or Jawi text to process
        chat_template (str): Chat template used during training (default: llama-3.1)
        max_new_tokens (int): Maximum number of tokens to generate
        temperature (float): Sampling temperature forwarded to ``generate``
        min_p (float): Min-p threshold forwarded to ``generate`` (min-p
            filtering; this is not nucleus/top-p sampling)
        stream (bool): If True (default, matching the previous behavior),
            attach a ``TextStreamer`` so the generated text is streamed live
            while generating. If False, no streamer is used.

    Returns:
        str: ``outputs[0]`` decoded with special tokens skipped.
        NOTE(review): for decoder-only models ``generate`` returns the prompt
        ids followed by the new ids, so this string presumably also contains
        the rendered prompt text - confirm before parsing the result.
    """

    # Ensure correct chat template
    tokenizer = get_chat_template(tokenizer, chat_template=chat_template)

    # Optional live streaming of the generated text (prompt is skipped).
    text_streamer = (
        TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        if stream
        else None
    )

    # Prepare input message
    messages = [
        {"role": "user", "content": f"<jawi2rumi> {prompt}"},
    ]

    # Tokenize and send to model device
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # The input is a single, unpadded sequence, so every position is a real
    # token and the mask is all ones. Deriving the mask from
    # `inputs != pad_token_id` fails when `pad_token_id` is None and would
    # wrongly mask real tokens whenever the pad id also occurs in the prompt
    # (for example when the pad token is the same as the EOS token).
    attention_mask = torch.ones_like(inputs)

    # Run generation
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            attention_mask=attention_mask,
            streamer=text_streamer,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            min_p=min_p,
        )

    # Decode final text
    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated


In [None]:
# Run one sample Jawi sentence through the model with the default settings.
result = jawi2rumi_generate(
    model=model,
    tokenizer=tokenizer,
    prompt="چينتا سجاتي اداله سوات ڤركارا يڠ مرجوع كڤد حقيقة باطن",
)
