In [None]:
pip install -U "transformers==4.44.2" "peft==0.18.1" "accelerate==1.12.0" sentencepiece

In [7]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel

# Model identifiers handed to from_pretrained for the two translation stages.
DARJA2MSA_MODEL = "Saidtaoussi/AraT5_Darija_to_MSA"
ARAT5_BASE = "UBC-NLP/AraT5v2-base-1024"
ARAT5_ADAPTER = "HassnaaElshafei/arat5v2_adapter"

# Use the GPU when torch can see one, otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Stage 1: Darija -> MSA, loaded as a plain seq2seq checkpoint.
tok_d = AutoTokenizer.from_pretrained(DARJA2MSA_MODEL)
model_d = AutoModelForSeq2SeqLM.from_pretrained(DARJA2MSA_MODEL)
model_d = model_d.to(device)
model_d = model_d.eval()  # inference only

# Stage 2: MSA -> Egyptian, i.e. the AraT5 base model with the PEFT adapter
# loaded on top of it.
tok_e = AutoTokenizer.from_pretrained(ARAT5_BASE)
base_e = AutoModelForSeq2SeqLM.from_pretrained(ARAT5_BASE)
base_e = base_e.to(device)
model_e = PeftModel.from_pretrained(base_e, ARAT5_ADAPTER)
model_e = model_e.to(device)
model_e = model_e.eval()  # inference only

def arat5_darija_to_egyptian(text, max_length=128, num_beams=5):
    """Translate Darija text into Egyptian Arabic by pivoting through MSA.

    Two generation passes are chained: ``model_d`` rewrites the Darija input
    as MSA, then ``model_e`` rewrites that MSA text, prefixed with the
    adapter's instruction prompt, as Egyptian Arabic.

    Args:
        text: Darija sentence to translate.
        max_length: ``max_length`` forwarded to both ``generate`` calls.
        num_beams: Beam width for the MSA -> Egyptian pass only; the
            Darija -> MSA pass does not receive it and uses that model's
            default decoding settings.

    Returns:
        The decoded Egyptian text with surrounding whitespace stripped. An
        empty or whitespace-only string returns ``""`` without running
        either model.
    """
    # Nothing to translate: skip both passes rather than letting the models
    # generate text for an empty prompt.
    if isinstance(text, str) and not text.strip():
        return ""

    # Darija -> MSA
    in1 = tok_d(text, return_tensors="pt").to(device)
    msa_ids = model_d.generate(**in1, max_length=max_length)
    msa = tok_d.decode(msa_ids[0], skip_special_tokens=True).strip()

    # MSA -> Egyptian. The prompt prefix is kept exactly as written; per the
    # original note, the fine-tuned adapter expects it.
    in2 = tok_e("ุญูู ูููุตุฑู: " + msa, return_tensors="pt").to(device)
    egy_ids = model_e.generate(**in2, max_length=max_length, num_beams=num_beams)
    egy = tok_e.decode(egy_ids[0], skip_special_tokens=True).strip()
    return egy


In [8]:
# Sample input sentences for the translation demo; the loop that follows
# passes each one to arat5_darija_to_egyptian and prints the result.
moroccan_sentences = [
    "ุจุบูุช ููุดู ููุฏุงุฑ ุฏุงุจุง.",
    "ูุงุฏ ุงูุชููููู ุฏูุงูู ูุงุดู ุฏูุงูู.",
    "ุชูุญุดุชู ุจุฒุงู ูุง ุฎููุง.",
    "ุบุงุฏู ูุณุงูุฑ ููุฑุงูุณุง ุงูุณููุงูุฉ ุงูุฌุงูุฉ.",
    "ูุง ุจุบูุชุด ูุงููุ ุดุจุนุช.",
    "ูุงุด ูุงูู ุดู ุฌุฏูุฏุ",
    "ููู ุบุงุฏู ุชูุดู ู ุงูุนุดูุฉุ",
    "ุดุญุงู ุฏุงูุฑ ูุงุฏ ุงูุณุฑูุงูุ",
    "ุนุทููู ุดู ูุงุณ ุฏูุงู ุงููุงุก ุนูุงู.",
    "ุงูุฏุฑุงุฑู ูููุนุจู ุงูููุฑุฉ ู ุงูุฒููุฉ."
]

# Translate every sample sentence and print it next to its input. The banner
# names AraT5 because arat5_darija_to_egyptian is the function doing the work.
print("--- ๐ฒ๐ฆ Darija -> ๐ช๐ฌ Egyptian Using Finetuned AraT5 ---")
for sent in moroccan_sentences:
    print(f"\nInput:  {sent}")
    output = arat5_darija_to_egyptian(sent)
    print(f"Output: {output}")

--- ๐ฒ๐ฆ Darija -> ๐ช๐ฌ Egyptian Using Finetuned NLLB ---

Input:  ุจุบูุช ููุดู ููุฏุงุฑ ุฏุงุจุง.
Output: ุนุงูุฒ ุฃุฑูุญ ุงูุจูุช ุฏูููุชู.

Input:  ูุงุฏ ุงูุชููููู ุฏูุงูู ูุงุดู ุฏูุงูู.
Output: ุฏู ุชูููููู ูุด ุชูููููู.

Input:  ุชูุญุดุชู ุจุฒุงู ูุง ุฎููุง.
Output: ุฃูุง ูุงูุฑู ูุชูุฑ.

Input:  ุบุงุฏู ูุณุงูุฑ ููุฑุงูุณุง ุงูุณููุงูุฉ ุงูุฌุงูุฉ.
Output: ุญุฃุฑูุญ ูุฑูุณุง ุงูุฃุณุจูุน ุงูุฌุงู.

Input:  ูุง ุจุบูุชุด ูุงููุ ุดุจุนุช.
Output: ุฃูุง ูุด ุนุงูุฒ ุขููุ ุฃูุง ุดุจุนุช.

Input:  ูุงุด ูุงูู ุดู ุฌุฏูุฏุ
Output: ููู ุญุงุฌุฉ ุฌุฏูุฏุฉุ

Input:  ููู ุบุงุฏู ุชูุดู ู ุงูุนุดูุฉุ
Output: ุฅูุช ุฑุงูุญ ููู ุจุงููููุ

Input:  ุดุญุงู ุฏุงูุฑ ูุงุฏ ุงูุณุฑูุงูุ
Output: ุงููููุต ุฏู ุจูุงูุ

Input:  ุนุทููู ุดู ูุงุณ ุฏูุงู ุงููุงุก ุนูุงู.
Output: ุงุฏููู ููุจุงูู ููู ูุนูุงู.

Input:  ุงูุฏุฑุงุฑู ููู