In [1]:
# Install necessary packages if missing 
!pip install arabic-reshaper python-bidi rich sacrebleu



In [2]:
import sys
sys.path.append("../src")

In [3]:
from llm.translation_pipeline import ArabicToGermanTranslator
from document_processing.processor import DocumentProcessor
import arabic_reshaper
from bidi.algorithm import get_display
from rich.console import Console
from sacrebleu import corpus_bleu
from bert_score import score as bert_score
import pandas as pd

In [4]:
# # Utility Functions
# def reshape_arabic(text: str) -> str:
#     reshaped_text = arabic_reshaper.reshape(text)
#     bidi_text = get_display(reshaped_text)
#     return bidi_text

### Test _Dummy_ Translation using BLEU and BERT Score

In [19]:
# Example Arabic Texts (could be extracted dynamically later)
arabic_texts = [
    "آية الدرس: \"تنضح علىَّ بزوفاك فأطهر. تغسلني فأَبيضَ أكثرَ من الثلج\" (مزمور 50: 7 قبطي)",
    "الأسبوع الثاني من شهر برمهات (مارس)",
    "الدرس السابع: القديسة دميانة",
    "قصة توبة",
    "الدرس القادم: ليلة أبوغلمسيس"
]

# Placeholder Human Translations (Later extracted from Word docs)
human_translations = [
    "Lehrvers: \"Entsündige mich mit Ysop, dann werde ich rein, wasche mich, dann werde ich weißer sein als Schnee.\" (Psalm 50:7 Koptisch)",
    "Die zweite Woche des Monats Baramhat (März)",
    "Siebte Lektion: Die heilige Damiana",
    "Geschichte der Reue",
    "Nächste Lektion: Die Nacht von Apocalypse (Abughalamsis-Nacht)"
]

# Translate and Compare
llm_translations = []

for arabic in arabic_texts:
    console.rule("Original Arabic")
    console.print(arabic, style="bold green")

    llm_output = translator.translate(arabic)
    llm_translations.append(llm_output)

    console.rule("LLM German Translation")
    console.print(llm_output, style="bold blue")

# --- Automated BLEU Evaluation ---
bleu = corpus_bleu(llm_translations, [human_translations])
print("\n--- BLEU Evaluation Results ---")
print(f"BLEU Score: {bleu.score:.2f}")

# --- BERTScore Evaluation ---
P, R, F1 = bert_score(llm_translations, human_translations, lang="de", verbose=True)

print("\n--- BERTScore Evaluation Results ---")
print(f"Average Precision: {P.mean():.4f}")
print(f"Average Recall:    {R.mean():.4f}")
print(f"Average F1:        {F1.mean():.4f}")

# Comparison Table
comparison_df = pd.DataFrame({
    "Arabic": arabic_texts,
    "Human German": human_translations,
    "LLM German": llm_translations,
    "BERTScore F1": F1.tolist()
})

comparison_df



--- BLEU Evaluation Results ---
BLEU Score: 14.60
calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  attn_output = torch.nn.functional.scaled_dot_product_attention(


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.39 seconds, 12.69 sentences/sec

--- BERTScore Evaluation Results ---
Average Precision: 0.8323
Average Recall:    0.7751
Average F1:        0.8004


Unnamed: 0,Arabic,Human German,LLM German,BERTScore F1
0,"آية الدرس: ""تنضح علىَّ بزوفاك فأطهر. تغسلني فأ...","Lehrvers: ""Entsündige mich mit Ysop, dann werd...","Die Lektion lautet: ""Du wirst mir dein Schwert...",0.745416
1,الأسبوع الثاني من شهر برمهات (مارس),Die zweite Woche des Monats Baramhat (März),"Die zweite Woche des Monats, März.",0.851452
2,الدرس السابع: القديسة دميانة,Siebte Lektion: Die heilige Damiana,Lektion 7: Die Heilige Dame.,0.796391
3,قصة توبة,Geschichte der Reue,Eine Geschichte der Reue.,0.856793
4,الدرس القادم: ليلة أبوغلمسيس,Nächste Lektion: Die Nacht von Apocalypse (Abu...,Nächste Lektion:,0.751939


### Upload Sundyschool Lesson Translation

In [25]:
# Instantiate LLM Translator
translator = ArabicToGermanTranslator()
console = Console()

doc_path = "../docs/processed/Zweite Woche des Monats Baramhat (März)_Maria die Ägypterin.docx"
german_start = "===GERMAN START==="  # adjust based on your document

processor = DocumentProcessor(doc_path, german_start)
arabic_paragraphs, human_translations = processor.extract_text()

console.rule("Original Arabic")
for a in arabic_paragraphs:
    console.print(a, style="bold green")

console.rule("Human Translation")
for h in human_translations:
    console.print(h, style="bold blue")

Device set to use cuda:0


### Translation Lesson using LLM Model

In [21]:
# Translate and Compare
llm_translations = []

for arabic in arabic_paragraphs:  # limit for now
    llm_translation = translator.translate(arabic)
    llm_translations.append(llm_translation)

console.rule("LLM Translation")
for l in llm_translations:
    console.print(l, style="bold yellow")

### Test LLM translation vs. Human translation using BLEU and BERT Score

In [24]:
# --- Automated BLEU Evaluation ---
bleu = corpus_bleu(llm_translations, [human_translations])
print("\n--- BLEU Evaluation Results ---")
print(f"BLEU Score: {bleu.score:.2f}")

print(f"LLM translations: {len(llm_translations)}")
print(f"Human translations: {len(human_translations)}")


# --- BERTScore Evaluation ---
P, R, F1 = bert_score(llm_translations, human_translations, lang="de", verbose=True)

print("\n--- BERTScore Evaluation Results ---")
print(f"Average Precision: {P.mean():.4f}")
print(f"Average Recall:    {R.mean():.4f}")
print(f"Average F1:        {F1.mean():.4f}")

# Comparison Table
comparison_df = pd.DataFrame({
    "Arabic": arabic_texts,
    "Human German": human_translations,
    "LLM German": llm_translations,
    "BERTScore F1": F1.tolist()
})

comparison_df


--- BLEU Evaluation Results ---
BLEU Score: 1.92
LLM translations: 29
Human translations: 34


AssertionError: Different number of candidates and references