In [None]:
from datasets import load_dataset
import string
import pandas as pd
from tqdm import tqdm
from sacrebleu.metrics import BLEU, CHRF, TER
from transformers import pipeline
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

In [2]:
# Load metrics
metric_bleu = BLEU(effective_order=True)
metric_chrf = CHRF()
metric_ter = TER()

In [48]:
def process_txt(text):
  translator = str.maketrans('', '', string.punctuation)
  return text.translate(translator).strip()

def evaluate(df, MODELS) -> pd.DataFrame:
    for model_path in MODELS:
        pipe = pipeline("translation", model=model_path, max_length=512, device="cuda")
        
        for i in tqdm(df.index):
            reference = df.loc[i, 'Darija']
            english_text = df.loc[i, 'English']
            translation = pipe(english_text)[0]["translation_text"]
            
            refs = [process_txt(reference)]
            hyp = process_txt(translation)

            df.loc[i, f'translated_{model_path.split('/')[-1]}'] = translation
            df.loc[i, f'BLEU_{model_path.split('/')[-1]}'] = metric_bleu.sentence_score(references=refs, hypothesis=hyp).score
            df.loc[i, f'CHRF_{model_path.split('/')[-1]}'] = metric_chrf.sentence_score(references=refs, hypothesis=hyp).score
            df.loc[i, f'TER_{model_path.split('/')[-1]}'] = metric_ter.sentence_score(references=refs, hypothesis=hyp).score
            
        avg_bleu_score = df[f'BLEU_{model_path.split('/')[-1]}'].mean()
        avg_chrf_score = df[f'CHRF_{model_path.split('/')[-1]}'].mean()
        avg_ter_score = df[f'TER_{model_path.split('/')[-1]}'].mean()
        
        print(f'[INFO] For model: {model_path}')
        print(f'bleu_score: {avg_bleu_score}')
        print(f'chrf_score: {avg_chrf_score}')
        print(f'ter_score: {avg_ter_score}')
        print('-'*10)

    return df

In [49]:
bench = load_dataset("atlasia/TerjamaBench", split='test').to_pandas()

In [50]:
bench.head()

Unnamed: 0,topic,subtopic,Arabizi,English,Darija,annotator_dialect
0,dialect_variation,marrakech,lays3d lmasa,good evening,الله يسعد الماسا,Marrakech
1,dialect_variation,marrakech,lays3d saba7,good morning,الله يسعد الصباح,Marrakech
2,dialect_variation,marrakech,bit nmchi ndrb chi 9siyes flmdina,I’m heading to the old medina to eat something,بيت نمشي نضرب شي قسيس فالمدينة,Marrakech
3,dialect_variation,marrakech,aji lhad jih,come here,أجي لهاد جيه,Marrakech
4,dialect_variation,marrakech,achawa had ti9i9t lioma,It’s scorching hot today!,أشاوا هاد تيقيقت ليوما,Marrakech


In [51]:
MODELS = [
    "BounharAbdelaziz/Terjman-Nano-v2.2",
    "BounharAbdelaziz/Terjman-Large-v2.2",
]

In [52]:
results_df = evaluate(bench, MODELS)

Device set to use cuda
100%|██████████| 850/850 [01:06<00:00, 12.72it/s]


[INFO] For model: BounharAbdelaziz/Terjman-Nano-v2.2
bleu_score: 18.77464106466772
chrf_score: 38.41039387371171
ter_score: 100.73932218279197
----------


Device set to use cuda
100%|██████████| 850/850 [02:46<00:00,  5.11it/s]

[INFO] For model: BounharAbdelaziz/Terjman-Large-v2.2
bleu_score: 20.577393481575847
chrf_score: 41.9021347455449
ter_score: 81.81540525237182
----------





In [54]:
results_df.head()

Unnamed: 0,topic,subtopic,Arabizi,English,Darija,annotator_dialect,translated_Terjman-Nano-v2.2,BLEU_Terjman-Nano-v2.2,CHRF_Terjman-Nano-v2.2,TER_Terjman-Nano-v2.2,translated_Terjman-Large-v2.2,BLEU_Terjman-Large-v2.2,CHRF_Terjman-Large-v2.2,TER_Terjman-Large-v2.2
0,dialect_variation,marrakech,lays3d lmasa,good evening,الله يسعد الماسا,Marrakech,مساء الخير,0.0,10.470546,100.0,مساء الخير,0.0,10.470546,100.0
1,dialect_variation,marrakech,lays3d saba7,good morning,الله يسعد الصباح,Marrakech,صباح الخير,0.0,19.23267,100.0,صباح الخير,0.0,19.23267,100.0
2,dialect_variation,marrakech,bit nmchi ndrb chi 9siyes flmdina,I’m heading to the old medina to eat something,بيت نمشي نضرب شي قسيس فالمدينة,Marrakech,أنا غادي للمدينة القديمة باش ناكل شي حاجة,5.522398,22.110967,133.333333,انا غادي للمدينة القديمة باش ناكل شي حاجة,5.522398,22.110967,133.333333
3,dialect_variation,marrakech,aji lhad jih,come here,أجي لهاد جيه,Marrakech,تعال هنا,0.0,7.303708,100.0,أجي لهنا,30.326533,33.66846,66.666667
4,dialect_variation,marrakech,achawa had ti9i9t lioma,It’s scorching hot today!,أشاوا هاد تيقيقت ليوما,Marrakech,الجو حار بزاف اليوم!,0.0,14.140764,100.0,راه سخون بزاف اليوم!,0.0,15.09862,100.0
