**Set up**

In [None]:
!pip install accelerate
!pip install rouge
!pip install torch
!pip install mistralai
!pip install transformers


In [None]:
from transformers import EncoderDecoderModel, BertTokenizer
from rouge import Rouge
import pandas as pd 
from tqdm import tqdm 
import torch
import os 
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

**Load data**

In [None]:
# Path of the evaluation CSV read below with `pd.read_csv`; set this before running.
# The later cells read the columns "Transformed_text" and "summary" from that file.
PATH_TO_EVAL_DATA_CONTAINING_OCR_AND_ASR_OUTPUT = ""

In [None]:
# Load the evaluation set into a DataFrame from the configured CSV path.
eval_set = pd.read_csv(PATH_TO_EVAL_DATA_CONTAINING_OCR_AND_ASR_OUTPUT)

Generate summary using the reference model trained on the Goud dataset 

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): `max_length=1000` is passed to `from_pretrained`; confirm this kwarg has the
# intended effect (truncation length is normally given when calling the tokenizer).
tokenizer = BertTokenizer.from_pretrained("Goud/DarijaBERT-summarization-goud", max_length=1000)
# Load the reference summarization checkpoint and move it to the selected device.
# NOTE(review): no cell visible here runs DarijaBERT generation (e.g. `tokenizer(...)` followed by
# `model.generate(...)`) — confirm whether a generation step is missing from the notebook.
model = EncoderDecoderModel.from_pretrained("Goud/DarijaBERT-summarization-goud").to(device)

Calculate Rouge score

In [None]:
# Score each row of the evaluation set against its reference summary with ROUGE.
# NOTE(review): the hypotheses are the "Transformed_text" column itself (the same column that is
# later fed to `summarize_text` as input), not model-generated summaries — confirm this is the
# intended baseline.
rouge_scorer = Rouge()
darija_score = rouge_scorer.get_scores(
    hyps=eval_set["Transformed_text"],
    refs=eval_set["summary"],
)
# Display the raw scores as the cell output.
darija_score

In [None]:
def calculate_rouge_for_wholde_df(rouge_scores):
    """Average per-record ROUGE scores into one score for the whole dataset.

    Args:
        rouge_scores: Sized sequence of per-record score dicts. Each record must
            contain the keys 'rouge-1', 'rouge-2' and 'rouge-l', and each of those
            must map to a dict with the keys 'r', 'p' and 'f'.

    Returns:
        A dict with the same 'rouge-1' / 'rouge-2' / 'rouge-l' -> 'r' / 'p' / 'f'
        layout, where every value is the arithmetic mean over all records.
        When `rouge_scores` is empty, every value is 0.0 instead of raising
        ZeroDivisionError.
    """
    metrics = ('rouge-1', 'rouge-2', 'rouge-l')
    components = ('r', 'p', 'f')
    num_records = len(rouge_scores)

    # An empty input has no mean; report zeros rather than dividing by zero.
    if num_records == 0:
        return {metric: {component: 0.0 for component in components} for metric in metrics}

    return {
        metric: {
            component: sum(item[metric][component] for item in rouge_scores) / num_records
            for component in components
        }
        for metric in metrics
    }

In [None]:
# Collapse the per-row baseline scores into mean recall / precision / F values per ROUGE variant.
darija_bert_rouge = calculate_rouge_for_wholde_df(darija_score)

Generate summary using the fine-tuned model

In [None]:
# Read the Mistral API key from the environment so it is never hardcoded in the notebook.
api_key = os.environ.get("MISTRAL_API_KEY")
client = MistralClient(api_key=api_key)
# Look up the fine-tuning job.
# NOTE(review): "job_id" looks like a placeholder — replace it with the real job identifier.
retrieved_jobs = client.jobs.retrieve("job_id")

In [None]:
def summarize_text(text):
    """Summarize a text with the fine-tuned Mistral model.

    Args:
        text: The text to summarize; it is sent as the user message.

    Returns:
        The message content of the first choice in the chat response.
    """
    # Instruction, in Arabic: "Your task is to summarize a text written in the Maghrebi
    # dialect. You may only use information from the text given to you, and no external
    # information. Summarize the text in at most 30 words in the Maghrebi language."
    prompt = (
        "مهمتك هي تلخيص نص مكتوب باللهجة المغاربية. "
        "لا يجوز لك استخدام المعلومات إلا من النص المقدم لك، "
        "ولا يجوز لك استخدام أي معلومات خارجية. "
        "قم بتلخيص النص في 30 كلمة على الأكثر باللغة المغاربية."
    )
    messages = [
        # The instructions are sent as the system message; the text to summarize is the user turn.
        ChatMessage(role="system", content=prompt),
        ChatMessage(role="user", content=text),
    ]
    chat_response = client.chat(
        # Use the model id produced by the fine-tuning job. The global `model` is the
        # DarijaBERT EncoderDecoderModel object, not a Mistral model name.
        model=retrieved_jobs.fine_tuned_model,
        messages=messages,
    )
    return chat_response.choices[0].message.content


In [None]:
# Call `summarize_text` once per row of "Transformed_text" and store the results in a new column.
eval_set['fine_tuned_summary'] = eval_set['Transformed_text'].apply(summarize_text)

In [None]:
# Score the fine-tuned model's summaries against the reference summaries, row by row.
fine_tuned_score = rouge_scorer.get_scores(
    hyps=eval_set["fine_tuned_summary"],
    refs=eval_set["summary"],
)
# Average the per-row scores into one recall / precision / F triple per ROUGE variant.
fine_tuned_rouge = calculate_rouge_for_wholde_df(fine_tuned_score)