In [None]:
# Translation errors to fix
"""
    gemma detailed: the answer also includes an explanation
    llama3, all prompts: the answer also includes an explanation, even more detailed with detailed and role_based

"""
#####
# Notes
"""
    Cerbero, detailed and role_based: the output is translated into English (mention in the report if it persists)
    Cerbero, teacher_student: some outputs are in English and some contain more than just the translation

"""

### Install libraries

In [None]:
%pip install transformers accelerate torch
%pip install ollama

In [None]:
import json

# Path to the JSON file that lists the models and prompt names to run.
CONFIG_FILE_PATH = "config.json"


def load_config(path):
    """Read the JSON configuration stored at ``path``.

    Args:
        path: Location of the JSON config file.

    Returns:
        The parsed configuration as a ``dict``, or ``None`` when the file is
        missing, is not valid JSON, or does not hold a JSON object. An error
        message is printed in each of those failure cases.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        print(f"Error: Config file '{path}' not found.")
        return None
    except json.JSONDecodeError:
        print(f"Error: File '{path}' not a valid JSON.")
        return None

    # The lookups below use ``.get``, which only exists on a mapping; a
    # top-level list or string would otherwise raise AttributeError.
    if not isinstance(loaded, dict):
        print(f"Error: File '{path}' must contain a JSON object.")
        return None
    return loaded


config = load_config(CONFIG_FILE_PATH)

# Fall back to empty lists so that loops over these names simply do nothing
# when the configuration could not be read.
models = config.get('models', []) if config is not None else []
prompts = config.get('prompts', []) if config is not None else []

# Report success only when the file was actually read and parsed; previously
# the success message was printed even after one of the errors above.
if config is not None:
    print("Config success:")
    print(f"Models: {models}")
    print(f"Prompts: {prompts}")
    print("Configuration loaded successfully.")
    print(f"Models: {models}")

Config success:
Models: ['galatolo/cerbero-7b', 'gemma', 'llama3']
Prompts: ['base', 'detailed', 'few_shot', 'role_based', 'teacher_student']
Configuration loaded successfully.
Models: ['galatolo/cerbero-7b', 'gemma', 'llama3']


## Define parameters

In [18]:
import pandas as pd
from tqdm import tqdm

# Input CSV with the sentences to translate.
dataset_path = "dataset/dataset_cleaned.csv"
df = pd.read_csv(dataset_path)  # load the dataset into a DataFrame
# NOTE(review): output_path is not referenced by any other cell visible in this notebook.
output_path = "dataset/output.csv"
# Name of the dataset column that holds the sentence to translate.
col_name = "Sentence"
# The models to run are loaded from the config file.
# Prompt templates, keyed by prompt name. Every template contains a
# `{sentence}` placeholder that is filled in with the archaic sentence to
# translate; the key is also used as part of the output file names.
# Keys are snake_case throughout ("role_based", not "role-based").
prompt_templates={
    "base":        "Translate the following sentence from archaic italian to modern italian:\n\n{sentence}\n\nThe output must be ONLY the translated sentence in modern italian:",
    "detailed": "The following text is written in archaic Italian from the 13th century, originating from the Tuscany region. Rewrite it in modern Italian while preserving the original meaning, clarity, and syntactic coherence. First analyze the structure, then identify key words, then write the final version:\n\n {sentence}\n\nThe output must be ONLY the translated sentence in modern italian:",
    "role_based":   "You are an expert linguist specializing in the evolution of Italian language. Translate this 13th century Tuscan text to contemporary Italian while preserving the original meaning, clarity, and syntactic coherence: \n\n{sentence}\n\nThe output must be ONLY the translated sentence in modern italian:",
    "few_shot": (
            "Here are some examples of sentences in archaic Italian from the 13th century translated into modern Italian:\n\n"
            "Archaic Italian: «quella guerra ben fatta l' opera perché etc.». Modern Italian: «quella guerra fu condotta bene, e l'opera fu compiuta come previsto.».\n"
            "Archaic Italian: «crudele, e di tutte le colpe pigli vendetta». Modern Italian: «crudele, e si vendica di tutte le colpe.»\n"
            "Archaic Italian: «Non d' altra forza d' animo fue ornato Ponzio Aufidiano». Modern Italian: «Ponzio Aufidiano non era dotato di altro vigore d’animo.»\n\n"
            "Now translate the following sentence from archaic italian to modern italian while preserving the original meaning:\n\n"
            "{sentence}\n\nThe output must be ONLY the translated sentence in modern italian:"
                ),
    "teacher_student": (
            "A student asked: 'What does this old Italian sentence mean in modern language?'\n"
            "You, a university professor of historical linguistics, respond with a clear and faithful modern Italian translation\n\n{sentence}\n\nThe output must be ONLY the translated sentence in modern italian:"
)
}


## Translate the sentences

In [None]:
import os

from ollama import Client

client = Client(host='http://localhost:11434')  # client for the local Ollama server

for model_name in models:  # iterate through models
    for prompt_name, prompt_template in prompt_templates.items():  # iterate through prompt templates
        print(f"\n Translation with model: {model_name} | prompt: {prompt_name}")

        # TODO
        # Build the output file name; some model names are special-cased to a short tag.
        if model_name == "qwen:7B":
            output_file = f"translations/translation_qwen_{prompt_name}.csv"
        elif model_name == "galatolo/cerbero-7b":
            output_file = f"translations/translation_cerbero_{prompt_name}.csv"
        else:
            output_file = f"translations/translation_{model_name}_{prompt_name}.csv"

        # to_csv does not create missing directories. Create the target up
        # front so a missing folder cannot discard a finished translation run.
        os.makedirs(os.path.dirname(output_file), exist_ok=True)

        translations = []  # one entry per input sentence, in dataset order
        for sentence in tqdm(df[col_name]):  # iterate through sentences
            try:
                prompt = prompt_template.format(sentence=sentence)  # fill in the template

                response = client.chat(
                    model=model_name,
                    messages=[{"role": "user", "content": prompt}]
                )  # Ollama API call

                translation = response['message']['content'].strip()  # extract the translation
            except Exception as e:
                # Best effort: record the failure in place of the translation
                # so one failing request does not abort the whole run.
                translation = f"[ERROR]: {e}"
            translations.append(translation)

        # Write a copy that carries the translations instead of adding the
        # column to the shared `df`, which stays as loaded from the dataset.
        df.assign(translation=translations).to_csv(output_file, index=False)
        print(f"Translation saved in '{output_file}'")

## Evaluation with Prometheus

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face model id passed to `from_pretrained` below.
# NOTE: `model_name` is also the loop variable of the translation and scoring
# cells, so its value changes whenever those cells run.
model_name = "Unbabel/M-Prometheus-3B"
cache_dir = "./models/m_prometheus_3b"  # local directory to cache the model

# Load the tokenizer and model; both are later handed to the judge pipeline.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_dir)

### Evaluation prompt

In [None]:
# Prompt given to the judge model for each (original, translation) pair.
# `{original}` and `{translation}` are filled in with `str.format` in the
# scoring cell; the judge is asked to answer with a single score from 1 to 5.
evaluation_prompt_template = """
You are evaluating the quality of a modern Italian translation based on an original archaic Italian sentence.

Original Archaic Sentence:
{original}

Candidate Modern Translation:
{translation}

Please score the "Candidate Modern Translation" using the following 1-5 scale:

1. **Completely Unacceptable Translation:** Meaningless or totally wrong.
2. **Severe Semantic Errors/Omissions:** Big meaning problems, serious issues.
3. **Partially Wrong Translation / Lackluster:** Understandable but flawed.
4. **Good Translation:** Mostly correct, minor issues.
5. **Perfect Translation:** Fully correct, fluent, natural.

Based on the above criteria, respond only with the numeric score (1 to 5). Do not explain or write anything else.
"""

### Give the scores

In [None]:
from transformers import pipeline
import torch
from transformers import TextGenerationPipeline 
# Run the judge on the first CUDA GPU when one is available, otherwise on the
# CPU (device=-1). A hard-coded device=0 fails on machines without CUDA.
judge_pipeline = TextGenerationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# Function to extract score from the model's output
def extract_score(output_text):
    """Parse the judge's reply into an integer score between 1 and 5.

    Only the first whitespace-separated token is considered, so any text the
    model adds after the score is ignored. Punctuation or markdown wrapped
    around that token (for example ``"4."`` or ``"**5**"``) is stripped
    before parsing.

    Args:
        output_text: Text generated by the judge after the prompt.

    Returns:
        The score as an ``int`` in the range 1-5, or the string
        ``"[INVALID OUTPUT]"`` when no valid score can be read.
    """
    invalid = "[INVALID OUTPUT]"

    # Anything that is not text (e.g. None) cannot contain a score.
    if not isinstance(output_text, str):
        return invalid

    tokens = output_text.split()
    if not tokens:
        return invalid

    # Drop decoration around the number; "-" is deliberately not stripped so
    # a negative number stays out of range instead of turning positive.
    candidate = tokens[0].strip(".,:;!?*()[]\"'")
    try:
        score = int(candidate)
    except ValueError:
        return invalid

    return score if 1 <= score <= 5 else invalid

import os

for model_name in models:  # iterate through translations
    for prompt_name in prompt_templates:
        # TODO
        # Some model names are special-cased to a short tag in the file names.
        if model_name == "qwen:7B":
            model_tag = "qwen"
        elif model_name == "galatolo/cerbero-7b":
            model_tag = "cerbero"
        else:
            model_tag = model_name
        file_path = f"translations/translation_{model_tag}_{prompt_name}.csv"
        scored_path = f"scores_prometheus/scored_{model_tag}_{prompt_name}.csv"

        # A missing translation file should not abort the other combinations.
        if not os.path.exists(file_path):
            print(f"Translation file not found, skipping: {file_path}")
            continue
        df_translation = pd.read_csv(file_path)

        # to_csv does not create missing directories; create the target
        # before the slow scoring loop so its results can always be saved.
        os.makedirs(os.path.dirname(scored_path), exist_ok=True)

        scores = []  # one score per row, in file order
        for _, row in tqdm(df_translation.iterrows(), total=len(df_translation)):
            original = row[col_name]
            translation = row["translation"]

            # Build the evaluation prompt for this pair.
            prompt = evaluation_prompt_template.format(original = original, translation=translation)

            # Deterministic generation; only a few new tokens are requested.
            output = judge_pipeline(prompt, max_new_tokens=10, do_sample=False)[0]["generated_text"]
            # Cut off the leading prompt so that only the continuation is parsed.
            score = extract_score(output[len(prompt):])
            scores.append(score)

        df_translation["Score"] = scores  # add the scores to the dataframe
        # TODO
        # Save the dataframe with scores and report the path actually written.
        df_translation.to_csv(scored_path, index=False)
        print(f"Scores saved in '{scored_path}'")

### Average scores

In [None]:
import os
import pandas as pd

results = []  # one record per (model, prompt) pair that has a scored file
for model_name in models:  # iterate through scores
    for prompt_name in prompt_templates.keys():
        # TODO
        # Locate the scored file and pick the short model label for the summary.
        if model_name == "qwen:7B":
            file_path = f"scores_prometheus/scored_qwen_{prompt_name}.csv"
            short_model = "qwen"
        elif model_name == "galatolo/cerbero-7b":
            file_path = f"scores_prometheus/scored_cerbero_{prompt_name}.csv"
            short_model = "cerbero"
        else:
            file_path = f"scores_prometheus/scored_{model_name}_{prompt_name}.csv"
            short_model = model_name.replace("/", "").replace(":", "_")

        if not os.path.exists(file_path):
            print(f"file not found in {file_path}")
            continue

        # Use a dedicated name: assigning to `df` would overwrite the dataset
        # DataFrame that the translation cell iterates over.
        df_scored = pd.read_csv(file_path)

        # The scoring cell writes the column as "Score"; the lowercase
        # spelling is still accepted for files produced some other way.
        score_col = next((c for c in ("Score", "score") if c in df_scored.columns), None)
        if score_col is None:
            print(f"Column 'Score' not found in {file_path}")
            continue

        # Replies that could not be parsed are stored as "[INVALID OUTPUT]";
        # coerce them to NaN so that they are left out of the mean.
        numeric_scores = pd.to_numeric(df_scored[score_col], errors="coerce")
        results.append({
            "model": short_model,
            "prompt": prompt_name,
            "mean_score": numeric_scores.mean()
        })

df_results = pd.DataFrame(results)  # create a dataframe with the results

# NOTE(review): the "scores" directory is created here, but the summary is
# written to the working directory — confirm the intended location.
os.makedirs("scores", exist_ok=True)
df_results.to_csv("final_scores_prometheus.csv", index=False)  # save the results to a csv file
