In [None]:
# Translation errors still to be fixed
# English gloss of the note below: with the "detailed" prompt gemma also returns
# an explanation of its answer; llama3 does so with every prompt, and even more
# verbosely with "detailed" and "role_based".
"""
    gemma detailed : fornisce anche la spiegazione della risposta
    llama3 tutti: fornisce anche la spiegazione nella risposta, ancora piu dettagliata in detailed and role_based

"""
#####
# Notes
# English gloss of the note below: "Cerb" (presumably the cerbero model - TODO confirm)
# produces an English translation with the "detailed" and "role based" prompts
# (to be written in the report if it persists); with the teacher/student prompt
# some outputs are in English and some contain more than just the translation.
# This string is the last expression of the cell, so it is also what the cell displays.
"""
    Cerb detailed e role based fa una traduzione in inglese (scrivere nel report se persiste)
    Cerb teacher stud alcune inglese e alcune non solo traduzione 

"""

## Install libraries

In [None]:
# Install the third-party packages into the environment of the running kernel.
# NOTE(review): no versions are pinned here - consider pinning them so the
# notebook stays reproducible.
%pip install transformers accelerate torch
# Python client package for Ollama.
%pip install ollama

In [None]:
import json

# Path of the JSON configuration file that provides the 'models' and 'prompts' lists.
CONFIG_FILE_PATH = "config.json"


def load_config(path):
    """Read and parse the JSON configuration file at ``path``.

    Args:
        path: Location of the JSON file to read (opened as UTF-8).

    Returns:
        The parsed top-level JSON object as a ``dict``, or ``None`` when the
        file is missing, is not valid JSON, or does not hold a JSON object at
        its top level. In each failure case an error message is printed.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        print(f"Error: Config file '{path}' not found.")
        return None
    except json.JSONDecodeError:
        print(f"Error: File '{path}' not a valid JSON.")
        return None

    # The lookups below use dict access, so any other JSON root (list, string, ...)
    # is reported as a configuration error instead of raising AttributeError.
    if not isinstance(loaded, dict):
        print(f"Error: File '{path}' must contain a JSON object at the top level.")
        return None
    return loaded


_loaded_config = load_config(CONFIG_FILE_PATH)

# Keep `config` defined even when loading failed, so later cells do not hit a NameError.
config = _loaded_config if _loaded_config is not None else {}
models = config.get('models', [])
prompts = config.get('prompts', [])

# Report success only when the file was really loaded; the error branches
# previously fell through to an unconditional success message.
if _loaded_config is not None:
    print("Configuration loaded successfully.")
    print(f"Models: {models}")
    print(f"Prompts: {prompts}")
else:
    print("Configuration NOT loaded: 'models' and 'prompts' are empty.")

## Define parameters

In [None]:
import pandas as pd
from tqdm import tqdm

# File locations and the name of the column whose values are fed to the prompts.
dataset_path = "dataset/dataset_cleaned.csv"
output_path = "dataset/output.csv"
col_name = "Sentence"

# Read the cleaned dataset into a DataFrame.
df = pd.read_csv(dataset_path)

# The list of models is not set here: it is read from the config file.
# Prompt templates, keyed by prompting strategy. Every template holds exactly one
# ``{sentence}`` placeholder (filled in with ``str.format``) and ends with the
# same closing instruction.
# NOTE(review): "base" and "few_shot" have a "." right before the closing
# instruction; it looks like a typo but is kept byte-for-byte so the prompts
# sent to the models do not change.
_ONLY_TRANSLATION = "The output must be ONLY the translated sentence in modern italian:"

prompt_templates = {
    "base": (
        "Translate the following sentence from archaic italian to modern italian:"
        "\n\n{sentence}\n\n."
        + _ONLY_TRANSLATION
    ),
    "detailed": (
        "The following text is written in archaic Italian from the 13th century, "
        "originating from the Tuscany region. "
        "Rewrite it in modern Italian while preserving the original meaning, clarity, "
        "and syntactic coherence. "
        "First analyze the structure, then identify key words, then write the final version:"
        "\n\n {sentence}\n\n"
        + _ONLY_TRANSLATION
    ),
    "role-based": (
        "You are an expert linguist specializing in the evolution of Italian language. "
        "Translate this 13th century Tuscan text to contemporary Italian "
        "while preserving the original meaning, clarity, and syntactic coherence: "
        "\n\n{sentence}\n\n"
        + _ONLY_TRANSLATION
    ),
    "few_shot": (
        "Here are some examples of sentences in archaic Italian from the 13th century "
        "translated into modern Italian:\n\n"
        "Archaic Italian: «quella guerra ben fatta l' opera perché etc.». Modern Italian: «quella guerra fu condotta bene, e l'opera fu compiuta come previsto.».\n"
        "Archaic Italian: «crudele, e di tutte le colpe pigli vendetta». Modern Italian: «crudele, e si vendica di tutte le colpe.»\n"
        "Archaic Italian: «Non d' altra forza d' animo fue ornato Ponzio Aufidiano». Modern Italian: «Ponzio Aufidiano non era dotato di altro vigore d’animo.»\n\n"
        "Now translate the following sentence from archaic italian to modern italian "
        "while preserving the original meaning:\n\n"
        "{sentence}\n\n."
        + _ONLY_TRANSLATION
    ),
    "teacher_student": (
        "A student asked: 'What does this old Italian sentence mean in modern language?'\n"
        "You, a university professor of historical linguistics, "
        "respond with a clear and faithful modern Italian translation"
        "\n\n{sentence}\n\n"
        + _ONLY_TRANSLATION
    ),
}


## Translate the sentences

In [None]:
from ollama import Client

import os

# Directory that receives one CSV file per (model, prompt) pair.
OUTPUT_DIR = "translations"

# Short, hand-picked file labels for the models whose names are not used
# verbatim in the output file name.
MODEL_FILE_LABELS = {
    "qwen:7B": "qwen",
    "galatolo/cerbero-7b": "cerbero",
}


def build_output_file(model_name, prompt_name):
    """Return the CSV path used to store one model/prompt translation run.

    Args:
        model_name: Model identifier, as listed in ``models``.
        prompt_name: Key of the prompt template in ``prompt_templates``.

    Returns:
        ``"translations/translation_<label>_<prompt_name>.csv"``, where
        ``<label>`` is the entry of ``MODEL_FILE_LABELS`` for the model or,
        failing that, the model name with path separators replaced by ``_``.
    """
    label = MODEL_FILE_LABELS.get(model_name)
    if label is None:
        # A separator inside the model name (e.g. "org/model") would make the
        # path point into a sub-directory that does not exist.
        label = model_name.replace("/", "_").replace("\\", "_")
    return f"{OUTPUT_DIR}/translation_{label}_{prompt_name}.csv"


client = Client(host='http://localhost:11434')  # client for the local Ollama server

# Create the output directory up front: otherwise a missing folder is only
# discovered by to_csv after a complete (slow) translation pass.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for model_name in models:  # one run per model ...
    for prompt_name, prompt_template in prompt_templates.items():  # ... and per prompt template
        print(f"\n Translation with model: {model_name} | prompt: {prompt_name}")
        translations = []  # one entry per row of df, in row order
        n_errors = 0
        for sentence in tqdm(df[col_name]):
            try:
                prompt = prompt_template.format(sentence=sentence)  # fill in the template

                response = client.chat(
                    model=model_name,
                    messages=[{"role": "user", "content": prompt}]
                )  # Ollama API call

                translation = response['message']['content'].strip()  # keep only the reply text
            except Exception as e:
                # Best effort: a failing sentence must not abort the whole run,
                # so the error text is stored in place of the translation.
                translation = f"[ERROR]: {e}"
                n_errors += 1
            translations.append(translation)

        # The "translation" column is overwritten on every run; each run is
        # persisted to its own CSV file right below.
        df["translation"] = translations
        output_file = build_output_file(model_name, prompt_name)

        df.to_csv(output_file, index=False)
        print(f"Translation saved in '{output_file}'")
        if n_errors:
            print(f"Warning: {n_errors}/{len(translations)} sentences failed and were saved as '[ERROR]: ...' placeholders.")