## 1. Importing Libraries

In [None]:
!pip install bitsandbytes

import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from transformers import pipeline,AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

import os

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Authenticate against the Hugging Face Hub.
# The access token is read from the HF_TOKEN environment variable so that the
# secret never has to be written into (and committed with) the notebook.
# When the variable is unset, token=None makes login() ask for the token itself.
login(token=os.environ.get("HF_TOKEN"))

## 2. Importing the Test Set

In [None]:
# Load the semicolon-separated test set and map its "Sentence"/"Traductions"
# columns onto the "source"/"target" names used by the rest of the notebook.
column_aliases = {"Sentence": "source", "Traductions": "target"}
df = pd.read_csv("test.csv", sep=";").rename(columns=column_aliases)

# Wrap the frame in a Hugging Face Dataset.
dataset = Dataset.from_pandas(df)

## 3. Importing the Model

In [None]:
# Checkpoint used for generation.
# Alternative checkpoint: "sapienzanlp/Minerva-7B-base-v1.0"
model_name = "meta-llama/Meta-Llama-3-8B"

# Options forwarded to the model loader: bfloat16 weights, automatic device map.
load_options = {"torch_dtype": torch.bfloat16, "device_map": "auto"}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)

# Text-generation pipeline built from the already-loaded model and tokenizer.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

## 4. Prometheus Evaluator
Because the PROMETHEUS model used with vLLM has 7 billion parameters, we load it with
the Hugging Face Transformers library and then quantize it to reduce memory usage and improve inference efficiency.   
In more detail:

 - We used the Hugging Face Transformers library to load the model, as it provides a standardized interface for accessing pretrained weights and integrating them into existing pipelines.

 - We then applied quantization, a common technique that reduces the numerical precision of the model weights, with the goal of:

        - Lowering memory consumption

        - Speeding up inference

        - Maintaining reasonable accuracy

In [None]:
from prometheus import PrometheusEval_AtM

# Build the Prometheus evaluator with quantization enabled on the selected device.
evaluator = PrometheusEval_AtM(quantized=True, device=device)

## 5. Examples for In-Context Learning

In [None]:
# Few-shot demonstrations prepended to every prompt.
# Each demonstration is an archaic sentence ("Testo antico") followed by its
# modern Italian rendering ("Traduzione moderna"), both wrapped in double
# quotes so the model learns to answer with a quoted translation.
# The string must open on the same line as the assignment: a bare
# `few_shot_examples =` followed by a blank line is a SyntaxError.
few_shot_examples = """

Testo antico: "quella guerra ben fatta l' opera perché etc. Et dall' altra parte Aiaces era uno cavaliere franco e prode all' arme, di gran guisa, ma non era pieno di grande senno"
Traduzione moderna: "Quella guerra fu ben condotta per raggiungere il suo scopo. Dall'altra parte, Aiace era un cavaliere leale e valoroso nelle armi, di grande statura, ma non molto saggio."

Testo antico: "crudele, e di tutte le colpe pigli vendetta, come dice la legge, ed a neuno cavaliere perdoni che pecchi."
Traduzione moderna: "È crudele e si vendica di ogni colpa, come stabilisce la legge, e non perdona alcun cavaliere che commetta un errore."

Testo antico: "Non d' altra forza d' animo fue ornato Ponzio Aufidiano, romano cavaliere."
Traduzione moderna: "Ponzio Aufidiano, cavaliere romano, non era dotato di un coraggio superiore."

"""

## 6. Translating using In-Context Learning

In [None]:
import re

# Column order of the results file. GPT_score and user_score are written as
# all-zero placeholder columns by this cell.
df_col = ["source_sentences", "gold_sentences","predicted_sentences", "prometheus_score","GPT_score","user_score"]

# Captures the single digit that follows the "[RESULT]" tag in the evaluator output.
_RESULT_PATTERN = re.compile(r'\[RESULT\]\s*(\d)')


def _extract_translation(generated):
    """Return the model's translation for the current prompt.

    `generated` is the text produced after the prompt. The model may keep
    writing further "Testo antico / Traduzione moderna" pairs until it runs
    out of tokens, so everything from the next "Testo antico:" on is dropped.
    The translation is the first double-quoted span of what remains; when the
    model did not quote its answer, the first line is used instead of
    raising IndexError.
    """
    answer = generated.split("Testo antico:")[0].strip()
    pieces = answer.split('"')
    if len(pieces) >= 2:
        return pieces[1]
    return answer.splitlines()[0].strip() if answer else ""


def _parse_score(evaluation):
    """Return the digit following "[RESULT]" in `evaluation`, or 0 if absent."""
    found = _RESULT_PATTERN.search(evaluation)
    return int(found.group(1)) if found else 0


source_sentences = []
predicted_sentences = []
gold_sentences = []  # annotated by hand
prometheus_score = []

for i, sample in enumerate(dataset, start=1):
    input_sentence = sample["source"]
    target_sentence = sample["target"]

    user_prompt = f"""

    {few_shot_examples}

    Testo antico: "{input_sentence}"
    Traduzione moderna:
    """

    # return_full_text=False yields only the continuation, so the few-shot
    # examples contained in the prompt can never be mistaken for the answer.
    generated = pipe(
        user_prompt.strip(),
        max_new_tokens=200,
        do_sample=False,
        return_full_text=False,
    )[0]["generated_text"]
    translation = _extract_translation(generated)

    evaluation = evaluator.getEvaluation(input_sentence, translation, target_sentence)
    result = _parse_score(evaluation)

    source_sentences.append(input_sentence)
    predicted_sentences.append(translation)
    gold_sentences.append(target_sentence)
    prometheus_score.append(result)

    print(f"Sentence {i}")
    print(f"\tItaliano Arcaico -> {input_sentence}")
    print(f"\tItaliano moderno -> {translation}")
    print(f"\tGOLD LABEL       -> {target_sentence}")
    print(f"\tEVALUATION       -> {result}")
    print(f"-----------------------------------------")

# Independent placeholder columns (two separate lists, not one list under two names).
GPT_score = [0] * len(dataset)
user_score = [0] * len(dataset)

df = pd.DataFrame(
    {
        "source_sentences": source_sentences,
        "gold_sentences": gold_sentences,
        "predicted_sentences": predicted_sentences,
        "prometheus_score": prometheus_score,
        "GPT_score": GPT_score,
        "user_score": user_score,
    },
    columns=df_col,
)

df.to_csv("test_results_base.csv", sep=";")