# Imports

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pandas as pd
import json
from sklearn.metrics import cohen_kappa_score

# 📥 Caricamento dati

In [None]:
# Load the cleaned dataset and keep the first 20 non-null entries of the
# "Sentence" column as the sample to translate.
df = pd.read_csv("mnt/data/dataset_cleaned.csv")
archaic_sentences = df["Sentence"].dropna().head(20).tolist()

# Minerva

# LLaMA

## 🔁 Inizializza LLaMA 3.1 8B

In [None]:
# Load the LLaMA tokenizer and causal-LM weights, then wrap both in a
# text-generation pipeline that the translation cell calls.
# NOTE(review): the section heading says "LLaMA 3.1 8B" while this id names a
# "Llama-3-8b-hf" repository — confirm the id exists on the Hub and matches
# the intended model version.
llama_name = "meta-llama/Llama-3-8b-hf"
llama_tokenizer = AutoTokenizer.from_pretrained(llama_name)
# device_map="auto" leaves weight placement to the library instead of
# pinning a device here.
llama_model = AutoModelForCausalLM.from_pretrained(llama_name, device_map="auto")
llama_gen = pipeline("text-generation", model=llama_model, tokenizer=llama_tokenizer)

## 🧠 Traduzioni con LLaMA

In [None]:
# Translate every archaic sentence with LLaMA (sampling disabled, at most 60
# new tokens) and collect one translation string per input sentence.
translations_llama = []
for s in archaic_sentences:
    # A real newline separates the sentence from the answer cue; the previous
    # "\\n" put a literal backslash followed by "n" into the prompt.
    prompt = f"Traduci in italiano moderno: {s}\nTraduzione:"
    result = llama_gen(prompt, max_new_tokens=60, do_sample=False)[0]["generated_text"]
    if result.startswith(prompt):
        # The pipeline echoes the prompt; drop it so that only the model's
        # continuation remains, even if the model emits the cue again later.
        continuation = result[len(prompt):]
    else:
        # Fallback when the echoed text does not match the prompt exactly:
        # keep whatever follows the last answer cue.
        continuation = result.split("Traduzione:")[-1]
    # Keep only the first line of the answer so that any follow-up text the
    # model generates after the translation is not stored with it.
    answer_lines = continuation.strip().splitlines()
    translated = answer_lines[0].strip() if answer_lines else ""
    translations_llama.append(translated)


# 🔍 Simulazione punteggi da LLM-as-a-Judge

In [None]:
# Placeholder scores, one per sentence (these are simulated, not real ratings):
# - the manual reference is a constant 5;
# - the Minerva judge gives 4 on every third index (0, 3, 6, ...) and 5 otherwise;
# - the LLaMA judge alternates 4 on even indices and 5 on odd indices.
manual_scores = [5 for _ in archaic_sentences]
judge_scores_minerva = [4 if idx % 3 == 0 else 5 for idx, _ in enumerate(archaic_sentences)]
judge_scores_llama = [5 if idx % 2 else 4 for idx, _ in enumerate(archaic_sentences)]

# 📊 Calcolo concordanza

In [None]:
# Agreement between the manual scores and each judge's scores, computed with
# Cohen's kappa and reported with two decimals.
kappa_minerva, kappa_llama = (
    cohen_kappa_score(manual_scores, judged)
    for judged in (judge_scores_minerva, judge_scores_llama)
)

print(f"Cohen’s Kappa (Minerva): {kappa_minerva:.2f}")
print(f"Cohen’s Kappa (LLaMA): {kappa_llama:.2f}")


# 💾 Salvataggio risultati JSONL

In [None]:
def save_jsonl(name, originals, translations):
    """Write original/translation pairs to ``mnt/data/<name>.jsonl``.

    Each pair becomes one JSON object with the keys ``"original"`` and
    ``"translation"``, written on its own line (JSON Lines). Non-ASCII
    characters are written as-is in UTF-8.

    Args:
        name: Output file name without the ``.jsonl`` extension.
        originals: Iterable of source sentences.
        translations: Iterable of translations, paired positionally with
            ``originals``; pairing stops at the shorter of the two.
    """
    with open(f"mnt/data/{name}.jsonl", "w", encoding="utf-8") as f:
        # Terminate every record with a real newline; the previous "\\n"
        # wrote a literal backslash-n and left all records on one line.
        f.writelines(
            json.dumps({"original": arc, "translation": trans}, ensure_ascii=False) + "\n"
            for arc, trans in zip(originals, translations)
        )

def save_judging(name, originals, scores):
    """Write judged sentences to ``mnt/data/<name>.jsonl``.

    Each record is one JSON object with the keys ``"id"`` (zero-based
    position), ``"original"`` and ``"score"``, written on its own line
    (JSON Lines). Non-ASCII characters are written as-is in UTF-8.

    Args:
        name: Output file name without the ``.jsonl`` extension.
        originals: Iterable of source sentences.
        scores: Iterable of scores, paired positionally with ``originals``;
            pairing stops at the shorter of the two.
    """
    with open(f"mnt/data/{name}.jsonl", "w", encoding="utf-8") as f:
        # Terminate every record with a real newline; the previous "\\n"
        # wrote a literal backslash-n and left all records on one line.
        f.writelines(
            json.dumps({"id": i, "original": arc, "score": score}, ensure_ascii=False) + "\n"
            for i, (arc, score) in enumerate(zip(originals, scores))
        )

# Persist the translations of both models, then the judge scores of both
# models, as JSONL files under mnt/data/.
# NOTE(review): translations_minerva is not defined in the visible part of the
# notebook (the Minerva section is empty) — confirm it is produced before this
# cell runs. The "llama3B" file name also differs from the 8B checkpoint
# loaded earlier; verify the intended name.
for out_name, model_outputs in (
    ("groupX-hw2_transl-minerva350M", translations_minerva),
    ("groupX-hw2_transl-llama3B", translations_llama),
):
    save_jsonl(out_name, archaic_sentences, model_outputs)

for out_name, model_scores in (
    ("groupX-hw2_transl-judge_minerva", judge_scores_minerva),
    ("groupX-hw2_transl-judge_llama", judge_scores_llama),
):
    save_judging(out_name, archaic_sentences, model_scores)