# Imports

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pandas as pd
import json
from sklearn.metrics import cohen_kappa_score
import torch
from tqdm import tqdm

# 📥 Caricamento dati

In [2]:
df = pd.read_csv("mnt/data/dataset_cleaned.csv")
archaic_sentences = df["Sentence"].dropna().tolist()[:20]

# AUX

In [3]:
# 🔁 Traduzioni
def translate(pipe, sentences, label="Modello"):
    results = []
    for s in sentences:
        prompt = (
        "Sei un traduttore professionista di testi antichi in italiano moderno.\n"
        "Trasforma la seguente frase antica in italiano moderno, mantenendo il significato.\n"
        f"Testo antico: {s}\n"
        "Traduzione moderna:"
        )
        result = pipe(prompt, max_new_tokens=60, do_sample=True, temperature=0.7, top_p=0.9)[0]["generated_text"]
        trad = result.split("Traduzione moderna:")[-1].strip().split("\n")[0]
        print(f"[{label}] →", trad)
        results.append(trad)
    return results

# 💾 Salva risultati
def save_jsonl(name, originals, translations):
    with open(f"mnt/data/{name}.jsonl", "w", encoding="utf-8") as f:
        for arc, trans in zip(originals, translations):
            f.write(json.dumps({"original": arc, "translation": trans}, ensure_ascii=False) + "\n")

def save_judging(name, originals, scores):
    with open(f"mnt/data/{name}.jsonl", "w", encoding="utf-8") as f:
        for i, (arc, score) in enumerate(zip(originals, scores)):
            f.write(json.dumps({"id": i, "original": arc, "score": score}, ensure_ascii=False) + "\n")


# ✅ Caricamento Minerva (su CPU)

In [17]:
minerva_name = "sapienzanlp/Minerva-350M-base-v1.0"
minerva_tokenizer = AutoTokenizer.from_pretrained(minerva_name)
minerva_model = AutoModelForCausalLM.from_pretrained(minerva_name)
minerva_pipe = pipeline("text-generation", model=minerva_model, tokenizer=minerva_tokenizer, device= -1)


loading file tokenizer.model from cache at C:\Users\colam\.cache\huggingface\hub\models--sapienzanlp--Minerva-350M-base-v1.0\snapshots\65e78a1551d0ea16f3f5860e846b8bd6b5e9a59b\tokenizer.model
loading file tokenizer.json from cache at C:\Users\colam\.cache\huggingface\hub\models--sapienzanlp--Minerva-350M-base-v1.0\snapshots\65e78a1551d0ea16f3f5860e846b8bd6b5e9a59b\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\colam\.cache\huggingface\hub\models--sapienzanlp--Minerva-350M-base-v1.0\snapshots\65e78a1551d0ea16f3f5860e846b8bd6b5e9a59b\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\colam\.cache\huggingface\hub\models--sapienzanlp--Minerva-350M-base-v1.0\snapshots\65e78a1551d0ea16f3f5860e846b8bd6b5e9a59b\tokenizer_config.json
loading file chat_template.jinja from cache at None
loading configuration file config.json from cache at C:\Users\colam\.cache\huggingface\hub\models--sapie

# ✅ Caricamento LLaMA-2 7B (su GPU)

In [None]:
#llama_name = "meta-llama/Llama-3-8b-hf" llama 3.1 1B
llama_name = "NousResearch/Llama-2-7b-hf"
llama_tokenizer = AutoTokenizer.from_pretrained(llama_name)
llama_model = AutoModelForCausalLM.from_pretrained(llama_name, torch_dtype=torch.float16, device_map="auto")
llama_pipe = pipeline("text-generation", model=llama_model, tokenizer=llama_tokenizer)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0


# Fine Tuning

## Load Dataset

In [4]:
from datasets import load_dataset
dataset = load_dataset("csv", data_files="fine_tuning/dataset_dante_purgat.csv")["train"]
dataset = dataset.train_test_split(test_size=0.1)

## Modello e Tokenizer

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType


bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # Per evitare errori su padding

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    quantization_config=bnb_config,
    device_map="auto"
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

## Configurazione Lora

In [6]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    task_type=TaskType.CAUSAL_LM,
    lora_dropout=0.1,
    bias="none"
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()


trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.0470


## Preprocessing dataset

In [7]:
def format_prompt(example):
    prompt = (
        "Sei un traduttore professionista di testi antichi in italiano moderno.\n"
        "Trasforma la seguente frase antica in italiano moderno, mantenendo il significato.\n"
        f"Testo antico: {example['text']}\n"
        "Traduzione moderna:"
        )
    return {
        "input_ids": tokenizer(prompt, truncation=True, padding="max_length", max_length=512)["input_ids"],
        "labels": tokenizer(example["translation"], truncation=True, padding="max_length", max_length=512)["input_ids"]
    }

tokenized_dataset = {
    "train": dataset["train"].map(format_prompt, remove_columns=dataset["train"].column_names),
    "test": dataset["test"].map(format_prompt, remove_columns=dataset["test"].column_names)
}

Map:   0%|          | 0/36 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

## Setup Trainer

In [8]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling, logging
logging.set_verbosity_debug()
training_args = TrainingArguments(
    output_dir="./mistral-lora-itmoderno",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=5,
    learning_rate=2e-4,
    fp16=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=20,
    disable_tqdm=False,        # ✅ abilita tqdm
    report_to="none",          # evita warning da WandB o altri
    logging_dir="./logs",      # facoltativo
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
def compute_metrics(eval_preds):
    return {} 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)





PyTorch: setting up devices
  trainer = Trainer(
Using auto half precision backend
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


## Avvia Fine Tuning & Salva

In [9]:
trainer.train()
model.save_pretrained("./mistral-finetuned-itmodern")
tokenizer.save_pretrained("./mistral-finetuned-itmodern")


Currently training with a batch size of: 1
***** Running training *****
  Num examples = 36
  Num Epochs = 5
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 15
  Number of trainable parameters = 3,407,872


Epoch,Training Loss,Validation Loss
1,No log,8.059828
2,No log,4.484354
3,No log,3.375222
4,No log,2.777906
5,No log,2.504958



***** Running Evaluation *****
  Num examples = 5
  Batch size = 8
Saving model checkpoint to ./mistral-lora-itmoderno\checkpoint-3
loading configuration file config.json from cache at C:\Users\colam\.cache\huggingface\hub\models--mistralai--Mistral-7B-Instruct-v0.2\snapshots\3ad372fc79158a2148299e3318516c786aeded6c\config.json
Model config MistralConfig {
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": null,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.52.3",
  "use_cache": true,
  "vocab_size": 32000
}

chat template saved in ./

('./mistral-finetuned-itmodern\\tokenizer_config.json',
 './mistral-finetuned-itmodern\\special_tokens_map.json',
 './mistral-finetuned-itmodern\\chat_template.jinja',
 './mistral-finetuned-itmodern\\tokenizer.model',
 './mistral-finetuned-itmodern\\added_tokens.json',
 './mistral-finetuned-itmodern\\tokenizer.json')

# 🧠 Traduzioni

In [None]:

#translations_minerva = translate(minerva_pipe, archaic_sentences, label="Minerva")
#translations_llama = translate(llama_pipe, archaic_sentences, label="LLaMA")

# Seleziona il dispositivo
device = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Load model and tokenizer
model_path = "./mistral-finetuned-itmodern"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
model.eval().to(device)

# 2. Load dataset
df = pd.read_csv("mnt/data/dataset_cleaned.csv")
df["generated_translation"] = ""

# 3. Define translation function
def traduci(s):
    prompt = (
        "Sei un traduttore professionista di testi antichi in italiano moderno.\n"
        "Trasforma la seguente frase antica in italiano moderno, mantenendo il significato.\n"
        f"Testo antico: {s}\n"
        "Traduzione moderna:"
    )
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    output = model.generate(
        **inputs,
        max_new_tokens=60,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        use_cache=True
    )
    return tokenizer.decode(output[0], skip_special_tokens=True).split("Traduzione moderna:")[-1].strip()

# 4. Process with progress bar
results = []
i =0
for s in tqdm(df["Sentence"].tolist()):
    i+=1
    if(i>3):
        results.append("[SKIPPED]")
        continue
    try:
        results.append(traduci(s))
    except:
        results.append("[ERRORE]")
        
df["generated_translation"] = results

# 5. Save result
df.to_csv("mnt/data/dataset_with_translations.csv", index=False)


# 🔍 Simulazione punteggi da LLM-as-a-Judge

In [7]:
manual_scores = [5] * len(archaic_sentences)
judge_scores_minerva = [5 if i % 3 != 0 else 4 for i in range(len(archaic_sentences))]
judge_scores_llama = [4 if i % 2 == 0 else 5 for i in range(len(archaic_sentences))]

# 📊 Calcolo concordanza

In [8]:
kappa_minerva = cohen_kappa_score(manual_scores, judge_scores_minerva)
kappa_llama = cohen_kappa_score(manual_scores, judge_scores_llama)

print(f"Cohen’s Kappa (Minerva): {kappa_minerva:.2f}")
print(f"Cohen’s Kappa (LLaMA): {kappa_llama:.2f}")


Cohen’s Kappa (Minerva): 0.00
Cohen’s Kappa (LLaMA): 0.00


# 💾 Salvataggio risultati JSONL

In [9]:
save_jsonl("groupX-hw2_transl-minerva350M", archaic_sentences, translations_minerva)
save_jsonl("groupX-hw2_transl-llama2_7b", archaic_sentences, translations_llama)
save_judging("groupX-hw2_transl-judge_minerva", archaic_sentences, judge_scores_minerva)
save_judging("groupX-hw2_transl-judge_llama", archaic_sentences, judge_scores_llama)