# Imports

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM,BitsAndBytesConfig
import pandas as pd
from datasets import load_dataset
from sklearn.metrics import cohen_kappa_score
import torch
from tqdm import tqdm
from peft import get_peft_model, LoraConfig, TaskType
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling, logging
import glob
import os


  from .autonotebook import tqdm as notebook_tqdm


# 📥 Caricamento dati

In [None]:
dataset_path = "inputs/dataset.csv"

In [None]:
df = pd.read_csv(dataset_path)
archaic_sentences = df["Sentence"].dropna().tolist()[:20]

# 👨‍🏫 Fine Tuning

## Parametri fine tuning

In [None]:
# 📁 Percorso alla cartella con i file CSV
cartella_csvs = "fine_tuning/csvs"  
# 📁 Percorso alla cartella dove andrà il dataset concatenato
cartella_dataset_concatenato = "inputs/dataset_concatenato.csv"

training_epochs=15

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
#model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#model_name = "bigscience/bloomz-3b"

nuovi_token_max=100
temperatura=0.7
max_translations=0 # 0 = no limit, >0 = max number of translations to generate

def getPrompt(archaic_sentence):
    prompt = (
        "Sei un traduttore professionista di testi antichi in italiano moderno.\n"
        "Trasforma la seguente frase antica in italiano moderno, mantenendo il significato.\n"
        f"Testo antico: {archaic_sentence}\n"
        "Traduzione moderna:"
    )
    return prompt

## Load Dataset

In [None]:

# 🔍 Prende tutti i file .csv nella cartella
csv_files = glob.glob(os.path.join(cartella_csvs, "*.csv"))

# 📦 Carica e concatena tutti i file
dataframes = []
for file in csv_files:
    try:
        df = pd.read_csv(file)
        dataframes.append(df)
    except Exception as e:
        print(f"Errore nel file {file}: {e}")

# 📚 Unione verticale
df_finale = pd.concat(dataframes, ignore_index=True)

# 💾 Salvataggio
df_finale.to_csv(cartella_dataset_concatenato, index=False)

dataset = load_dataset("csv", data_files=cartella_dataset_concatenato)["train"]
dataset = dataset.train_test_split(test_size=0.1)



Generating train split: 296 examples [00:00, 29491.74 examples/s]


## Modello e Tokenizer

In [5]:

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)


# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name,use_fast=True) #use_fast=True)

tokenizer.pad_token = tokenizer.eos_token  # Per evitare errori su padding
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)
model.is_loaded_in_4bit = True


Loading checkpoint shards: 100%|██████████| 3/3 [00:18<00:00,  6.22s/it]


## Configurazione Lora

In [6]:
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    task_type=TaskType.CAUSAL_LM,
    lora_dropout=0.1,
    bias="none"
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()


trainable params: 3,407,872 || all params: 7,245,139,968 || trainable%: 0.04703666202518836


## Preprocessing dataset

In [7]:
def format_prompt(example):
    prompt = getPrompt(example['text'])
    return {
        "input_ids": tokenizer(prompt, truncation=True, padding="max_length", max_length=512)["input_ids"],
        "labels": tokenizer(example["translation"], truncation=True, padding="max_length", max_length=512)["input_ids"]
    }

tokenized_dataset = {
    "train": dataset["train"].map(format_prompt, remove_columns=dataset["train"].column_names),
    "test": dataset["test"].map(format_prompt, remove_columns=dataset["test"].column_names)
}

Map: 100%|██████████| 266/266 [00:00<00:00, 921.21 examples/s] 
Map: 100%|██████████| 30/30 [00:00<00:00, 766.96 examples/s]


## Setup Trainer

In [None]:
logging.set_verbosity_debug()
training_args = TrainingArguments(
    output_dir="Modelli/Mistral/mistral-lora-itmoderno",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    num_train_epochs=training_epochs,
    learning_rate=2e-4,
    fp16=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=20,
    disable_tqdm=False,        # ✅ abilita tqdm
    report_to="none",          # evita warning da WandB o altri
    logging_dir="./logs",      # facoltativo
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
def compute_metrics(eval_preds):
    return {} 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)


PyTorch: setting up devices
  trainer = Trainer(
Using auto half precision backend
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


## Avvia Fine Tuning & Salva

In [None]:
trainer.train()
model.save_pretrained("Modelli/Mistral/mistral-finetuned-itmodern")
tokenizer.save_pretrained("Modelli/Mistral/mistral-finetuned-itmodern")
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Carica il modello base
base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

# Carica il checkpoint LoRA appena salvato
lora_model = PeftModel.from_pretrained(base_model, "Modelli/Mistral/mistral-finetuned-itmodern")

# Merge dei pesi LoRA nel modello base
merged_model = lora_model.merge_and_unload()

# Salva il modello fuso come standalone
merged_model.save_pretrained("Modelli/Mistral/mistral-lora-merged")



Currently training with a batch size of: 1
***** Running training *****
  Num examples = 266
  Num Epochs = 15
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 16
  Total optimization steps = 255
  Number of trainable parameters = 3,407,872


Epoch,Training Loss,Validation Loss
1,No log,1.876412
2,3.831600,1.597599
3,1.668000,1.566959
4,1.507000,1.563998
5,1.393100,1.567755
6,1.283600,1.605215
7,1.283600,1.63701
8,1.143000,1.763938
9,1.033900,1.750238
10,0.904200,1.88389



***** Running Evaluation *****
  Num examples = 30
  Batch size = 8
Saving model checkpoint to ./mistral-lora-itmoderno\checkpoint-17
chat template saved in ./mistral-lora-itmoderno\checkpoint-17\chat_template.jinja
tokenizer config file saved in ./mistral-lora-itmoderno\checkpoint-17\tokenizer_config.json
Special tokens file saved in ./mistral-lora-itmoderno\checkpoint-17\special_tokens_map.json

***** Running Evaluation *****
  Num examples = 30
  Batch size = 8
Saving model checkpoint to ./mistral-lora-itmoderno\checkpoint-34
chat template saved in ./mistral-lora-itmoderno\checkpoint-34\chat_template.jinja
tokenizer config file saved in ./mistral-lora-itmoderno\checkpoint-34\tokenizer_config.json
Special tokens file saved in ./mistral-lora-itmoderno\checkpoint-34\special_tokens_map.json

***** Running Evaluation *****
  Num examples = 30
  Batch size = 8
Saving model checkpoint to ./mistral-lora-itmoderno\checkpoint-51
chat template saved in ./mistral-lora-itmoderno\checkpoint-51\c

# 🧠 Traduzioni

In [None]:

# 0. Quantization config (bnb4bit)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

# 1. Seleziona il dispositivo
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 2. Carica il modello MERGED e tokenizer
model_path = "Modelli/Mistral/mistral-lora-merged"
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")  # tokenizer originale
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto"
)
tokenizer.pad_token = tokenizer.eos_token
model.eval()

# 3. Carica dataset
df = pd.read_csv("inputs/dataset.csv")
df["generated_translation"] = ""

# 4. Funzione di traduzione
def traduci(s):
    prompt = getPrompt(s)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    output = model.generate(
        **inputs,
        max_new_tokens=nuovi_token_max,
        temperature=temperatura,
        top_p=0.9,
        do_sample=True,
        use_cache=True
    )
    return tokenizer.decode(output[0], skip_special_tokens=True).split("Traduzione moderna:")[-1].strip()

# 5. Generazione con barra di avanzamento (solo primi 3 per test)
results = []
for i, s in enumerate(tqdm(df["Sentence"].tolist())):
    if max_translations!= 0 and i >= max_translations:
        results.append("[SKIPPED]")
        continue
    try:
        results.append(traduci(s))
    except:
        results.append("[ERRORE]")

df["generated_translation"] = results

# 6. Salva i risultati
df.to_csv("outputs/dataset_with_mistral_translations.csv", index=False)


loading file tokenizer.model from cache at C:\Users\colam\.cache\huggingface\hub\models--mistralai--Mistral-7B-Instruct-v0.2\snapshots\3ad372fc79158a2148299e3318516c786aeded6c\tokenizer.model
loading file tokenizer.json from cache at C:\Users\colam\.cache\huggingface\hub\models--mistralai--Mistral-7B-Instruct-v0.2\snapshots\3ad372fc79158a2148299e3318516c786aeded6c\tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\colam\.cache\huggingface\hub\models--mistralai--Mistral-7B-Instruct-v0.2\snapshots\3ad372fc79158a2148299e3318516c786aeded6c\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\colam\.cache\huggingface\hub\models--mistralai--Mistral-7B-Instruct-v0.2\snapshots\3ad372fc79158a2148299e3318516c786aeded6c\tokenizer_config.json
loading file chat_template.jinja from cache at None
loading configuration file ./mistral-lora-merged\config.json
Model config MistralConfig {
  "architectur

# 🔍 Prometheus LLM-as-a-Judge

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

prometheur_model_name = "prometheus-eval/prometheus-7b-v2.0"

prometheus_tokenizer = AutoTokenizer.from_pretrained(
    prometheur_model_name,
    trust_remote_code=True
)

prometheus_model = AutoModelForCausalLM.from_pretrained(
    prometheur_model_name,
    trust_remote_code=True,         # <-- importante anche qui
    device_map="auto",
    torch_dtype=torch.float16,
    offload_folder="offload_prometheus"
)

judge = pipeline("text-generation", model=prometheus_model, tokenizer=prometheus_tokenizer)


Loading checkpoint shards: 100%|██████████| 8/8 [00:08<00:00,  1.08s/it]
Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0


In [None]:
import pandas as pd

# Carica i dataset
df_prometheus = pd.read_csv("inputs/dataset_human_eval.csv")
df_translations = pd.read_csv("outputs/dataset_with_mistral_translations.csv")

# Usa solo i primi 15 elementi
df_prometheus = df_prometheus.head(15)
df_translations = df_translations.head(15)

# Aggiunge la colonna 'generated_translation'
df_prometheus["generated_translation"] = df_translations["generated_translation"]

# Funzione per costruire il prompt
def build_judge_prompt(original, human_translation, model_translation):
    return f"""
You are a translation evaluator.

Original:
{original}

Human:
{human_translation}

Model:
{model_translation}

Score (1–5):
""".strip()

# Esegui judge su ogni riga e salva il risultato
scores = []
for i, row in df_prometheus.iterrows():
    prompt = build_judge_prompt(row["Sentence"], row["HumanEval"], row["generated_translation"])
    result = judge(prompt, max_new_tokens=2, temperature=0.0, do_sample=False)[0]["generated_text"]
    scores.append(result.strip())

# Aggiunge la colonna dei punteggi
df_prometheus["judge_score"] = scores

# Salva il risultato
df_prometheus.to_csv("outputs/dataset_with_evaluation.csv", index=False)


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignore

In [11]:
result

'You are a translation evaluator.\n\nOriginal:\nAd te solo, Altissimo, se konfano, et nullu homo ène dignu te mentovare.\n\nHuman:\nA te solo, altissimo si addicono e nessun uomo è degno di menzionare il tuo nome.\n\nModel:\nSolo a te, sommo Signore, spettano gli onori, e nessuno è degno di nominarti.\n\nScore (1–5): 5\n'

# 📊 Calcolo concordanza

# 💾 Salvataggio risultati JSONL