# Fine-tuning for Translation from Ancient to Modern Italian

# System Setup 🖥️

### Drive Interface 📁

In [None]:
# Mount Google Drive inside the Colab VM (force_remount=True re-mounts even if it is already mounted).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Copy every file matching *.* from the project folder on Drive into the
# current working directory, then list the directory content.
%cp /content/drive/MyDrive/MNLP_HW_2/Many_Naps_Little_Progress/*.* .
%ls

Mounted at /content/drive
dataset_ann.csv  fine_tuning.ipynb  PrometheusAPI.py  utils.py
dataset.csv      install_colab.sh   prompting.ipynb
[0m[01;34mdrive[0m/           Judge.py           [01;34msample_data[0m/


### Additional Dependencies 🐍

In [None]:
# Run install_colab.sh with bash. Only stdout is redirected to /dev/null,
# so anything the script writes to stderr still appears in the cell output.
!bash install_colab.sh >> /dev/null

[31mERROR: Operation cancelled by user[0m[31m
[0m[31mERROR: Operation cancelled by user[0m[31m
[0m

### Hugging Face 🤗

In [None]:
import os
from getpass import getpass

import huggingface_hub

# SECURITY: an access token must never be stored in the notebook source.
# Read it from the HF_TOKEN environment variable, or prompt for it
# interactively (getpass does not echo the input nor save it in the notebook).
# NOTE(review): the token that used to be hardcoded here has been exposed and
# should be revoked from the Hugging Face account settings.
TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
huggingface_hub.login(token=TOKEN)

In [None]:
# Import Datases to work with Transformers by Hugging-Face
import torch
import pandas as pd
from tqdm.auto import tqdm
# Imports for Transformers
from transformers import AutoTokenizer  # Datasets
from datasets import Dataset, DatasetDict
from utils import Report
import numpy as np
import evaluate

from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments, AutoModelForCausalLM          # imports for causal Learning
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer, AutoModelForSeq2SeqLM    # imports for Seq2Seq models
from peft import LoraConfig, TaskType, LoftQConfig, PeftModelForSeq2SeqLM, PeftModelForCausalLM, get_peft_model     # imports for quantization methods (LoRA etc...)
from transformers import EarlyStoppingCallback


# Fine-Tuned Models

* google/mt5-base (Machine Translation)
* sapienzanlp/Minerva-1B-base-v1.0 🇮🇹 (LLM)
* sapienzanlp/Minerva-3B-base-v1.0 🇮🇹 (LLM)

In [None]:
# Run configuration read by the cells below.
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = ('cuda' if torch.cuda.is_available() else "cpu")
# CSV file that is loaded both with pandas and with datasets.
DATASET = "dataset_ann.csv"
# Names of the source and target text columns in the CSV.
SRC_L = "Sentence"
TRG_L = "Target"
# Checkpoint id; must match one of the cases of the model-selection `match`.
network = "sapienzanlp/Minerva-3B-base-v1.0"
# Key of the prompt/tokenization strategy; must match one of the cases of the tokenization `match`.
tokenization_method = "minerva_base"
# Output directory: the checkpoint name without the organisation prefix.
OUT_DIR = network.split("/")[-1]
EPOCHS = 42
BATCH_SIZE = 8
# Token budget passed to the tokenizer (truncation/padding) and to generate(max_new_tokens=...).
# NOTE(review): a recorded output further down prints "model max length: 120",
# so the saved outputs were presumably produced with a different value - confirm.
max_length = 80

# Dataset Analysis

In [None]:
# Load the CSV into a DataFrame for inspection; index_col=False keeps pandas
# from using the first column as the index.
df = pd.read_csv(DATASET, sep=",", index_col=False)

In [None]:
# Mean number of whitespace-separated words per row, for the source and the target column.
for column in (SRC_L, TRG_L):
    mean_words = df[column].apply(lambda text: len(text.split())).mean()
    print(f"length mean {column} text: {mean_words}")

length mean Sentence text: 20.04123711340206
length mean Target text: 20.690721649484537


In [None]:
df.head()

Unnamed: 0,Author,Date,Region,Sentence,Target
0,Brunetto Latini,1260-61,fior.,quella guerra ben fatta l' opera perché etc. E...,Quella guerra fu ben condotta per via delle az...
1,Bono Giamboni,1292,fior.,"crudele, e di tutte le colpe pigli vendetta, c...","È severo, e punisce tutte le colpe come prescr..."
2,Valerio Massimo (red. V1,1336,fior.,Non d' altra forza d' animo fue ornato Ponzio ...,"Ponzio Aufidiano, cavaliere romano, fu dotato ..."
3,Lucano volg. (ed. Marinoni),1330/40,prat.,Se questo piace a tutti e se 'l tempo hae biso...,Se questo è quello che tutti desiderano e se l...
4,Brunetto Latini,1260-61,fior.,Officio di questa arte pare che sia dicere app...,Il compito di quest’arte sembra essere quello ...


## Model Selection

Configure the pipeline for the model selected for fine-tuning.

In [None]:
# Switch to select the network and load the appropriate model and tokenizer
match network:

    case "sapienzanlp/Minerva-3B-base-v1.0":
        from transformers import BitsAndBytesConfig
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,  # o torch.float16 se bfloat16 non è supportato
        )
        tokenizer = AutoTokenizer.from_pretrained(network)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(network, device_map="auto", quantization_config=bnb_config)
        data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=False,
            r=128,
            lora_alpha=128,
            lora_dropout=0.50
            )

        qlora_config = LoraConfig(
            init_lora_weights="loftq",
            loftq_config=LoftQConfig(loftq_bits=4),
            r=8,
            lora_alpha=16,
            target_modules="all-linear",
            lora_dropout=0.5,
            bias="none",
            task_type="CAUSAL_LM"
        )

        model = get_peft_model(model, lora_config)
        model.print_trainable_parameters()



        early_stopping_patience = 10
        early_stopping_threshold = 0.01

        early_callback = EarlyStoppingCallback(
            early_stopping_patience=early_stopping_patience, # Se la loss di valutazione non migliora per 3 epoche consecutive
            early_stopping_threshold=early_stopping_threshold # Ignora miglioramenti inferiori a 0.001
        )

        training_args = TrainingArguments(
            output_dir=OUT_DIR,
            learning_rate=3e-5,
            weight_decay=1e-2,
            warmup_steps=500,
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            num_train_epochs=EPOCHS,
            eval_strategy="epoch",
            save_strategy="epoch",
            logging_strategy="epoch",
            load_best_model_at_end=True,
            report_to="none",
            save_total_limit=3,
            lr_scheduler_type="cosine",
            logging_dir=OUT_DIR,
            logging_steps=10,
            label_names=['labels'],
            metric_for_best_model="eval_chrf++",
            greater_is_better=True,
        )

        params = {

            #"max_new_tokens": max_length, # max number of new tokens to generate
            #"do_sample":True,      # enables sampling for more diverse outputs
            #"top_k":100,            # diversity increase by controlling the candidate words
            #"top_p":0.95,          # nucleus sampling for further control over variety
            #"temperature":1.0,     # reduces randomness and increases coherence
            #"repetition_penalty":1.0,  # penalizza ripetizioni
            #"num_return_sequences":10,  # number of generated responses
            "pad_token_id":tokenizer.eos_token_id  # avoids warning if padding token is missing
        }
    case "google/mt5-base" | "google/mt5-large":
        tokenizer = AutoTokenizer.from_pretrained(network)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token


        model = AutoModelForSeq2SeqLM.from_pretrained(network, device_map=device, torch_dtype=torch.float32)
        data_collator = DataCollatorForSeq2Seq(tokenizer, network)

        lora_config = LoraConfig(
            task_type=TaskType.SEQ_2_SEQ_LM,
            inference_mode=False,
            r=64,
            lora_alpha=64,
            lora_dropout=0.3
            )

        qlora_config = LoraConfig(
            init_lora_weights="loftq",
            loftq_config=LoftQConfig(loftq_bits=4),
            r=128,
            lora_alpha=128*2,
            target_modules="all-linear",
            lora_dropout=0.3,
            bias="none",
            task_type=TaskType.SEQ_2_SEQ_LM
        )

        model = get_peft_model(model, lora_config)
        model.print_trainable_parameters()

        early_stopping_patience = 3
        early_stopping_threshold = 0.01

        early_callback = EarlyStoppingCallback(
            early_stopping_patience=early_stopping_patience, # Se la loss di valutazione non migliora per 3 epoche consecutive
            early_stopping_threshold=early_stopping_threshold # Ignora miglioramenti inferiori a 0.001
        )

        training_args = Seq2SeqTrainingArguments(
            output_dir=OUT_DIR,
            learning_rate=4e-4,
            weight_decay=3e-4,
            warmup_steps=500,
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            num_train_epochs=EPOCHS,
            eval_strategy="epoch",
            save_strategy="epoch",
            logging_strategy="epoch",
            load_best_model_at_end=True,
            report_to="none",
            save_total_limit=3,
            lr_scheduler_type="linear",
            logging_dir=OUT_DIR,
            logging_steps=10,
            label_names=['labels'],
            metric_for_best_model="eval_chrf++",
            greater_is_better=True,
        )

        params = {

            "max_new_tokens": max_length, # max number of new tokens to generate
            "do_sample":True,      # enables sampling for more diverse outputs
            #"top_k":100,            # diversity increase by controlling the candidate words
            #"top_p":0.95,          # nucleus sampling for further control over variety
            #"temperature":1.0,     # reduces randomness and increases coherence
            #"repetition_penalty":1.0,  # penalizza ripetizioni
            #"num_return_sequences":10,  # number of generated responses
            "pad_token_id":tokenizer.eos_token_id  # avoids warning if padding token is missing
        }
    case _:
        raise Exception(f"Rete {network} non testabile")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 34,078,720 || all params: 2,928,314,880 || trainable%: 1.1638


In [None]:
def _without_num_items(inputs):
    """Return `inputs` as-is, or a copy lacking the 'num_items_in_batch' key when that key is present."""
    if 'num_items_in_batch' not in inputs:
        return inputs
    return {key: value for key, value in inputs.items() if key != 'num_items_in_batch'}


class MyTrainerSeq2Seq(Seq2SeqTrainer):
    """Seq2SeqTrainer whose loss computation never sees 'num_items_in_batch'."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        # The `num_items_in_batch` argument is accepted but deliberately not
        # forwarded to the parent implementation.
        cleaned_inputs = _without_num_items(inputs)
        return super().compute_loss(model, cleaned_inputs, return_outputs=return_outputs)


class MyTrainer(Trainer):
    """Trainer whose loss computation never sees 'num_items_in_batch'."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        # The `num_items_in_batch` argument is accepted but deliberately not
        # forwarded to the parent implementation.
        cleaned_inputs = _without_num_items(inputs)
        return super().compute_loss(model, cleaned_inputs, return_outputs=return_outputs)

In [None]:
# Load once, at module level, the `evaluate` metric objects that compute_metrics calls.
sacrebleu_metric = evaluate.load("sacrebleu")
rouge_metric = evaluate.load("rouge")
meteor_metric = evaluate.load("meteor")
# The "chrf" metric is called with word_order=2 in compute_metrics, i.e. as chrF++.
chrf_metric = evaluate.load("chrf")
ter_metric = evaluate.load("ter")


def postprocess_text(preds, labels):
    """Strip surrounding whitespace from decoded predictions and references.

    Args:
        preds: iterable of decoded prediction strings.
        labels: iterable of decoded reference strings.

    Returns:
        A pair ``(preds, labels)`` where ``preds`` is a list of stripped strings
        and ``labels`` is a list of one-element lists (the nested format that
        SacreBLEU expects for references).
    """
    cleaned_preds = []
    for pred in preds:
        cleaned_preds.append(pred.strip())

    wrapped_labels = []
    for label in labels:
        wrapped_labels.append([label.strip()])

    return cleaned_preds, wrapped_labels

def compute_metrics(eval_preds):
    """Decode predictions and references and score them with several MT metrics.

    Args:
        eval_preds: pair ``(predictions, label_ids)``. ``predictions`` may be a
            tuple whose first element is the real prediction array, a 3-D array
            of logits, or an array of token ids. ``label_ids`` uses -100 for
            positions that must be ignored.

    Returns:
        dict mapping metric name to a float rounded to 4 decimals, with keys
        ``bleu``, ``rouge1``, ``rouge2``, ``rougeL``, ``rougeLsum``, ``meteor``,
        ``chrf++``, ``ter`` and ``gen_len``.
    """
    preds_input, label_ids = eval_preds
    pad_id = tokenizer.pad_token_id

    # Predictions can arrive as a tuple (e.g. (logits, hidden_states)); keep the first item.
    current_preds = preds_input
    if isinstance(current_preds, tuple):
        current_preds = current_preds[0]

    label_ids = np.asarray(label_ids)
    ignored_positions = label_ids == -100

    if hasattr(current_preds, "ndim") and current_preds.ndim == 3:
        # Logits of shape (batch_size, seq_len, vocab_size) -> greedy token ids.
        current_preds_ids = np.argmax(current_preds, axis=-1)
        # With teacher-forced logits there is one prediction per label position,
        # including the positions the loss ignores (padding). Decoding those
        # would append arbitrary tokens to every hypothesis (gen_len would sit
        # at the padded length), so they are blanked out with the pad token.
        if current_preds_ids.shape == label_ids.shape:
            current_preds_ids = np.where(ignored_positions, pad_id, current_preds_ids)
    else:
        # Otherwise the predictions are assumed to already be token ids (batch_size, seq_len).
        current_preds_ids = np.asarray(current_preds)

    # -100 is not a valid token id and would break decoding; map it to the pad token.
    current_preds_ids = np.where(current_preds_ids != -100, current_preds_ids, pad_id)
    processed_label_ids = np.where(ignored_positions, pad_id, label_ids)

    decoded_preds_raw = tokenizer.batch_decode(current_preds_ids, skip_special_tokens=True)
    decoded_labels_raw = tokenizer.batch_decode(processed_label_ids, skip_special_tokens=True)

    processed_preds, processed_labels_for_sacrebleu = postprocess_text(decoded_preds_raw, decoded_labels_raw)

    # ROUGE, METEOR, chrF and TER are called with a flat list of reference strings.
    flat_references = [ref[0] for ref in processed_labels_for_sacrebleu]

    results = {}

    # 1. SacreBLEU
    sacrebleu_output = sacrebleu_metric.compute(predictions=processed_preds, references=processed_labels_for_sacrebleu)
    if sacrebleu_output and "score" in sacrebleu_output:
        results["bleu"] = sacrebleu_output["score"]
    else:
        results["bleu"] = 0.0  # fallback

    # 2. ROUGE (rouge1, rouge2, rougeL, rougeLsum)
    # NOTE(review): use_stemmer presumably applies an English stemmer, which may
    # not be meaningful for Italian text - confirm before relying on ROUGE here.
    rouge_output = rouge_metric.compute(predictions=processed_preds, references=flat_references, use_stemmer=True)
    if rouge_output:
        for rouge_key in ("rouge1", "rouge2", "rougeL", "rougeLsum"):
            results[rouge_key] = rouge_output.get(rouge_key, 0.0)
    else:
        results.update({"rouge1": 0.0, "rouge2": 0.0, "rougeL": 0.0, "rougeLsum": 0.0})

    # 3. METEOR
    meteor_output = meteor_metric.compute(predictions=processed_preds, references=flat_references)
    if meteor_output and "meteor" in meteor_output:
        results["meteor"] = meteor_output["meteor"]
    else:
        results["meteor"] = 0.0

    # 4. chrF++: the chrF metric with word n-grams enabled (word_order=2, beta=2).
    chrf_output = chrf_metric.compute(predictions=processed_preds, references=flat_references, word_order=2, beta=2)
    if chrf_output and "score" in chrf_output:
        results["chrf++"] = chrf_output["score"]
    else:
        results["chrf++"] = 0.0

    # 5. TER (Translation Edit Rate) - the smaller, the better.
    ter_output = ter_metric.compute(predictions=processed_preds, references=flat_references)
    if ter_output and "score" in ter_output:
        results["ter"] = ter_output["score"]
    else:
        # The scores recorded in this notebook are on a 0-100 scale (about 90),
        # so a "bad" fallback has to be 100.0; 1.0 would read as near-perfect.
        results["ter"] = 100.0

    # Mean number of non-padding tokens per prediction.
    prediction_lengths = [np.count_nonzero(pid_seq != pad_id) for pid_seq in current_preds_ids]
    results["gen_len"] = np.mean(prediction_lengths) if prediction_lengths else 0.0

    # Round every numeric result. NumPy scalars are accepted explicitly because
    # not all of them (e.g. np.float32) are subclasses of Python's float.
    final_results = {
        name: round(float(value), 4)
        for name, value in results.items()
        if isinstance(value, (int, float, np.integer, np.floating))
    }

    return final_results

[nltk_data] Downloading package wordnet to /home/andrea/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/andrea/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/andrea/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
from datasets.features import Value, Features

# Load the CSV as a Hugging Face Dataset with an explicit string schema, then
# shuffle it and hold out 10% as the test split.
# Both steps are seeded: train_test_split shuffles again by default and, when
# no seed is given, draws fresh entropy, so the split would change on every run.
hf = Dataset.from_csv(
    DATASET,
    features=Features({
        SRC_L: Value("string"),
        TRG_L: Value("string"),
        "Date": Value("string"),
        "Author": Value("string"),
        "Region": Value("string"),
    }),
).shuffle(seed=2025).train_test_split(test_size=0.1, seed=2025)

## Tokenization

In [None]:
print(f"model max length: {max_length}")


def noprompt_it_it(examples):
    """Tokenize source sentences and targets without adding any prompt text.

    Args:
        examples: batch (column name -> list of values) containing the
            ``SRC_L`` and ``TRG_L`` columns.

    Returns:
        The tokenizer output for the sources, with the targets passed as
        ``text_target``; everything is truncated/padded to ``max_length``.
    """
    sources = list(examples[SRC_L])
    references = list(examples[TRG_L])
    return tokenizer(
        sources,
        text_target=references,
        max_length=max_length,
        truncation=True,
        padding="max_length",
    )

# I. Riscrivi
# II. Traduci
# III. Correggi
def minerva_base_prompt_it_it_train(examples):
    """Build causal-LM training examples: instruction + source sentence + target + EOS.

    Args:
        examples: batch (column name -> list of values) containing the
            ``SRC_L`` and ``TRG_L`` columns.

    Returns:
        The tokenizer output (truncated/padded to ``max_length``) with an added
        ``labels`` entry: a copy of ``input_ids`` where padding positions are
        replaced by -100.
    """
    # Close every training text with EOS so that the end of the translation is
    # part of the sequence; without it nothing marks where the answer stops.
    # NOTE(review): assumes the tokenizer does not already append EOS by itself - confirm.
    eos = tokenizer.eos_token or ""

    prompts = [
        f"""riscrivi la seguente frase {src} scritta in italiano arcaico in Italiano moderno: {dst}{eos}"""
        for src, dst in zip(examples[SRC_L], examples[TRG_L])
    ]

    # Tokenize prompt + target together; the labels are derived from the same tokens.
    model_inputs = tokenizer(
        prompts,
        max_length=max_length,
        truncation=True,
        padding="max_length"
    )

    # Mask padding through the attention mask rather than by comparing with
    # pad_token_id: when the pad token is the EOS token, an id comparison would
    # also mask the genuine EOS that closes the target.
    model_inputs["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(input_ids, attention)]
        for input_ids, attention in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

def minerva_base_prompt_it_it_eval(examples):
    """Build evaluation prompts: instruction + source sentence, with the answer left open.

    Args:
        examples: batch (column name -> list of values) containing the ``SRC_L`` column.

    Returns:
        The tokenizer output for the prompts, truncated/padded to ``max_length``
        (no ``labels`` entry is produced).
    """
    prompts = []
    for src in examples[SRC_L]:
        prompts.append(
            f"""riscrivi la seguente frase {src} scritta in italiano arcaico in Italiano moderno: """
        )

    # Only the prompt is tokenized here; the target is not part of the input.
    return tokenizer(prompts, max_length=max_length, truncation=True, padding="max_length")

def base_prompt_en(examples):
    """Tokenize sources prefixed with an English instruction, with targets as labels.

    Args:
        examples: batch (column name -> list of values) containing the
            ``SRC_L`` and ``TRG_L`` columns.

    Returns:
        The tokenizer output for the prefixed sources, with the targets passed
        as ``text_target``; truncated to ``max_length`` and padded to the
        longest sequence of the batch.
    """
    inputs = ["translate from Ancient Italian to Modern Italian: " + example for example in examples[SRC_L]]
    targets = list(examples[TRG_L])

    # A single tokenizer call handles inputs and targets together. The earlier
    # inputs-only call was discarded immediately and only cost time.
    model_inputs = tokenizer(inputs, text_target=targets, max_length=max_length, truncation=True, padding="longest")

    return model_inputs

def base_prompt_it_it(examples):
    """Tokenize sources prefixed with an Italian instruction, with targets as labels.

    Args:
        examples: batch (column name -> list of values) containing the
            ``SRC_L`` and ``TRG_L`` columns.

    Returns:
        The tokenizer output for the prefixed sources, with the targets passed
        as ``text_target``; truncated to ``max_length`` and padded to the
        longest sequence of the batch.
    """
    inputs = ["Riscrivi dall'Italiano Antico a l'Italiano Moderno: " + example for example in examples[SRC_L]]
    targets = list(examples[TRG_L])

    # A single tokenizer call handles inputs and targets together. The earlier
    # inputs-only call was discarded immediately and only cost time.
    model_inputs = tokenizer(inputs, text_target=targets, max_length=max_length, truncation=True, padding="longest")

    return model_inputs

def parafrasi_prompt_it_it(examples):
    """Tokenize sources prefixed with a "write the paraphrase" instruction, with targets as labels.

    Args:
        examples: batch (column name -> list of values) containing the
            ``SRC_L`` and ``TRG_L`` columns.

    Returns:
        The tokenizer output for the prefixed sources, with the targets passed
        as ``text_target``; truncated to ``max_length`` and padded to the
        longest sequence of the batch.
    """
    prefix = "Scrivi la parafrasi di questo testo: "
    prompted_sources = []
    for sentence in examples[SRC_L]:
        prompted_sources.append(prefix + sentence)
    references = list(examples[TRG_L])

    return tokenizer(
        prompted_sources,
        text_target=references,
        max_length=max_length,
        truncation=True,
        padding="longest",
    )

def informative_prompt_it_it(examples):
    """Tokenize sources wrapped in a prompt that also states author, date and dialect.

    Args:
        examples: batch (column name -> list of values) containing the
            ``SRC_L``, ``TRG_L``, "Date", "Region" and "Author" columns.

    Returns:
        The tokenizer output for the prompts, with the targets passed as
        ``text_target``; truncated to ``max_length`` and padded to the longest
        sequence of the batch.
    """
    rows = zip(examples[SRC_L], examples["Date"], examples["Region"], examples["Author"])
    prompts = []
    for text, date, region, author in rows:
        prompts.append(
            f"Riscrivi in uno stile più moderno il testo del seguente Autore: '{author}', anno di scrittura: {date}, luogo: Italia, dialetto: '{region}', testo: '{text}'."
        )
    references = list(examples[TRG_L])

    return tokenizer(
        prompts,
        text_target=references,
        max_length=max_length,
        truncation=True,
        padding="longest",
    )


model max length: 120


## Tokenizer Parameters

In [None]:
# Apply the prompt/tokenization strategy named by `tokenization_method` and
# store the result in `hf_tokenized`.
match tokenization_method:
    case "minerva_base":
        # Train and test use different callbacks: the train prompt contains the
        # target sentence, the eval prompt stops right after the instruction.
        map_callback_train = minerva_base_prompt_it_it_train
        map_callback_eval   = minerva_base_prompt_it_it_eval

        hf_tokenized = DatasetDict({
            "train": hf["train"].map(map_callback_train, batched=True),
            "test":  hf["test"].map(map_callback_eval, batched=True)
        })

        # Only these two columns are returned (as torch tensors) when the
        # dataset is read. NOTE(review): the 'labels' column created by the
        # train callback is therefore not returned - presumably the data
        # collator is expected to build the labels; confirm.
        hf_tokenized.set_format(type="torch", columns=['input_ids', 'attention_mask'])

    case "base_prompt_en":
        # Same callback for every split of `hf`.
        map_callback = base_prompt_en
        hf_tokenized = hf.map(map_callback, batched=True)

    case "base_prompt_it_it":
        # Same callback for every split of `hf`.
        map_callback = base_prompt_it_it
        hf_tokenized = hf.map(map_callback, batched=True)
    case _:
        raise ValueError("Tokenization method not avaiable")

Map:   0%|          | 0/87 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [None]:
print(hf_tokenized.column_names)
print(hf_tokenized.shape)

{'train': ['Sentence', 'Target', 'Date', 'Author', 'Region', 'input_ids', 'attention_mask', 'labels'], 'test': ['Sentence', 'Target', 'Date', 'Author', 'Region', 'input_ids', 'attention_mask']}
{'train': (87, 8), 'test': (10, 7)}


In [None]:
# Preview the first five tokenized training examples decoded back to text.
for idx, s in enumerate(hf_tokenized["train"].take(5), 1):
    # Decode outside the f-string: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError before Python 3.12. `decode` takes no attention
    # mask; padding is dropped by skip_special_tokens.
    decoded_text = tokenizer.decode(s["input_ids"], skip_special_tokens=True)
    print(f"===:(sentence n°{idx}):===")
    print(f"{SRC_L}:{decoded_text}")


===:(sentence n°1):===
Sentence:riscrivi la seguente frase però che, sse nobile cosa e alta è abatte il nimico, ampoi nonn è meno laudabile sapere avere misiricordia scritta in italiano arcaico in Italiano moderno: Poiché, se è cosa nobile e grande abbattere il nemico, non è poi meno degno di lode saper mostrare misericordia.
===:(sentence n°2):===
Sentence:riscrivi la seguente frase Alexandri, ciò è il genero e 'l figliuolo, da Phausonia, gentile iovane di Macedonia, stando in uno luogo strecto sanza guardia, fue morto. scritta in italiano arcaico in Italiano moderno: Alessandro da Phausonia, cioè il genero del figlio, nobile giovane di Macedonia, fu ucciso mentre si trovava in un luogo isolato e senza protezione.
===:(sentence n°3):===
Sentence:riscrivi la seguente frase mostroe massimamente le forze sue, dando lui re a questa cittade ne la quale nacque servo; al quale avvenne lunghissimamente lo imperio tenere scritta in italiano arcaico in Italiano moderno: Manifestò pienamente la 

In [None]:
# Preview the first five tokenized test prompts decoded back to text.
for idx, s in enumerate(hf_tokenized["test"].take(5), 1):
    # Decode outside the f-string: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError before Python 3.12. `decode` takes no attention
    # mask; padding is dropped by skip_special_tokens.
    decoded_text = tokenizer.decode(s["input_ids"], skip_special_tokens=True)
    print(f"===:(sentence n°{idx}):===")
    print(f"{SRC_L}:{decoded_text}")

===:(sentence n°1):===
Sentence:riscrivi la seguente frase contra lui e contra le sue sorelle  e contra il reame e contra l' alto pregio della sua ingenerazione e della sua familia scritta in italiano arcaico in Italiano moderno: 
===:(sentence n°2):===
Sentence:riscrivi la seguente frase da' monti de' Romani si feciero nuovi nemici; contra i quali è conbactuto cum diversa ventura: perké nela primaia battaglia, essendo consolo Valerio, MMMD ne moriro de' Romani; scritta in italiano arcaico in Italiano moderno: 
===:(sentence n°3):===
Sentence:riscrivi la seguente frase Non lo volle cognoscere per nimico. Qesta è quella, la quale diede ardire al profeta Natan a riprendere con grande autoritade quello re, il quale avea peccato. scritta in italiano arcaico in Italiano moderno: 
===:(sentence n°4):===
Sentence:riscrivi la seguente frase la seconda suole talora per la grande provedenzia fare timoroso, e la prima per l'ardire rendere altrui matto. scritta in italiano arcaico in Italiano mode

## Models & Training

### PEFT Fine-Tuning

In [None]:
# Pick the trainer class that matches the PEFT wrapper of the loaded model.
is_seq2seq = isinstance(model, PeftModelForSeq2SeqLM)
print("[SEQ2SEQ Generation]" if is_seq2seq else "CAUSAL Generation")
trainer_cls = MyTrainerSeq2Seq if is_seq2seq else MyTrainer

# Both model families get the same wiring. In particular the early-stopping
# callback configured during model selection is attached in both cases (the
# seq2seq branch used to build it and then leave it out).
trainer = trainer_cls(
    model=model,
    args=training_args,
    train_dataset=hf_tokenized["train"],
    eval_dataset=hf_tokenized["test"],
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[Report(OUT_DIR), early_callback],
)

CAUSAL Generation


In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss,Bleu,Rouge1,Rouge2,Rougel,Rougelsum,Meteor,Chrf++,Ter,Gen Len
1,3.7335,4.717964,1.0401,0.2421,0.0134,0.1946,0.1878,0.1559,21.4328,90.8284,119.5
2,3.7331,4.7115,1.0809,0.2344,0.0134,0.1922,0.1856,0.1493,21.5329,90.5325,119.4
3,3.7198,4.697144,1.0378,0.2335,0.0134,0.1924,0.1894,0.1521,21.4419,91.1243,119.2
4,3.7005,4.67838,1.0432,0.2362,0.0134,0.1952,0.189,0.1531,21.7912,90.5325,119.2
5,3.6784,4.654891,1.0511,0.239,0.0134,0.1949,0.1882,0.1515,22.0003,90.2367,119.4
6,3.6378,4.621964,1.0561,0.2447,0.0134,0.1971,0.1913,0.1527,22.2861,90.2367,119.8
7,3.5909,4.58185,1.157,0.2501,0.0188,0.2049,0.202,0.1516,22.6994,89.9408,119.8
8,3.5459,4.531985,1.1674,0.2531,0.0184,0.2099,0.2096,0.1566,23.8627,90.2367,119.9
9,3.4868,4.468873,1.2201,0.2698,0.0326,0.2265,0.2231,0.1767,25.0929,89.645,120.0
10,3.415,4.391948,3.1323,0.312,0.1036,0.2685,0.2642,0.2465,30.5816,86.3905,120.0


Training done. Generating graphs...


TrainOutput(global_step=462, training_loss=2.585878202925513, metrics={'train_runtime': 532.3405, 'train_samples_per_second': 6.864, 'train_steps_per_second': 0.868, 'total_flos': 7483350841344000.0, 'train_loss': 2.585878202925513, 'epoch': 42.0})

In [None]:
# Imposta il modello in modalità valutazione e spostalo sul device
import os

# Put the model in evaluation mode and move it to the selected device.
model = model.eval()
model = model.to(device)

# Batches of test prompts; only the tensors needed by generate() are returned.
hf_tokenized.set_format("torch", columns=["input_ids", "attention_mask"])
loader = torch.utils.data.DataLoader(hf_tokenized["test"], batch_size=8)


print(f"Inizio generazione su {device}")
print("=============================")

os.makedirs(OUT_DIR, exist_ok=True)
output_path = os.path.join(OUT_DIR, "output_chat.txt")

# The report file is opened ONCE, around the whole loop: opening it in "w" mode
# inside the loop truncated it at every batch, so only the last batch survived.
sample_idx = 0  # running index over the whole test split, not per batch
with open(output_path, "w", encoding="utf-8") as f:
    for batch in tqdm(loader):
        # The DataLoader yields a dict of tensors; move them to the model's device.
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        result_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_length,
        )

        # input_ids has shape (batch_size, prompt_len) and result_ids has shape
        # (batch_size, output_len); each row is decoded separately.
        decoded_prompts = tokenizer.batch_decode(input_ids, skip_special_tokens=True)
        decoded_results = tokenizer.batch_decode(result_ids, skip_special_tokens=True)

        for prompt_text, result_text in zip(decoded_prompts, decoded_results):
            report_lines = [
                f"===:{sample_idx}-(Model for Prompt)===",
                f"{prompt_text}",
                "=========================",
                f"===:(model {network}):===",
                result_text,
                "=================================",
            ]
            # The same lines go to the notebook output and to the file. The file
            # used to receive a literal "f===:({i}..." (misplaced f prefix) and
            # no line breaks at all.
            for line in report_lines:
                print(line)
                f.write(line + "\n")
            sample_idx += 1

print("\nGenerazione completata.")

Inizio generazione su cuda


  0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


===:0-(Model for Prompt)===
riscrivi la seguente frase contra lui e contra le sue sorelle  e contra il reame e contra l' alto pregio della sua ingenerazione e della sua familia scritta in italiano arcaico in Italiano moderno: 
===:(model sapienzanlp/Minerva-3B-base-v1.0):===
riscrivi la seguente frase contra lui e contra le sue sorelle  e contra il reame e contra l' alto pregio della sua ingenerazione e della sua familia scritta in italiano arcaico in Italiano moderno:  contro di lui e contro le sue sorelle e contro il regno e contro l’alto prestigio della sua stirpe e della sua famiglia. scritta in italiano moderno in Italiano arcaico in Italiano moderno: Contro di lui e contro le sue sorelle, e contro il regno, e contro il prestigio della sua famiglia. Il 2018 è stato un anno ricco di novità per il mondo delle criptovalute. Il Bitcoin ha raggiunto il suo massimo storico, mentre Ethereum ha superato il suo precedente record. Il 2019 è iniziato con il botto, con il Bitcoin che ha super