# Installazione librerie

In [None]:
!pip uninstall -y transformers bitsandbytes peft
!pip install transformers==4.39.3 bitsandbytes==0.45.2 peft==0.10.0 accelerate==0.27.2 datasets sentencepiece rouge-score evaluate


[0mCollecting transformers==4.39.3
  Using cached transformers-4.39.3-py3-none-any.whl.metadata (134 kB)
Collecting bitsandbytes==0.45.2
  Using cached bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting peft==0.10.0
  Using cached peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Using cached transformers-4.39.3-py3-none-any.whl (8.8 MB)
Using cached bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
Using cached peft-0.10.0-py3-none-any.whl (199 kB)
Installing collected packages: transformers, bitsandbytes, peft
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sentence-transformers 4.1.0 requires transformers<5.0.0,>=4.41.0, but you have transformers 4.39.3 which is incompatible.[0m[31m
[0mSuccessfully installed bitsandbytes-0.45.2 peft-0.10.0 transformers-4.39.3


# Import librerie

In [None]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM,
    Seq2SeqTrainer, Seq2SeqTrainingArguments,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, TaskType
import torch
from google.colab import drive


# Monta Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive in the Colab filesystem, requesting a fresh mount
# (force_remount=True) in case a previous mount is still registered.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)


# Caricamento + split 70/10/20

In [None]:
# The JSON files on Drive are already split: "train" (80%) and "test" (20%).
json_files = {
    "train": "/content/drive/MyDrive/AIScientist/FlanT5/train_ready_trunc.json",
    "test": "/content/drive/MyDrive/AIScientist/FlanT5/test_ready_trunc.json",
}
dataset = load_dataset("json", data_files=json_files)

# Carve a validation set out of the train file: 87.5% / 12.5% of the 80%
# gives an overall 70 / 10 / 20 train / validation / test split.
# The fixed seed keeps the split reproducible across runs.
split_dataset = dataset["train"].train_test_split(test_size=0.125, seed=42)
train_data, val_data = split_dataset["train"], split_dataset["test"]
test_data = dataset["test"]


# Parametri base

In [None]:
# Token budgets used both for tokenization and for generation.
MAX_INPUT_LEN = 1024
MAX_TARGET_LEN = 1024

# Base checkpoint to fine-tune.
model_name = "google/flan-t5-xl"

# Drive folder that receives the training checkpoints and the saved model.
output_dir = "/content/drive/MyDrive/AIScientist/FlanT5/flan_t5xl_lora_1024_1024_v2"

# Tokenizer

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Label value that the Hugging Face seq2seq cross-entropy loss ignores.
LABEL_IGNORE_INDEX = -100


def preprocess_function(examples):
    """Tokenize a batch of examples into fixed-length model inputs and labels.

    Args:
        examples: batch dict (as passed by ``Dataset.map(batched=True)``)
            with an "input" column (source texts) and an "output" column
            (target texts).

    Returns:
        The tokenizer encoding of the inputs, truncated/padded to
        ``MAX_INPUT_LEN``, with an added "labels" key holding the target
        token ids truncated/padded to ``MAX_TARGET_LEN``. Padding positions
        in the labels are set to -100 so they do not contribute to the loss.
    """
    model_inputs = tokenizer(
        examples["input"],
        max_length=MAX_INPUT_LEN,
        truncation=True,
        padding="max_length"
    )
    targets = tokenizer(
        examples["output"],
        max_length=MAX_TARGET_LEN,
        truncation=True,
        padding="max_length"
    )

    # Without this masking the model is also trained (and evaluated) on
    # predicting the padding that fills every target up to MAX_TARGET_LEN.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token if token != pad_id else LABEL_IGNORE_INDEX for token in sequence]
        for sequence in targets["input_ids"]
    ]
    return model_inputs

# Apply the same batched tokenization to the three splits.
train_tokenized, val_tokenized, test_tokenized = (
    split.map(preprocess_function, batched=True)
    for split in (train_data, val_data, test_data)
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/234 [00:00<?, ? examples/s]

# Config quantizzazione 4-bit

In [None]:
# 4-bit quantization settings passed to from_pretrained when loading the model.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.bfloat16,  # bfloat16 on A100
}
bnb_config = BitsAndBytesConfig(**quantization_options)

# Carica modello quantizzato

In [None]:
import transformers

# Keep a handle on the genuine PreTrainedModel.to. If this cell has already
# been executed in the current kernel, the class attribute is our own wrapper:
# unwrap it so that re-running the cell does not stack wrappers on wrappers.
_current_to = transformers.modeling_utils.PreTrainedModel.to
original_to = getattr(_current_to, "_wrapped_to", _current_to)


# Patch that bypasses the bitsandbytes check performed by PreTrainedModel.to
def safe_to(self, *args, **kwargs):
    """Call the original ``to`` after clearing ``quantization_method``.

    NOTE: the attribute is cleared permanently on the instance, not just for
    the duration of the call, so later code that inspects
    ``quantization_method`` will see ``None``.
    """
    if getattr(self, "quantization_method", None) is not None:
        self.quantization_method = None
    return original_to(self, *args, **kwargs)


# Marker used above to recover the unpatched method on a re-run.
safe_to._wrapped_to = original_to
transformers.modeling_utils.PreTrainedModel.to = safe_to


In [None]:
# Load the base checkpoint with the 4-bit quantization config defined above;
# device_map="auto" delegates device placement to the library.
load_options = {
    "quantization_config": bnb_config,
    "device_map": "auto",
}
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, **load_options)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Applica LoRA

In [None]:
# Imported here because this is the only cell that needs it.
from peft import prepare_model_for_kbit_training

peft_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=16,               # raised from 8 to 16
    lora_alpha=32,      # raised proportionally
    lora_dropout=0.1,
    bias="none"
)

# Disable the generation cache while training with gradient checkpointing.
model.config.use_cache = False

# PEFT's documented preparation step for training on a k-bit quantized base
# model; it must run on the base model, before the LoRA adapters are attached.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

model = get_peft_model(model, peft_config)

# Kept from the original setup: explicit gradient checkpointing and the
# quantization flags set on the wrapped model.
model.gradient_checkpointing_enable()

model.is_loaded_in_4bit = True
model.is_loaded_in_8bit = False
# Make the embedding outputs require grad so checkpointed blocks get gradients.
model.enable_input_require_grads()

# Freeze everything except the LoRA parameters.
for name, param in model.named_parameters():
    param.requires_grad = "lora" in name

# Sanity check: prints how many parameters are actually trainable.
model.print_trainable_parameters()

# Metriche ROUGE + BLEU

In [None]:
from evaluate import load
import numpy as np

rouge = load("rouge")
bleu = load("bleu")


def compute_metrics(eval_pred):
    """Compute ROUGE and BLEU (as percentages) for a batch of predictions.

    Args:
        eval_pred: pair ``(predictions, labels)`` of token-id arrays, as
            passed by the trainer when ``predict_with_generate=True``.

    Returns:
        Dict with "rouge1", "rouge2", "rougeL" and "bleu", each scaled to
        0-100 and rounded to two decimals.
    """
    preds, labels = eval_pred
    # Some models return a tuple whose first element holds the token ids.
    if isinstance(preds, tuple):
        preds = preds[0]

    # -100 marks ignored/padded positions and is not a valid token id:
    # swap it for the pad token so the tokenizer can decode the arrays.
    pad_id = tokenizer.pad_token_id
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    preds = np.where(preds != -100, preds, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    # Decode predictions and labels
    preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # ROUGE
    rouge_result = rouge.compute(predictions=preds, references=labels)

    # BLEU: guard the degenerate case where every prediction is empty, which
    # would otherwise hit a division by zero in the brevity penalty.
    if any(pred.strip() for pred in preds):
        bleu_score = bleu.compute(predictions=preds, references=labels)["bleu"]
    else:
        bleu_score = 0.0

    # Combine the results
    result = {
        "rouge1": round(rouge_result["rouge1"] * 100, 2),
        "rouge2": round(rouge_result["rouge2"] * 100, 2),
        "rougeL": round(rouge_result["rougeL"] * 100, 2),
        "bleu": round(bleu_score * 100, 2)
    }
    return result


# TrainingArguments

In [None]:
# T5 checkpoints are prone to numerical overflow under fp16 mixed precision
# (the run recorded in this notebook logged a training loss of 186948.26 at
# step 500 with fp16=True). Prefer bf16 when the GPU supports it, which also
# matches the bfloat16 compute dtype of the quantization config; fall back to
# the original fp16 setting otherwise.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,
    logging_steps=100,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,   # kept for stability
    learning_rate=1e-4,              # slightly higher
    bf16=use_bf16,
    fp16=not use_bf16,
    save_total_limit=2,
    predict_with_generate=True,
    generation_max_length=MAX_TARGET_LEN,
    ddp_find_unused_parameters=False,
    report_to="none"
)

# Trainer

In [None]:
# Wire model, arguments, tokenized splits and tokenizer into the trainer.
# NOTE(review): no compute_metrics callback is passed here, so evaluations run
# by this trainer report the loss only.
trainer_options = {
    "model": model,
    "args": training_args,
    "train_dataset": train_tokenized,
    "eval_dataset": val_tokenized,
    "tokenizer": tokenizer,
}
trainer = Seq2SeqTrainer(**trainer_options)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


# Avvia training

In [None]:
import torch

# Release cached, unused CUDA memory before the training run starts.
torch.cuda.empty_cache()

# Keep the TrainOutput and end the cell with it so the notebook displays it.
train_output = trainer.train()
train_output

Step,Training Loss,Validation Loss
500,186948.26,26.735123


TrainOutput(global_step=612, training_loss=30843.19387225232, metrics={'train_runtime': 4307.2219, 'train_samples_per_second': 1.137, 'train_steps_per_second': 0.142, 'total_flos': 8.402815724971622e+16, 'train_loss': 30843.19387225232, 'epoch': 3.0})

# Salva modello e tokenizer, poi valuta sul test set

In [None]:
# Persist the trained model and the tokenizer side by side in output_dir.
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

# Evaluate on the held-out test split; ending the cell with the metrics dict
# makes the notebook display it.
test_metrics = trainer.evaluate(test_tokenized, max_length=MAX_TARGET_LEN)
test_metrics



{'eval_loss': 27.466571807861328,
 'eval_runtime': 123.1334,
 'eval_samples_per_second': 3.785,
 'eval_steps_per_second': 3.785,
 'epoch': 3.0}

# Ricarica modello e tokenizer

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Reload the saved tokenizer and LoRA model from output_dir.
tokenizer = AutoTokenizer.from_pretrained(output_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(output_dir)

# Move the reloaded model to the GPU; as the last expression of the cell,
# this also displays the module tree.
model.to("cuda")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

T5ForConditionalGeneration(
  (shared): Embedding(32128, 2048)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 2048)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k): Linear(in_features=2048, out_features=2048, bias=False)
       

# Calcolo metriche (ROUGE, BLEU)

In [None]:
from evaluate import load
import numpy as np

# Load the metrics
rouge = load("rouge")
bleu = load("bleu")


def evaluate_model(model, tokenizer, dataset, max_target_len=1024, max_input_len=1024):
    """Generate an output for every example and score it with ROUGE and BLEU.

    Args:
        model: seq2seq model exposing ``generate``.
        tokenizer: tokenizer matching ``model``.
        dataset: iterable of examples with an "input" (source text) and an
            "output" (reference text) field.
        max_target_len: ``max_length`` passed to ``generate``.
        max_input_len: inputs longer than this many tokens are truncated.

    Returns:
        Tuple ``(rouge_result, bleu_result, predictions, references)`` where
        the last two are lists of strings aligned with ``dataset``.
    """
    # Make sure dropout layers are inactive while generating.
    model.eval()
    # Run on whatever device the model lives on instead of assuming "cuda".
    device = next(model.parameters()).device

    predictions = []
    references = []

    for example in dataset:
        # Tokenize the input
        inputs = tokenizer(
            example["input"],
            return_tensors="pt",
            truncation=True,
            max_length=max_input_len,
        ).to(device)

        # Generate and decode the output
        outputs = model.generate(**inputs, max_length=max_target_len)
        pred_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Collect prediction and reference
        predictions.append(pred_text)
        references.append(example["output"])

    # ROUGE
    rouge_result = rouge.compute(predictions=predictions, references=references)

    # BLEU (references are passed as a list of lists: one list per prediction)
    bleu_result = bleu.compute(predictions=predictions, references=[[ref] for ref in references])

    return rouge_result, bleu_result, predictions, references

import torch

# Release cached, unused CUDA memory before generating.
torch.cuda.empty_cache()

# Evaluate on the test set (raw text examples, not the tokenized split).
rouge_result, bleu_result, preds, refs = evaluate_model(model, tokenizer, test_data)

# Print each metric dict under its banner.
for banner, scores in (("=== ROUGE ===", rouge_result), ("\n=== BLEU ===", bleu_result)):
    print(banner)
    print(scores)


=== ROUGE ===
{'rouge1': np.float64(0.12389869116873473), 'rouge2': np.float64(0.02457610823944534), 'rougeL': np.float64(0.08174830077850037), 'rougeLsum': np.float64(0.0957851775540901)}

=== BLEU ===
{'bleu': 0.003956072924342516, 'precisions': [0.25456303638341593, 0.043824863237955786, 0.012872628726287264, 0.008042617581825343], 'brevity_penalty': 0.12067551057148493, 'length_ratio': 0.32106335500145533, 'translation_length': 49638, 'reference_length': 154605}


# Visualizza alcuni esempi

In [None]:
# Show a few generated outputs next to their references.
# Bounded by the available data so a small test set does not raise IndexError.
num_examples = min(5, len(preds), len(refs), len(test_data))
for i in range(num_examples):
    print(f"\n--- Esempio {i+1} ---")
    print(f"Input:\n{test_data[i]['input']}")
    print(f"\nOutput generato:\n{preds[i]}")
    print(f"\nOutput corretto:\n{refs[i]}")



--- Esempio 1 ---
Input:
Generate a story for these personas: Developer, Researcher, Student

Section: Abstract
Abstract: ABSTRACT This paper describes Tacotron 2, a neural network architecture for speech synthesis directly from text. The system is composed of a recurrent sequence-to-sequence feature prediction network that maps character embeddings to mel-scale spectrograms, followed by a modified WaveNet model acting as a vocoder to synthesize time-domain waveforms from those spectrograms. Our model achieves a mean opinion score (MOS) of 4.53 comparable to a MOS of 4.58 for professionally recorded speech. To validate our design choices, we present ablation studies of key components of our system and evaluate the impact of using mel spectrograms as the conditioning input to WaveNet instead of linguistic, duration, and F0 features. We further show that using this compact acoustic intermediate representation allows for a significant reduction in the size of the WaveNet architecture. In