In [None]:
# 1. Disinstalla tutto e pulisci la cache
!pip uninstall -y transformers accelerate torch torchvision torchaudio textstat
!pip cache purge

# 2. Installa versioni compatibili (CUDA 11.8)
!pip install --index-url https://download.pytorch.org/whl/cu118 \
          torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0

!pip install transformers==4.48.2 accelerate==1.9.0 \
          datasets sentencepiece evaluate rouge-score bert-score textstat


Found existing installation: transformers 4.54.0
Uninstalling transformers-4.54.0:
  Successfully uninstalled transformers-4.54.0
Found existing installation: accelerate 1.9.0
Uninstalling accelerate-1.9.0:
  Successfully uninstalled accelerate-1.9.0
Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124
[0mFiles removed: 0
Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch==2.7.0
  Downloading https://download.pytorch.org/whl/cu118/torch-2.7.0%2Bcu118-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (28 kB)
Collecting torchvision==0.22.0
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.22.0%2Bcu11

In [None]:
from google.colab import drive
# Mount Google Drive so the dataset files and the model output folder are reachable.
drive.mount('/content/drive')

# Single project folder on Drive; every path below is derived from it so the
# location only has to be changed in one place.
PROJECT_DIR = "/content/drive/MyDrive/AIScientist/FlanT5"

# Dataset paths
TRAIN_PATH = f"{PROJECT_DIR}/train_ready_trunc.json"
TEST_PATH = f"{PROJECT_DIR}/test_ready_trunc.json"

# Where the fine-tuned model is saved
OUTPUT_DIR = f"{PROJECT_DIR}/flan_t5_step2_model"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Prepara dataset Hugging Face

In [None]:
import json
from datasets import Dataset

def _read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def _to_hf_dataset(records):
    """Build a Dataset mapping each record's "input"/"output" to input_text/target_text."""
    rows = [{"input_text": rec["input"], "target_text": rec["output"]} for rec in records]
    return Dataset.from_list(rows)


# Load the raw JSON records
train_data = _read_json(TRAIN_PATH)
test_data = _read_json(TEST_PATH)

# Convert them to Hugging Face Datasets
train_dataset = _to_hf_dataset(train_data)
test_dataset = _to_hf_dataset(test_data)

print(train_dataset)
print(test_dataset)

Dataset({
    features: ['input_text', 'target_text'],
    num_rows: 1866
})
Dataset({
    features: ['input_text', 'target_text'],
    num_rows: 466
})


# Tokenizzazione

In [None]:
from transformers import T5Tokenizer

# Checkpoint identifier handed to `from_pretrained`; the tokenizer is loaded from it here.
MODEL_NAME = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)

# Token limits used as `max_length` when preprocess_data tokenizes the
# input texts (source) and the target texts (labels).
MAX_SOURCE_LEN = 1024
MAX_TARGET_LEN = 1024

def preprocess_data(examples):
    """Tokenize a batch of examples for seq2seq fine-tuning.

    Args:
        examples: Batch mapping with "input_text" and "target_text" entries
            (lists of strings when called through ``Dataset.map(batched=True)``).

    Returns:
        The tokenizer output for the input texts (truncated/padded to
        MAX_SOURCE_LEN) with an added "labels" entry: the target token ids
        (truncated/padded to MAX_TARGET_LEN) in which every padding id is
        replaced by -100.
    """
    model_inputs = tokenizer(
        examples["input_text"],
        max_length=MAX_SOURCE_LEN,
        truncation=True,
        padding="max_length"
    )
    target_ids = tokenizer(
        examples["target_text"],
        max_length=MAX_TARGET_LEN,
        truncation=True,
        padding="max_length"
    )["input_ids"]

    # Targets are padded to a fixed length; label positions set to -100 are
    # skipped by the model's loss, so padding must be masked out here or the
    # model is trained (and scored) mostly on predicting pad tokens.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [tok if tok != pad_id else -100 for tok in seq]
        for seq in target_ids
    ]
    return model_inputs

# Both splits are tokenized in batches; the raw text columns are dropped so
# only the tokenizer outputs and the labels remain.
_map_options = {"batched": True, "remove_columns": ["input_text", "target_text"]}
train_enc = train_dataset.map(preprocess_data, **_map_options)
test_enc = test_dataset.map(preprocess_data, **_map_options)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/1866 [00:00<?, ? examples/s]

Map:   0%|          | 0/466 [00:00<?, ? examples/s]

# Setup Trainer

In [None]:
from transformers import (
    T5ForConditionalGeneration,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
)

model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

import os
import torch

# fp16 mixed precision is a known source of overflow with T5 checkpoints, and
# the recorded run of this notebook reported a training loss of 0.0 with no
# validation loss. Use bf16 on GPUs with native support (compute capability
# >= 8, i.e. Ampere or newer) and plain fp32 everywhere else.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

training_args = Seq2SeqTrainingArguments(
    output_dir=OUTPUT_DIR,
    eval_strategy="steps",  # current name of the deprecated `evaluation_strategy`
    eval_steps=500,
    save_steps=1000,
    logging_steps=100,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=5e-5,
    save_total_limit=2,
    predict_with_generate=True,
    generation_max_length=MAX_TARGET_LEN,  # optional but recommended
    bf16=use_bf16,
    # Disables all logging integrations (W&B included); replaces the
    # deprecated WANDB_DISABLED environment variable.
    report_to="none",
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


# Avvia training

In [None]:
# Collect the trainer inputs first: the model, the training configuration and
# the tokenized train / evaluation splits.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_enc,
    "eval_dataset": test_enc,
}
trainer = Seq2SeqTrainer(**trainer_kwargs)

# Run fine-tuning; as the last expression of the cell, the returned
# TrainOutput is displayed.
trainer.train()

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss,Validation Loss
500,0.0,


TrainOutput(global_step=699, training_loss=0.0, metrics={'train_runtime': 2731.5416, 'train_samples_per_second': 2.049, 'train_steps_per_second': 0.256, 'total_flos': 7641892053319680.0, 'train_loss': 0.0, 'epoch': 2.990353697749196})

# Salva modello finale

In [None]:
# Persist the fine-tuned weights and configuration to OUTPUT_DIR.
trainer.save_model(OUTPUT_DIR)
# The trainer was built without a tokenizer, so save it explicitly: the output
# directory then holds everything needed to reload the model for inference.
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"Model saved to {OUTPUT_DIR}")

Model saved to /content/drive/MyDrive/AIScientist/FlanT5/flan_t5_step2_model


In [None]:
# =======================
# 2. Import e setup
# =======================
import json
import evaluate
from transformers import T5Tokenizer, T5ForConditionalGeneration
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from bert_score import score as bert_score
from textstat import flesch_kincaid_grade
from tqdm import tqdm

import torch

# Paths
MODEL_PATH = "/content/drive/MyDrive/AIScientist/FlanT5/flan_t5_step2_model"
TEST_FILE = "/content/drive/MyDrive/AIScientist/FlanT5/test_ready_trunc.json"

# Token limits for the tokenized input and for generation.
MAX_INPUT_TOKENS = 1024
MAX_OUTPUT_TOKENS = 1024
# Pick the GPU when one is visible and fall back to the CPU otherwise, so the
# evaluation cells also run on a runtime without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# =======================
# 3. Load model and tokenizer
# =======================
# The tokenizer comes from the base checkpoint, the weights from the fine-tuned directory.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH)
model = model.to(DEVICE)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [None]:
# =======================
# 4. Load test data
# =======================
with open(TEST_FILE, encoding="utf-8") as test_file:
    test_data = json.load(test_file)

# Source texts and gold outputs, kept in the same order as the test records.
inputs = [record["input"] for record in test_data]
references = [record["output"] for record in test_data]

In [None]:
# =======================
# 5. Generate predictions
# =======================
def _generate_prediction(text):
    """Tokenize one input text, run generation, and return the decoded output string."""
    encoded = tokenizer(
        text,
        return_tensors="pt",
        max_length=MAX_INPUT_TOKENS,
        truncation=True,
    ).to(DEVICE)
    generated = model.generate(**encoded, max_length=MAX_OUTPUT_TOKENS)
    # Only the first (and only) returned sequence is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)


predictions = [
    _generate_prediction(text)
    for text in tqdm(inputs, desc="Generating predictions")
]

Generating predictions: 100%|██████████| 466/466 [1:03:21<00:00,  8.16s/it]


In [None]:
# =======================
# 6. Compute ROUGE
# =======================
# Load the "rouge" metric through `evaluate` and score the generated
# predictions against the references; the result is read by key
# ("rouge1", "rouge2", "rougeL") in the final results cell.
rouge = evaluate.load("rouge")
rouge_results = rouge.compute(predictions=predictions, references=references)


Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
# =======================
# 7. Compute BLEU
# =======================
# Whitespace tokenization; corpus_bleu takes a list of reference lists per
# hypothesis, hence the extra nesting around each tokenized reference.
tokenized_preds = list(map(str.split, predictions))
tokenized_refs = [[ref_tokens] for ref_tokens in map(str.split, references)]
smoothing = SmoothingFunction().method1
bleu_score_value = corpus_bleu(
    tokenized_refs,
    tokenized_preds,
    smoothing_function=smoothing,
)


In [None]:
# =======================
# 8. Compute BERTScore
# =======================
# Returns three values (P, R, F1); only the mean of F1 is reported in the final
# results cell.
# NOTE(review): with rescale_with_baseline=True the scores are presumably not
# confined to [0, 1] -- the recorded run reports a negative mean F1.
P, R, F1 = bert_score(predictions, references, lang="en", rescale_with_baseline=True)


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# =======================
# 9. Compute FKGL (readability)
# =======================
# One Flesch-Kincaid grade per generated prediction, then the plain average.
fkgl_scores = list(map(flesch_kincaid_grade, predictions))
fkgl_total = sum(fkgl_scores)
avg_fkgl = fkgl_total / len(fkgl_scores)


In [None]:
# =======================
# 10. Final results
# =======================
# A single print call; `sep="\n"` puts each metric on its own line.
print(
    "\n===== Evaluation Results =====",
    f"ROUGE-1: {rouge_results['rouge1']:.4f}",
    f"ROUGE-2: {rouge_results['rouge2']:.4f}",
    f"ROUGE-L: {rouge_results['rougeL']:.4f}",
    f"BLEU: {bleu_score_value:.4f}",
    f"BERTScore (F1): {F1.mean().item():.4f}",
    f"Average FKGL: {avg_fkgl:.2f}",
    sep="\n",
)



===== Evaluation Results =====
ROUGE-1: 0.1306
ROUGE-2: 0.0222
ROUGE-L: 0.0926
BLEU: 0.0103
BERTScore (F1): -0.2417
Average FKGL: 16.30


In [None]:
# =======================
# 11. (Optional) Save results to CSV
# =======================
import pandas as pd

# One row per test example: source text, gold output, model output and the
# readability grade of the model output.
results_columns = {
    "input": inputs,
    "reference": references,
    "prediction": predictions,
    "fkgl": fkgl_scores,
}
df_results = pd.DataFrame(results_columns)
df_results.to_csv(
    "/content/drive/MyDrive/AIScientist/FlanT5/evaluation_results.csv",
    index=False,
)
print("Detailed results saved to evaluation_results.csv")


Detailed results saved to evaluation_results.csv
