In [2]:
# Step 0: Install Required Packages
!pip install transformers datasets peft accelerate evaluate sentencepiece py7zr -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/487.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m487.4/487.4 kB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.9/67.9 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m92.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Step 1: Import Libraries and Load a Small Sample of the Dataset

from datasets import load_dataset

# Load the 'samsum' dataset which contains chat dialogues and summaries.
dataset = load_dataset("samsum")

# Let's see one sample (this is just to check how it looks)
print("A sample from the dataset:")
print(dataset["train"][0])

# We will work on a small subset for fast training.
# For example: use first 1000 samples for training and first 300 for evaluation.
small_train = dataset["train"].select(range(1000))
small_test  = dataset["test"].select(range(300))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.04k [00:00<?, ?B/s]

samsum.py:   0%|          | 0.00/3.36k [00:00<?, ?B/s]

The repository for samsum contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/samsum.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


corpus.7z:   0%|          | 0.00/2.94M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14732 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/819 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/818 [00:00<?, ? examples/s]

A sample from the dataset:
{'id': '13818513', 'dialogue': "Amanda: I baked  cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-)", 'summary': 'Amanda baked cookies and will bring Jerry some tomorrow.'}


In [4]:
# Step 2: Preprocess the Data (Tokenization)
# We convert the text into tokens that the model can understand.
# We also add the prompt "Summarize: " to help the model know what to do.
from transformers import AutoTokenizer

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_data(batch):
    # Create input strings with the prompt
    inputs = ["Summarize: " + d for d in batch["dialogue"]]
    # Tokenize inputs and pad to max length
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    # Tokenize the summaries (labels) and pad to max length
    labels = tokenizer(batch["summary"], max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply preprocessing in batched mode and remove original text columns
tokenized_train = small_train.map(
    preprocess_data,
    batched=True,
    remove_columns=small_train.column_names
)

tokenized_test = small_test.map(
    preprocess_data,
    batched=True,
    remove_columns=small_test.column_names
)


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [5]:
# Step 3: Set Up the Model and Apply PEFT (LoRA)
# We load FLAN-T5 and attach a LoRA adapter to train only a small part of the model.

from transformers import AutoModelForSeq2SeqLM
from peft import get_peft_model, LoraConfig, TaskType

# Load the base model
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create a LoRA configuration: this tells the model which parts to train lightly.
peft_config = LoraConfig(
    r=16,                         # LoRA rank; smaller = lighter
    lora_alpha=32,                # scaling factor
    target_modules=["q", "v"],    # target the query and value matrices in T5
    lora_dropout=0.05,            # dropout for LoRA layers
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

# Apply the PEFT adapter to our model.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # This will show only a small fraction of parameters are trainable.


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

trainable params: 1,769,472 || all params: 249,347,328 || trainable%: 0.7096


In [7]:
# Step 4: Set Up Training Arguments and Train with Trainer

from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq

training_args = TrainingArguments(
    output_dir="./results-peft",
    per_device_train_batch_size=8,  # small batch size for limited memory
    per_device_eval_batch_size=8,
    num_train_epochs=3,             # 1 epoch for fast training
    logging_steps=20,
    evaluation_strategy="epoch",      # Evaluate after each epoch
    save_strategy="no",             # don't save checkpoints
    report_to="none",
    remove_unused_columns=False         # Needed for PEFT sometimes
)

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator
)

# Train the model
trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,26.4881,24.951292
2,13.2367,9.627893
3,5.8799,4.660674


TrainOutput(global_step=375, training_loss=19.483274963378907, metrics={'train_runtime': 110.7917, 'train_samples_per_second': 27.078, 'train_steps_per_second': 3.385, 'total_flos': 2070579511296000.0, 'train_loss': 19.483274963378907, 'epoch': 3.0})

In [9]:
# Step 5: Test the Fine-Tuned Model with a Sample Dialogue

def test_model(example_idx=0):
    # Get the raw dialogue and its reference summary from our test set.
    dialogue = small_test[example_idx]["dialogue"]
    reference = small_test[example_idx]["summary"]

    # Prepare the input with our prompt
    input_str = "Summarize: " + dialogue
    inputs = tokenizer(input_str, return_tensors="pt", truncation=True, padding="max_length").to(model.device)

    # Generate the model's summary
    output = model.generate(**inputs, max_new_tokens=100)
    prediction = tokenizer.decode(output[0], skip_special_tokens=True)

    print("📜 Original Dialogue:\n", dialogue)
    print("\n✅ Reference Summary:\n", reference)
    print("\n🤖 Model (PEFT) Summary:\n", prediction)

# Test on the first example in the small test set
test_model(0)


📜 Original Dialogue:
 Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 🙂
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye

✅ Reference Summary:
 Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.

🤖 Model (PEFT) Summary:
 Hannah can't find Betty's number.


 ROUGE on the Model

In [12]:
!pip install evaluate -q
!pip install rouge_score -q


  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone


In [13]:
import evaluate
rouge = evaluate.load("rouge")


In [14]:
# Create a Function to Generate and Evaluate Summaries

def compute_rouge(model, tokenizer, dataset, num_samples=100):
    predictions = []
    references = []

    for i in range(num_samples):
        dialogue = dataset[i]["dialogue"]
        reference = dataset[i]["summary"]

        # Prepare input
        input_text = "Summarize: " + dialogue
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length").to(model.device)

        # Generate prediction
        outputs = model.generate(**inputs, max_new_tokens=100)
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

        predictions.append(prediction)
        references.append(reference)

    # Compute ROUGE scores
    results = rouge.compute(predictions=predictions, references=references)
    return results


In [20]:
# Run ROUGE on Your Test Set

# Use 100 samples from test set (or less if needed)
rouge_results = compute_rouge(model, tokenizer, small_test, num_samples=100)

# Print the results nicely
for key, value in rouge_results.items():
    print(f"{key}: {value:.4f}")


rouge1: 0.3630
rouge2: 0.1508
rougeL: 0.3102
rougeLsum: 0.3097
