# FLAN-T5 Full Fine-Tuning on BioLaySumm Dataset

**Author:** Nathan Chung  
**Course:** COMP3710 Pattern Analysis  
**Task:** Expert-to-Layperson Radiology Report Translation  
**Model:** T5-small Full Fine-Tuning (60M parameters)

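This notebook implements full fine-tuning of T5-small on the BioLaySumm dataset for translating expert radiology reports into layperson-friendly language. Note that the code below loads the original `t5-small` checkpoint rather than a FLAN-T5 variant, despite the notebook title.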

---


## 1. Setup and Installation

Install required packages and set up the environment.


In [None]:
# Install required packages
# PyTorch packages, fetched from the PyTorch wheel index given by --index-url (cu118 wheels)
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Hugging Face libraries plus the rouge-score package
%pip install transformers datasets accelerate evaluate peft rouge-score
# YAML parsing and progress bars
%pip install pyyaml tqdm

# This message is printed unconditionally once the lines above have run;
# nothing here checks that the installs actually succeeded.
print("✅ All packages installed successfully!")


In [None]:
# Import required libraries
import os
import json
import yaml
import torch
import numpy as np
from pathlib import Path
from typing import Dict, Any, List, Optional
from tqdm.auto import tqdm

# HuggingFace libraries
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
    GenerationConfig
)
from datasets import Dataset, load_dataset
import evaluate

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
has_cuda = torch.cuda.is_available()
device = torch.device('cuda') if has_cuda else torch.device('cpu')
print(f"Using device: {device}")
if has_cuda:
    # Report the name and total memory (in GB) of the first visible GPU
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {gpu_props.total_memory / 1e9:.1f} GB")


## 2. Configuration

Set up configuration for full fine-tuning.


In [None]:
# Single source of truth for the run: dataset, model, optimisation,
# output location and generation settings for full fine-tuning.
config = dict(
    # Dataset Configuration
    dataset=dict(
        name='BioLaySumm/BioLaySumm2025-LaymanRRG-opensource-track',
        max_source_length=256,
        max_target_length=128,
        seed=42,
    ),
    # Model Configuration
    model=dict(
        name='t5-small',  # T5-small for full fine-tuning
        torch_dtype='bfloat16',
    ),
    # Training Configuration
    training=dict(
        strategy='full',
        batch_size=4,
        gradient_accumulation_steps=4,  # Effective batch size = 16
        learning_rate=5e-5,  # Lower LR for full fine-tuning
        num_epochs=3,
        warmup_steps=500,
        weight_decay=0.01,
        max_grad_norm=1.0,
        eval_steps=1000,
        save_steps=1000,
        logging_steps=100,
        eval_strategy='steps',
        save_strategy='steps',
        load_best_model_at_end=True,
        report_to='none',
        seed=42,
    ),
    # Output Configuration
    output=dict(
        root='/content/outputs',
        run_name='t5-small-full-finetuning',
    ),
    # Evaluation Configuration
    evaluation=dict(
        max_new_tokens=128,
        num_beams=4,
        length_penalty=0.6,
        no_repeat_ngram_size=3,
        early_stopping=True,
    ),
)

print("✅ Configuration set up successfully!")
# Echo the handful of settings that matter most when skimming the cell output
for label, value in (
    ("Model", config['model']['name']),
    ("Strategy", config['training']['strategy']),
    ("Batch size", config['training']['batch_size']),
    ("Learning rate", config['training']['learning_rate']),
):
    print(f"{label}: {value}")


## 3. Dataset Loading and Model Setup

Load the BioLaySumm dataset and T5-small model for full fine-tuning.


In [None]:
# Load dataset and model
print("Loading BioLaySumm dataset...")
dataset = load_dataset(config['dataset']['name'], trust_remote_code=False)

print(f"Dataset loaded! Train: {len(dataset['train']):,}, Val: {len(dataset['validation']):,}, Test: {len(dataset['test']):,}")

# Check dataset columns
print("\nDataset columns:")
print(f"Train columns: {dataset['train'].column_names}")
print(f"Sample data:")
sample = dataset['train'][0]
for key, value in sample.items():
    # Truncate each field to 100 characters so long reports stay readable
    print(f"  {key}: {str(value)[:100]}...")

# Load model and tokenizer
model_name = config['model']['name']
print(f"\nLoading {model_name} model...")

# Resolve the weight dtype from the config (e.g. 'bfloat16' -> torch.bfloat16)
# instead of hard-coding it, so the config entry actually takes effect.
# NOTE(review): the weights are loaded directly in this dtype and the trainer
# also enables bf16; confirm that training without fp32 master weights is intended.
model_dtype = getattr(torch, config['model']['torch_dtype'])

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map=None
).to(device)

# Count parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"✅ Model loaded!")
print(f"Total parameters: {total_params:,} ({total_params/1e6:.1f}M)")
print(f"Trainable parameters: {trainable_params:,} ({trainable_params/1e6:.1f}M)")
print(f"Trainable percentage: {(trainable_params/total_params)*100:.1f}%")


## 4. Dataset Processing

Apply expert-to-layperson prompts and tokenize the datasets.


In [None]:
# Smart function to detect correct column names and apply prompts
def apply_prompts(examples):
    """Wrap each expert report in the translation prompt and pair it with its target.

    Args:
        examples: Batch mapping column names to lists of values (the shape
            passed by ``Dataset.map(..., batched=True)``).

    Returns:
        Dict with ``'input_text'`` (prompted expert reports) and
        ``'target_text'`` (layperson reports), both lists of equal length.

    Raises:
        ValueError: If no known expert-report or layperson-report column is
            present in ``examples``.
    """
    # Candidate column names, checked in priority order
    possible_expert_cols = ['expert_report', 'radiology_report', 'medical_report', 'report', 'source']
    possible_layman_cols = ['layman_report', 'layman_summary', 'summary', 'lay_summary', 'target']

    # Auto-detect column names: first candidate that exists in the batch wins
    expert_col = next((col for col in possible_expert_cols if col in examples), None)
    layman_col = next((col for col in possible_layman_cols if col in examples), None)

    if expert_col is None or layman_col is None:
        print(f"Available columns: {list(examples.keys())}")
        raise ValueError(f"Could not find expert column from {possible_expert_cols} or layman column from {possible_layman_cols}")

    print(f"Using columns: expert='{expert_col}', layman='{layman_col}'")

    # Real newlines separate the instruction from the report; the previous
    # doubled backslashes put the literal characters "\n\n" into the prompt.
    input_texts = [
        f"Translate this medical report into simple, easy-to-understand language for patients:\n\n{expert_report}"
        for expert_report in examples[expert_col]
    ]
    # zip() in the original truncated to the shorter column; keep that behaviour
    target_texts = list(examples[layman_col])[:len(input_texts)]
    input_texts = input_texts[:len(target_texts)]

    return {'input_text': input_texts, 'target_text': target_texts}

def preprocess_function(examples):
    """Tokenize prompted inputs and targets for seq2seq training.

    Reads the truncation limits from the global ``config`` and uses the global
    ``tokenizer``. No padding is applied here.

    Args:
        examples: Batch with ``'input_text'`` and ``'target_text'`` entries.

    Returns:
        The tokenizer output for the inputs, with the target token ids stored
        under ``'labels'``.
    """
    dataset_cfg = config['dataset']
    source_limit = dataset_cfg['max_source_length']
    target_limit = dataset_cfg['max_target_length']

    # Encode the prompted source text, truncated to the source limit
    encoded = tokenizer(
        examples['input_text'],
        max_length=source_limit,
        truncation=True,
        padding=False,
    )

    # Encode the targets the same way; only their token ids are kept
    target_encoding = tokenizer(
        examples['target_text'],
        max_length=target_limit,
        truncation=True,
        padding=False,
    )
    encoded['labels'] = target_encoding['input_ids']

    return encoded


In [None]:
# Build prompted text pairs for every split, dropping the raw columns
print("Processing datasets...")
train_dataset, val_dataset, test_dataset = (
    dataset[split].map(
        apply_prompts,
        batched=True,
        remove_columns=dataset[split].column_names,
    )
    for split in ('train', 'validation', 'test')
)

# Tokenize the train and validation splits; the text columns are removed
print("Tokenizing datasets...")
tokenized_train, tokenized_val = (
    prompted.map(
        preprocess_function,
        batched=True,
        remove_columns=prompted.column_names,
    )
    for prompted in (train_dataset, val_dataset)
)

print(f"✅ Datasets processed!")
print(f"Tokenized train: {len(tokenized_train):,}")
print(f"Tokenized validation: {len(tokenized_val):,}")

# Peek at the first tokenized training example as a sanity check
print("\nProcessed sample:")
sample = tokenized_train[0]
sample_input_ids = sample['input_ids']
sample_label_ids = sample['labels']
print(f"Input IDs length: {len(sample_input_ids)}")
print(f"Labels length: {len(sample_label_ids)}")
print(f"Sample input: {tokenizer.decode(sample_input_ids[:50])}...")
print(f"Sample target: {tokenizer.decode(sample_label_ids[:30])}...")


## 5. Training Setup and Execution

Set up training arguments and start training.


In [None]:
# Setup training
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model, padding=True)
rouge = evaluate.load("rouge")


def compute_metrics(eval_pred):
    """Score generated text against the references with ROUGE.

    Args:
        eval_pred: Pair ``(predictions, labels)`` supplied by the trainer.
            ``predictions`` must hold generated token ids, which is why the
            training arguments below enable ``predict_with_generate``.

    Returns:
        Dict mapping each ROUGE metric name to its score scaled to 0-100 and
        rounded to four decimals.
    """
    predictions, labels = eval_pred
    # Some models return extra outputs alongside the generated ids
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # -100 marks ignored/padded positions and is not a valid token id, so it
    # has to be swapped for the pad token before decoding either array.
    pad_id = tokenizer.pad_token_id
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    return {k: round(v * 100, 4) for k, v in result.items()}


# Create output directory
output_dir = Path(config['output']['root']) / config['output']['run_name']
output_dir.mkdir(parents=True, exist_ok=True)

# Training arguments
train_cfg = config['training']
eval_cfg = config['evaluation']
training_args = Seq2SeqTrainingArguments(
    output_dir=str(output_dir),
    num_train_epochs=train_cfg['num_epochs'],
    per_device_train_batch_size=train_cfg['batch_size'],
    per_device_eval_batch_size=train_cfg['batch_size'],
    gradient_accumulation_steps=train_cfg['gradient_accumulation_steps'],
    learning_rate=train_cfg['learning_rate'],
    weight_decay=train_cfg['weight_decay'],
    max_grad_norm=train_cfg['max_grad_norm'],
    warmup_steps=train_cfg['warmup_steps'],
    eval_strategy=train_cfg['eval_strategy'],
    eval_steps=train_cfg['eval_steps'],
    save_strategy=train_cfg['save_strategy'],
    save_steps=train_cfg['save_steps'],
    load_best_model_at_end=train_cfg['load_best_model_at_end'],
    logging_steps=train_cfg['logging_steps'],
    report_to=train_cfg['report_to'],
    seed=train_cfg['seed'],
    bf16=True,
    remove_unused_columns=False,
    save_total_limit=3,
    metric_for_best_model="rouge1",
    greater_is_better=True,
    # Evaluate by generating text; without this the trainer hands logits to
    # compute_metrics, which cannot be decoded and scored with ROUGE.
    predict_with_generate=True,
    # Reuse the generation settings declared in config['evaluation'].
    # generation_max_length bounds the total decoder length.
    generation_max_length=eval_cfg['max_new_tokens'],
    generation_num_beams=eval_cfg['num_beams'],
)

# Create trainer
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    processing_class=tokenizer
)

print("✅ Training setup complete!")
print(f"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"Output directory: {training_args.output_dir}")


In [None]:
# Announce the run: which checkpoint is trained and how many weights are updated
for banner_line in (
    "🚀 Starting full fine-tuning training...",
    f"Model: {model_name}",
    f"Strategy: Full fine-tuning (100% parameters trainable)",
    f"Total parameters: {total_params:,} ({total_params/1e6:.1f}M)",
    f"Trainable parameters: {trainable_params:,} ({trainable_params/1e6:.1f}M)",
):
    print(banner_line)

# Run the training loop; the result carries the final loss and a metrics dict
train_results = trainer.train()
run_metrics = train_results.metrics

print("\n✅ Training completed successfully!")
print(f"Final train loss: {train_results.training_loss:.4f}")
print(f"Training time: {run_metrics['train_runtime']:.2f} seconds")
print(f"Training samples per second: {run_metrics['train_samples_per_second']:.2f}")


## 6. Evaluation and Sample Predictions

Evaluate the trained model on the test set and generate sample predictions.


In [None]:
# Evaluate on test set
print("🔍 Evaluating model on test set...")

# Tokenize test set
tokenized_test = test_dataset.map(preprocess_function, batched=True, remove_columns=test_dataset.column_names)

# Run evaluation. The test split is passed explicitly so the trainer's
# configured validation set is left untouched; metric keys keep the default
# "eval_" prefix.
eval_results = trainer.evaluate(eval_dataset=tokenized_test)

print("\n📊 Test Set Evaluation Results:")
print("=" * 50)
for metric, value in eval_results.items():
    if 'rouge' in metric:
        print(f"{metric}: {value:.4f}")
    else:
        print(f"{metric}: {value}")
print("=" * 50)

# Generate sample predictions
print("\n🎯 Sample Predictions:")
test_samples = tokenized_test.select(range(3))
predictions = trainer.predict(test_samples)

pred_ids = predictions.predictions
# Some models return extra outputs alongside the main prediction array
if isinstance(pred_ids, tuple):
    pred_ids = pred_ids[0]
# A 3-D array means raw logits were returned instead of generated ids
# (generation not enabled on the trainer); fall back to the top token.
if pred_ids.ndim == 3:
    pred_ids = pred_ids.argmax(axis=-1)

# -100 marks ignored/padded positions and cannot be decoded, so replace it
# with the pad token in both predictions and references first.
pad_id = tokenizer.pad_token_id
pred_ids = np.where(pred_ids != -100, pred_ids, pad_id)
label_ids = np.where(predictions.label_ids != -100, predictions.label_ids, pad_id)

decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

for sample_no, (pred_text, ref_text) in enumerate(zip(decoded_preds, decoded_labels), start=1):
    print(f"\nSample {sample_no}:")
    print(f"Prediction: {pred_text}")
    print(f"Reference:  {ref_text}")
    print("-" * 80)


## 7. Save Model and Results

Save the trained model and results for future use.


In [None]:
# Save the trained model
print("💾 Saving trained model...")

model_save_path = output_dir / "final_model"
# parents=True so saving still works if the run directory was removed meanwhile
model_save_path.mkdir(parents=True, exist_ok=True)

model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

# Save configuration and results next to the weights
with open(model_save_path / "training_config.json", 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)

with open(model_save_path / "evaluation_results.json", 'w', encoding='utf-8') as f:
    json.dump(eval_results, f, indent=2)

print(f"✅ Model saved to: {model_save_path}")

# Final summary
print("\n🎉 Training Complete!")
print("=" * 50)
print(f"Model: {model_name}")
print(f"Strategy: Full fine-tuning")
print(f"Parameters: {total_params:,} ({total_params/1e6:.1f}M)")
print(f"Trainable: {trainable_params:,} ({trainable_params/1e6:.1f}M)")
# Format each score only when it is present: applying ':.4f' to the 'N/A'
# fallback string raises ValueError.
for label, metric_key in (
    ("ROUGE-1", "eval_rouge1"),
    ("ROUGE-2", "eval_rouge2"),
    ("ROUGE-L", "eval_rougeL"),
):
    score = eval_results.get(metric_key)
    shown = f"{score:.4f}" if isinstance(score, (int, float)) else "N/A"
    print(f"{label}: {shown}")
print("=" * 50)
