In [1]:
# Install required packages
!pip install optuna transformers peft datasets accelerate --quiet
!pip install sentencepiece --quiet

# Imports
import optuna
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
    TrainingArguments,
    Trainer
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import f1_score
import random
import os

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU in use:", torch.cuda.get_device_name(0))
else:
    print("Running on CPU")

# Load dataset
dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:80%]")
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Preprocessing function
def preprocess(example):
    input_text = "summarize: " + example["article"]
    target_text = example["highlights"]
    inputs = tokenizer(input_text, max_length=512, truncation=True, padding="max_length", return_tensors="pt")
    targets = tokenizer(target_text, max_length=128, truncation=True, padding="max_length", return_tensors="pt")
    return {
        "input_ids": inputs["input_ids"].squeeze(),
        "attention_mask": inputs["attention_mask"].squeeze(),
        "labels": targets["input_ids"].squeeze()
    }

# Apply preprocessing
processed_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)

# Objective function for Optuna
def objective(trial):
    # Set random seed
    seed = trial.suggest_int("seed", 1, 9999)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5)
    r = trial.suggest_int("lora_r", 4, 16)
    lora_alpha = trial.suggest_int("lora_alpha", 8, 32)

    # Load model
    model = T5ForConditionalGeneration.from_pretrained("t5-small")
    model = model.to(device)

    # Apply LoRA
    peft_config = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        target_modules=["q", "v"],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM
    )
    model = get_peft_model(model, peft_config)
    model = model.to(device)

    # Training arguments
    args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=1,
        per_device_train_batch_size=4,
        learning_rate=learning_rate,
        save_strategy="no",
        logging_dir="./logs",
        report_to="none"
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=processed_dataset.shuffle(seed=seed).select(range(100)),
        eval_dataset=processed_dataset.shuffle(seed=seed+1).select(range(20)),
    )

    trainer.train()
    eval_loss = trainer.evaluate()["eval_loss"]
    return eval_loss

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

print("Best Hyperparameters:", study.best_params)
print("Best Validation Loss:", study.best_value)

# Save the best model
best_config = LoraConfig(
    r=study.best_params["lora_r"],
    lora_alpha=study.best_params["lora_alpha"],
    target_modules=["q", "v"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")
model = get_peft_model(model, best_config)
model = model.to(device)

model.save_pretrained("./best_t5_model")
tokenizer.save_pretrained("./best_t5_model")

# Zip and download
!zip -r best_t5_model.zip ./best_t5_model
from google.colab import files
files.download("best_t5_model.zip")

CUDA available: True
GPU in use: Tesla T4


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
[I 2025-05-06 19:28:21,428] A new study created in memory with name: no-name-f49e97f9-42d1-4c86-84c8-79ddb4b5e0b7
No label_nam

Step,Training Loss


[I 2025-05-06 19:28:29,919] Trial 0 finished with value: 9.217378616333008 and parameters: {'seed': 3584, 'learning_rate': 4.9325833832067384e-05, 'lora_r': 12, 'lora_alpha': 31}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:28:35,290] Trial 1 finished with value: 11.07805347442627 and parameters: {'seed': 6468, 'learning_rate': 4.887747378278119e-05, 'lora_r': 7, 'lora_alpha': 25}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:28:40,503] Trial 2 finished with value: 10.467657089233398 and parameters: {'seed': 6605, 'learning_rate': 2.1542714073893407e-05, 'lora_r': 9, 'lora_alpha': 18}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:28:45,742] Trial 3 finished with value: 11.589811325073242 and parameters: {'seed': 6939, 'learning_rate': 1.2365870475692727e-05, 'lora_r': 6, 'lora_alpha': 24}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:28:50,812] Trial 4 finished with value: 9.87341022491455 and parameters: {'seed': 827, 'learning_rate': 1.94858032914352e-05, 'lora_r': 6, 'lora_alpha': 30}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:28:55,807] Trial 5 finished with value: 9.8447904586792 and parameters: {'seed': 4555, 'learning_rate': 2.1506342423968602e-05, 'lora_r': 13, 'lora_alpha': 19}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:29:00,970] Trial 6 finished with value: 9.938972473144531 and parameters: {'seed': 5308, 'learning_rate': 3.200473280926155e-05, 'lora_r': 4, 'lora_alpha': 11}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:29:05,961] Trial 7 finished with value: 9.457006454467773 and parameters: {'seed': 1765, 'learning_rate': 3.506456280927527e-05, 'lora_r': 11, 'lora_alpha': 22}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:29:11,099] Trial 8 finished with value: 9.46103286743164 and parameters: {'seed': 4674, 'learning_rate': 4.702884324084798e-05, 'lora_r': 5, 'lora_alpha': 18}. Best is trial 0 with value: 9.217378616333008.
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


[I 2025-05-06 19:29:16,128] Trial 9 finished with value: 10.000483512878418 and parameters: {'seed': 2485, 'learning_rate': 2.986236480743948e-05, 'lora_r': 6, 'lora_alpha': 27}. Best is trial 0 with value: 9.217378616333008.


Best Hyperparameters: {'seed': 3584, 'learning_rate': 4.9325833832067384e-05, 'lora_r': 12, 'lora_alpha': 31}
Best Validation Loss: 9.217378616333008
updating: best_t5_model/ (stored 0%)
updating: best_t5_model/spiece.model (deflated 48%)
updating: best_t5_model/special_tokens_map.json (deflated 85%)
updating: best_t5_model/adapter_config.json (deflated 55%)
updating: best_t5_model/tokenizer_config.json (deflated 94%)
updating: best_t5_model/README.md (deflated 66%)
updating: best_t5_model/adapter_model.safetensors (deflated 54%)
updating: best_t5_model/added_tokens.json (deflated 83%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
# Install required packages
!pip install optuna transformers peft datasets accelerate --quiet
!pip install sentencepiece --quiet

# Imports
import optuna
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
    TrainingArguments,
    Trainer
)
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import f1_score
import random
import os

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU in use:", torch.cuda.get_device_name(0))
else:
    print("Running on CPU")

# Load dataset
dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:80%]")
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Preprocessing function
def preprocess(example):
    input_text = "summarize: " + example["article"]
    target_text = example["highlights"]
    inputs = tokenizer(input_text, max_length=512, truncation=True, padding="max_length", return_tensors="pt")
    targets = tokenizer(target_text, max_length=128, truncation=True, padding="max_length", return_tensors="pt")
    return {
        "input_ids": inputs["input_ids"].squeeze(),
        "attention_mask": inputs["attention_mask"].squeeze(),
        "labels": targets["input_ids"].squeeze()
    }

# Apply preprocessing
processed_dataset = dataset.map(preprocess, remove_columns=dataset.column_names)

# Objective function for Optuna
def objective(trial):
    # Set random seed
    seed = trial.suggest_int("seed", 1, 9999)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5)
    r = trial.suggest_int("lora_r", 4, 16)
    lora_alpha = trial.suggest_int("lora_alpha", 8, 32)

    # Load model
    model = T5ForConditionalGeneration.from_pretrained("t5-small")
    model = model.to(device)

    # Apply LoRA
    peft_config = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        target_modules=["q", "v"],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM
    )
    model = get_peft_model(model, peft_config)
    model = model.to(device)

    # Training arguments
    args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=1,
        per_device_train_batch_size=4,
        learning_rate=learning_rate,
        save_strategy="no",
        logging_dir="./logs",
        report_to="none"
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=processed_dataset.shuffle(seed=seed).select(range(100)),
        eval_dataset=processed_dataset.shuffle(seed=seed+1).select(range(20)),
    )

    trainer.train()
    eval_loss = trainer.evaluate()["eval_loss"]
    return eval_loss

# Run Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

print("Best Hyperparameters:", study.best_params)
print("Best Validation Loss:", study.best_value)

# Save the best model
best_config = LoraConfig(
    r=study.best_params["lora_r"],
    lora_alpha=study.best_params["lora_alpha"],
    target_modules=["q", "v"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")
model = get_peft_model(model, best_config)
model = model.to(device)

model.save_pretrained("./best_t5_model")
tokenizer.save_pretrained("./best_t5_model")

# Zip and download
!zip -r best_t5_model.zip ./best_t5_model
from google.colab import files
files.download("best_t5_model.zip")

!pip install evaluate --quiet
!pip install rouge_score
import evaluate

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    vocab_size = tokenizer.vocab_size

    def flatten_and_clean(lst):
        flat = []
        for token in lst:
            if isinstance(token, (list, np.ndarray, torch.Tensor)):
                flat.extend(flatten_and_clean(token))
            else:
                try:
                    token = int(token)
                    if 0 <= token < vocab_size:
                        flat.append(token)
                except:
                    continue
        return flat

    cleaned_preds = [flatten_and_clean(p) for p in predictions]
    cleaned_labels = [flatten_and_clean(l) for l in labels]

    # Decode safely
    decoded_preds = tokenizer.batch_decode(cleaned_preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(cleaned_labels, skip_special_tokens=True)

    # Trim both lists to same length (smallest of the two)
    n = min(len(decoded_preds), len(decoded_labels))
    decoded_preds = decoded_preds[:n]
    decoded_labels = decoded_labels[:n]

    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [label.strip() for label in decoded_labels]

    # Compute metrics
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    bleu_result = bleu.compute(predictions=decoded_preds, references=[[label] for label in decoded_labels])

    return {
        "rouge1": rouge_result["rouge1"],
        "rouge2": rouge_result["rouge2"],
        "rougeL": rouge_result["rougeL"],
        "bleu": bleu_result["bleu"]
    }
print("Best Hyperparameters:", study.best_params)
print("Best Validation Loss:", study.best_value)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    vocab_size = tokenizer.vocab_size

    def flatten_and_clean(lst):
        flat = []
        for token in lst:
            if isinstance(token, (list, np.ndarray, torch.Tensor)):
                flat.extend(flatten_and_clean(token))
            else:
                try:
                    token = int(token)
                    if 0 <= token < vocab_size:
                        flat.append(token)
                except:
                    continue
        return flat

    cleaned_preds = [flatten_and_clean(p) for p in predictions]
    cleaned_labels = [flatten_and_clean(l) for l in labels]

    # Decode safely
    decoded_preds = tokenizer.batch_decode(cleaned_preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(cleaned_labels, skip_special_tokens=True)

    # Trim both lists to same length (smallest of the two)
    n = min(len(decoded_preds), len(decoded_labels))
    decoded_preds = decoded_preds[:n]
    decoded_labels = decoded_labels[:n]

    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [label.strip() for label in decoded_labels]

    # Compute metrics
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    bleu_result = bleu.compute(predictions=decoded_preds, references=[[label] for label in decoded_labels])

    return {
        "rouge1": rouge_result["rouge1"],
        "rouge2": rouge_result["rouge2"],
        "rougeL": rouge_result["rougeL"],
        "bleu": bleu_result["bleu"]
    }
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    learning_rate=4.528513019349172e-05,  # from Optuna trial
    save_strategy="no",
    logging_dir="./logs",
    report_to="none"
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=processed_dataset.shuffle(seed=6680).select(range(100)),
    eval_dataset=processed_dataset.shuffle(seed=6680 + 1).select(range(20)),
    compute_metrics=compute_metrics
)
import time

start = time.time()
trainer.train()
end = time.time()

train_time = end - start
print(f"Training time: {train_time:.2f} seconds")

if torch.cuda.is_available():
    max_memory = torch.cuda.max_memory_allocated() / 1e6  # convert bytes to MB
    print(f"Max GPU memory used: {max_memory:.2f} MB")
else:
    max_memory = 0
    print("CUDA not available. Skipping memory tracking.")

!pip install -U transformers --quiet

# Objective function
def objective(trial):
    # Sample hyperparameters
    seed = trial.suggest_int("seed", 1, 9999)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5)
    r = trial.suggest_int("lora_r", 4, 16)
    lora_alpha = trial.suggest_int("lora_alpha", 8, 32)

    # Set seeds
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Load model
    model = T5ForConditionalGeneration.from_pretrained("t5-small")

    # Apply LoRA
    peft_config = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        target_modules=["q", "v"],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM
    )
    model = get_peft_model(model, peft_config)

    args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=1,
        per_device_train_batch_size=4,
        learning_rate=learning_rate,
        save_strategy="no",
        logging_dir="./logs",
        report_to="none"
    )



    # Prepare trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=processed_dataset.shuffle(seed=seed).select(range(100)),
        eval_dataset=processed_dataset.shuffle(seed=seed + 1).select(range(20)),
        compute_metrics=compute_metrics
    )

    # Track time
    start = time.time()
    trainer.train()
    end = time.time()
    train_time = end - start
    print(f"Training time: {train_time:.2f} seconds")

    # GPU usage
    if torch.cuda.is_available():
        max_memory = torch.cuda.max_memory_allocated() / 1e6
        print(f"Max GPU memory used: {max_memory:.2f} MB")
    else:
        max_memory = 0
        print("CUDA not available.")

    # Evaluate
    eval_metrics = trainer.evaluate()
    eval_loss = eval_metrics["eval_loss"]

    # Scalarized loss (tune lambda if needed)
    lambda_weight = 0.0001
    scalarized_objective = eval_loss + lambda_weight * train_time

    print(f"Eval Loss: {eval_loss:.4f}, Time: {train_time:.2f}s, Scalarized: {scalarized_objective:.4f}")
    return scalarized_objective

# Run optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

# Output best results
print("Best Hyperparameters:", study.best_params)
print("Best Validation Loss:", study.best_value)

import shutil
shutil.make_archive('best_model_lora', 'zip', 'best_model_lora')

from google.colab import files
files.download('best_model_lora.zip')

# Install required packages
!pip install optuna transformers peft datasets accelerate --quiet
!pip install sentencepiece --quiet

from transformers import T5Tokenizer, T5ForConditionalGeneration
from peft import PeftModel, PeftConfig

# Load base model and tokenizer
base_model = T5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("best_model_lora")

# Load fine-tuned LoRA adapter
model = PeftModel.from_pretrained(base_model, "best_model_lora")
model.eval()

# Example input
text = "summarize: The US economy is facing challenges due to inflation and rate hikes."
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

# Generate summary
outputs = model.generate(**inputs, max_length=50, num_beams=4, early_stopping=True)
summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("📝 Summary:", summary)

In [3]:
!pip install rouge_score
import evaluate

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")



In [5]:
print("Best Hyperparameters:", study.best_params)
print("Best Validation Loss:", study.best_value)

Best Hyperparameters: {'seed': 3584, 'learning_rate': 4.9325833832067384e-05, 'lora_r': 12, 'lora_alpha': 31}
Best Validation Loss: 9.217378616333008


In [6]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    learning_rate=4.528513019349172e-05,  # from Optuna trial
    save_strategy="no",
    logging_dir="./logs",
    report_to="none"
)


In [7]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=processed_dataset.shuffle(seed=6680).select(range(100)),
    eval_dataset=processed_dataset.shuffle(seed=6680 + 1).select(range(20)),
    compute_metrics=compute_metrics
)


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [8]:
import time

start = time.time()
trainer.train()
end = time.time()

train_time = end - start
print(f"Training time: {train_time:.2f} seconds")

Step,Training Loss


Training time: 6.80 seconds


In [9]:
if torch.cuda.is_available():
    max_memory = torch.cuda.max_memory_allocated() / 1e6  # convert bytes to MB
    print(f"Max GPU memory used: {max_memory:.2f} MB")
else:
    max_memory = 0
    print("CUDA not available. Skipping memory tracking.")

Max GPU memory used: 1568.94 MB


In [10]:
!pip install -U transformers --quiet

In [15]:
# Objective function
def objective(trial):
    # Sample hyperparameters
    seed = trial.suggest_int("seed", 1, 9999)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5)
    r = trial.suggest_int("lora_r", 4, 16)
    lora_alpha = trial.suggest_int("lora_alpha", 8, 32)

    # Set seeds
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Load model
    model = T5ForConditionalGeneration.from_pretrained("t5-small")

    # Apply LoRA
    peft_config = LoraConfig(
        r=r,
        lora_alpha=lora_alpha,
        target_modules=["q", "v"],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM
    )
    model = get_peft_model(model, peft_config)

    args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=1,
        per_device_train_batch_size=4,
        learning_rate=learning_rate,
        save_strategy="no",
        logging_dir="./logs",
        report_to="none"
    )



    # Prepare trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=processed_dataset.shuffle(seed=seed).select(range(100)),
        eval_dataset=processed_dataset.shuffle(seed=seed + 1).select(range(20)),
        compute_metrics=compute_metrics
    )

    # Track time
    start = time.time()
    trainer.train()
    end = time.time()
    train_time = end - start
    print(f"Training time: {train_time:.2f} seconds")

    # GPU usage
    if torch.cuda.is_available():
        max_memory = torch.cuda.max_memory_allocated() / 1e6
        print(f"Max GPU memory used: {max_memory:.2f} MB")
    else:
        max_memory = 0
        print("CUDA not available.")

    # Evaluate
    eval_metrics = trainer.evaluate()
    eval_loss = eval_metrics["eval_loss"]

    # Scalarized loss (tune lambda if needed)
    lambda_weight = 0.0001
    scalarized_objective = eval_loss + lambda_weight * train_time

    print(f"Eval Loss: {eval_loss:.4f}, Time: {train_time:.2f}s, Scalarized: {scalarized_objective:.4f}")
    return scalarized_objective

# Run optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

# Output best results
print("Best Hyperparameters:", study.best_params)
print("Best Validation Loss:", study.best_value)

[I 2025-05-06 19:41:31,464] A new study created in memory with name: no-name-109b959a-bfc6-458c-8395-9a875bef74d5
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 7.84 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 19:46:36,837] Trial 0 finished with value: 10.985109649920464 and parameters: {'seed': 8749, 'learning_rate': 1.7667252438765223e-05, 'lora_r': 13, 'lora_alpha': 20}. Best is trial 0 with value: 10.985109649920464.


Eval Loss: 10.9843, Time: 7.84s, Scalarized: 10.9851


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.28 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 19:51:38,428] Trial 1 finished with value: 9.44226360039711 and parameters: {'seed': 2287, 'learning_rate': 2.6406374650900043e-05, 'lora_r': 8, 'lora_alpha': 27}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 9.4418, Time: 4.28s, Scalarized: 9.4423


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.26 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 19:56:38,908] Trial 2 finished with value: 10.336676637601853 and parameters: {'seed': 6525, 'learning_rate': 3.487395447154521e-05, 'lora_r': 5, 'lora_alpha': 19}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 10.3363, Time: 4.26s, Scalarized: 10.3367


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.37 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 20:01:39,559] Trial 3 finished with value: 10.139937579584121 and parameters: {'seed': 2075, 'learning_rate': 1.5638347967361974e-05, 'lora_r': 4, 'lora_alpha': 15}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 10.1395, Time: 4.37s, Scalarized: 10.1399


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.18 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 20:06:41,837] Trial 4 finished with value: 10.555175150108337 and parameters: {'seed': 6728, 'learning_rate': 1.141627876174082e-05, 'lora_r': 6, 'lora_alpha': 13}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 10.5548, Time: 4.18s, Scalarized: 10.5552


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.16 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 20:11:42,411] Trial 5 finished with value: 10.32590690703392 and parameters: {'seed': 5428, 'learning_rate': 2.157256246229599e-05, 'lora_r': 12, 'lora_alpha': 17}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 10.3255, Time: 4.16s, Scalarized: 10.3259


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.35 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 20:16:45,145] Trial 6 finished with value: 10.759498871541023 and parameters: {'seed': 6258, 'learning_rate': 3.061262870352042e-05, 'lora_r': 13, 'lora_alpha': 16}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 10.7591, Time: 4.35s, Scalarized: 10.7595


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.24 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 20:21:54,618] Trial 7 finished with value: 12.029436410546303 and parameters: {'seed': 274, 'learning_rate': 3.6939998773275036e-05, 'lora_r': 5, 'lora_alpha': 26}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 12.0290, Time: 4.24s, Scalarized: 12.0294


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.46 seconds
Max GPU memory used: 2131.42 MB


[I 2025-05-06 20:26:57,346] Trial 8 finished with value: 10.599328813099861 and parameters: {'seed': 4529, 'learning_rate': 1.874338483543256e-05, 'lora_r': 8, 'lora_alpha': 28}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 10.5989, Time: 4.46s, Scalarized: 10.5993


No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss


Training time: 4.28 seconds
Max GPU memory used: 2133.30 MB


[I 2025-05-06 20:31:58,646] Trial 9 finished with value: 11.335437957668304 and parameters: {'seed': 4165, 'learning_rate': 3.230649777601528e-05, 'lora_r': 16, 'lora_alpha': 26}. Best is trial 1 with value: 9.44226360039711.


Eval Loss: 11.3350, Time: 4.28s, Scalarized: 11.3354
Best Hyperparameters: {'seed': 2287, 'learning_rate': 2.6406374650900043e-05, 'lora_r': 8, 'lora_alpha': 27}
Best Validation Loss: 9.44226360039711


In [16]:
model.save_pretrained("best_model_lora")
tokenizer.save_pretrained("best_model_lora")

('best_model_lora/tokenizer_config.json',
 'best_model_lora/special_tokens_map.json',
 'best_model_lora/spiece.model',
 'best_model_lora/added_tokens.json')

In [17]:
import shutil
shutil.make_archive('best_model_lora', 'zip', 'best_model_lora')

'/content/best_model_lora.zip'

In [2]:
from google.colab import files
files.download('best_model_lora.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Install required packages
!pip install optuna transformers peft datasets accelerate --quiet
!pip install sentencepiece --quiet

In [3]:
# Install required packages
!pip install optuna transformers peft datasets accelerate --quiet
!pip install sentencepiece --quiet

from transformers import T5Tokenizer, T5ForConditionalGeneration
from peft import PeftModel, PeftConfig

# Load base model and tokenizer
base_model = T5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("best_model_lora")

# Load fine-tuned LoRA adapter
model = PeftModel.from_pretrained(base_model, "best_model_lora")
model.eval()

# Example input
text = "summarize: The US economy is facing challenges due to inflation and rate hikes."
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

# Generate summary
outputs = model.generate(**inputs, max_length=50, num_beams=4, early_stopping=True)
summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("📝 Summary:", summary)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


📝 Summary: the economy is facing challenges due to inflation and rate hikes.
