# Fine-tune Google FLAN-T5 Small with LoRA

This notebook logs into Hugging Face, loads the `google/flan-t5-small` model for basic inference, and then fine-tunes it on a local JSON file of question–answer pairs using LoRA via the PEFT library.

In [None]:
%pip install -U --quiet transformers datasets peft accelerate huggingface_hub


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# from huggingface_hub import notebook_login

# Log into Hugging Face (follow the popup in the notebook)
# notebook_login()
from huggingface_hub import login


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = 'google/flan-t5-small'

model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

print('Model and tokenizer loaded.')

Model and tokenizer loaded.


In [3]:
input_text = 'Roger Federer year end world number one'
inputs = tokenizer(input_text, return_tensors='pt')
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Roger Federer year end world number one


In [4]:
from datasets import load_dataset

# Assumes `qa_data.json` exists in the same directory and contains a list of {"question": ..., "answer": ...}
dataset = load_dataset('json', data_files='Federer_dataset.json', split='train')
print(dataset[0])

{'question': 'When was Roger Federer born?', 'answer': 'August 8, 1981'}


In [5]:
def preprocess(example):
    input_text = example['question']
    target_text = example['answer']
    model_inputs = tokenizer(input_text, truncation=True, padding='max_length', max_length=128)
    labels = tokenizer(target_text, truncation=True, padding='max_length', max_length=128)
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

processed_dataset = dataset.map(preprocess, remove_columns=dataset.column_names, batched=False)

In [38]:
from peft import LoraConfig, get_peft_model, TaskType
from transformers import GenerationConfig

lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=['q', 'v'],
    lora_dropout=0.1,
    bias='none',
    task_type=TaskType.SEQ_2_SEQ_LM,
)




gen_config = GenerationConfig.from_pretrained("google/flan-t5-small")



peft_model = get_peft_model(model, lora_config)
peft_model.generation_config = gen_config
peft_model.print_trainable_parameters()


trainable params: 2,752,512 || all params: 79,713,664 || trainable%: 3.4530




In [39]:
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='flan-t5-small-lora',
    per_device_train_batch_size=8,
    num_train_epochs=10,
    learning_rate=1e-4,
    logging_steps=10,
    save_strategy='no',
    fp16=False,
    label_names=["labels"], 
)

training_args.generation_config = gen_config
collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Seq2SeqTrainer(
    model=peft_model,
    args=training_args,
    train_dataset=processed_dataset,
    data_collator=collator,
)

trainer.train()



Step,Training Loss
10,43.4114
20,43.2857


TrainOutput(global_step=20, training_loss=43.348565673828126, metrics={'train_runtime': 68.0455, 'train_samples_per_second': 2.351, 'train_steps_per_second': 0.294, 'total_flos': 7773848862720.0, 'train_loss': 43.348565673828126, 'epoch': 10.0})

In [40]:
peft_model.save_pretrained('flan_t5_small_lora_adapter')
print('LoRA adapter saved to flan_t5_small_lora_adapter')

LoRA adapter saved to flan_t5_small_lora_adapter


In [41]:
from peft import PeftModel

base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
lora_model = PeftModel.from_pretrained(base_model, 'flan_t5_small_lora_adapter')

input_text = "When was the Open Era started?"
inputs = tokenizer(input_text, return_tensors='pt')
outputs = lora_model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

1897


In [None]:
# from huggingface_hub import HfApi
# api = HfApi()
# api.upload_folder(
#     repo_id='username/flan-t5-small-lora-adapter',
#     folder_path='flan_t5_small_lora_adapter',
#     repo_type='model',
#     commit_message='Add LoRA adapter'
# )

## FULL FINE TUNING

In [7]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig

# Load base model & tokenizer
model_name = "google/flan-t5-small"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load generation config
gen_config = GenerationConfig.from_pretrained(model_name)
model.generation_config = gen_config

# Make sure all weights are trainable
model.train()

# Print total trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"Trainable params: {trainable_params:,} / {total_params:,} "
      f"({trainable_params / total_params:.2%})")


Trainable params: 76,961,152 / 76,961,152 (100.00%)


In [9]:
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainer, TrainingArguments

# Training configuration
training_args = TrainingArguments(
    output_dir="flan_t5_small_finetuned",
    per_device_train_batch_size=8,
    num_train_epochs=20,              # Adjust as needed
    learning_rate=1e-2,
    logging_steps=2,
    save_strategy="epoch",
    fp16=False,                       # Set True if GPU supports
    label_names=["labels"],           # Needed for Seq2SeqTrainer
)

# Data collator
collator = DataCollatorForSeq2Seq(tokenizer, model=model)
training_args.generation_config = gen_config
# Trainer
trainer = Seq2SeqTrainer(
    model=model,                      # 🔥 Use full model (no adapters)
    args=training_args,
    train_dataset=processed_dataset,
    data_collator=collator,
    tokenizer=tokenizer,
)

# Start training
trainer.train()


  trainer = Seq2SeqTrainer(
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
2,31.0861
4,5.0783
6,2.1611
8,0.6109
10,0.3254
12,1.0067
14,0.3666
16,0.1583
18,0.1497
20,0.0931




TrainOutput(global_step=40, training_loss=2.07377796350047, metrics={'train_runtime': 918.5218, 'train_samples_per_second': 0.348, 'train_steps_per_second': 0.044, 'total_flos': 14871240376320.0, 'train_loss': 2.07377796350047, 'epoch': 20.0})

In [10]:
# Save fine-tuned model and tokenizer
model.save_pretrained("flan_t5_small_finetuned")
tokenizer.save_pretrained("flan_t5_small_finetuned")

print("🎉 Full fine-tuned model saved to 'flan_t5_small_finetuned'")


🎉 Full fine-tuned model saved to 'flan_t5_small_finetuned'


In [14]:


# Load the fine-tuned model
finetuned_model = AutoModelForSeq2SeqLM.from_pretrained("flan_t5_small_finetuned")
finetuned_tokenizer = AutoTokenizer.from_pretrained("flan_t5_small_finetuned")

# Example inference
input_text = "Bruh"
inputs = finetuned_tokenizer(input_text, return_tensors='pt')
outputs = finetuned_model.generate(**inputs, max_new_tokens=50)
print(finetuned_tokenizer.decode(outputs[0], skip_special_tokens=True))


5 times
