In [7]:
#  Install Only Missing Dependencies
!pip install -q peft rouge-score
!pip install -q evaluate


# STEP 2: Import Libraries
import torch
from transformers import (
    AutoModelForSeq2SeqLM, AutoTokenizer, DataCollatorForSeq2Seq,
    TrainingArguments, Trainer
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
import evaluate
import shutil

# STEP 3: Load Base Model + Tokenizer
model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

#  STEP 4: Apply LoRA Config
lora_config = LoraConfig(
    r=16,                      
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.1,          
    bias="none",
    task_type="SEQ_2_SEQ_LM"
)
model = get_peft_model(model, lora_config)

#  STEP 5: Load Dataset (subset for demo)
dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:50000]")

#  STEP 6: Preprocess Dataset
def preprocess(examples):
    inputs = examples["article"]
    targets = examples["highlights"]
    model_inputs = tokenizer(
        inputs, max_length=512, truncation=True
    )
    labels = tokenizer(
        targets, max_length=150, truncation=True
    )
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(
    preprocess, batched=True, remove_columns=["article", "highlights", "id"]
)

#  STEP 7: Split train/test
split_dataset = tokenized_dataset.train_test_split(test_size=0.1)
train_ds = split_dataset['train']
eval_ds = split_dataset['test']

#  STEP 8: Setup Data Collator
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

#  STEP 9: Training Arguments
training_args = TrainingArguments(
    output_dir="/kaggle/working/results",
    eval_strategy="steps",     
    eval_steps=500,                   
    logging_steps=50,
    learning_rate=3e-4,                
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=4,                
    weight_decay=0.01,
    save_total_limit=1,
    fp16=True,                         
    save_strategy="epoch",
    logging_dir="/kaggle/working/logs",
    report_to="none"                   
)

#  STEP 10: Trainer Setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    tokenizer=tokenizer,
    data_collator=data_collator
)

#  STEP 11: Train Model
trainer.train()

#  STEP 12: Save Fine-Tuned Model + Tokenizer
save_path = "/kaggle/working/finetuned_model"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print("✅ Model fine-tuned and saved to:", save_path)

#  STEP 13: Zip model for download
zip_path = "/kaggle/working/finetuned_model.zip"
shutil.make_archive("/kaggle/working/finetuned_model", 'zip', save_path)
print("✅ Model zipped successfully!")


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss
500,0.0,
1000,0.0,
1500,0.0,
2000,0.0,
2500,0.0,
3000,0.0,
3500,0.0,
4000,0.0,
4500,0.0,
5000,0.0,


✅ Model fine-tuned and saved to: /kaggle/working/finetuned_model
✅ Model zipped successfully!


In [8]:
from IPython.display import FileLink

# Link to download the zipped model
zip_path = "/kaggle/working/finetuned_model.zip"
FileLink(zip_path)
