In [None]:
# Install Required Libraries

!pip install -q transformers datasets peft accelerate

In [None]:
# Load Dataset

from datasets import load_dataset

# Fetch the CNN/DailyMail corpus (config "3.0.0"); later cells index the result
# by split name ("train", "validation").
dataset = load_dataset(
    path="cnn_dailymail",
    name="3.0.0",
)

In [None]:
# Load Model & Tokenizer

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Checkpoint identifier shared by the model and its tokenizer so the two stay in sync.
model_name = "facebook/bart-large-cnn"

# Sequence-to-sequence model that later cells wrap with LoRA adapters.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Matching tokenizer, used by the preprocessing and training cells.
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [None]:
# Apply LoRA using PEFT: only the LoRA adapter weights are trained while the base model stays frozen, which greatly reduces the number of trainable parameters (see the print_trainable_parameters() output below).


from peft import LoraConfig, get_peft_model, TaskType

# Define LoRA config
# LoRA hyper-parameters for the seq2seq model loaded above.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    # Module names to adapt. These match BART's attention query/value projections.
    # NOTE(review): T5 names its projections differently (e.g. `q`, `v`), so this
    # list must be changed before reusing the config with a T5 checkpoint.
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

# Inject LoRA layers into the model
# NOTE(review): `model` is rebound to the wrapped model, so re-running this cell
# without re-running the model-loading cell would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)
# Prints how many parameters are trainable versus the total.
model.print_trainable_parameters()


In [None]:
# Preprocessing Function

def preprocess(example):
    """Tokenize articles and their reference summaries for seq2seq training.

    Args:
        example: A mapping with "article" and "highlights" entries. Each entry
            is either a single string or a list of strings (as passed by
            `dataset.map(..., batched=True)`).

    Returns:
        The tokenizer output for the articles (truncated/padded to 512 tokens)
        with an added "labels" entry holding the summary token ids
        (truncated/padded to 128 tokens). Padding positions in the labels are
        replaced by -100.
    """
    inputs = tokenizer(example["article"], max_length=512, truncation=True, padding="max_length")
    targets = tokenizer(example["highlights"], max_length=128, truncation=True, padding="max_length")

    # Labels are padded to a fixed length, so most positions hold the pad token.
    # -100 is the index the cross-entropy loss ignores; without this masking the
    # model is also trained (and evaluated) on predicting padding.
    pad_id = tokenizer.pad_token_id

    def _mask_padding(token_ids):
        """Return token_ids with every pad id swapped for the ignore index."""
        return [-100 if token_id == pad_id else token_id for token_id in token_ids]

    label_ids = targets["input_ids"]
    if label_ids and isinstance(label_ids[0], int):
        # Single (non-batched) example: a flat list of token ids.
        inputs["labels"] = _mask_padding(label_ids)
    else:
        # Batched call: one list of token ids per example.
        inputs["labels"] = [_mask_padding(ids) for ids in label_ids]
    return inputs

# Tokenize every split in batches and drop the original columns (taken from
# the train split) so only the fields returned by `preprocess` remain.
tokenized = dataset.map(
    preprocess,
    batched=True,
    remove_columns=dataset["train"].column_names,
)


In [None]:
# Define Training Arguments

from transformers import TrainingArguments

import inspect

# The per-epoch evaluation option was renamed from `evaluation_strategy` to
# `eval_strategy` in newer transformers releases, and the old spelling is
# rejected by recent ones. Because the install cell does not pin a version,
# pick whichever keyword the installed TrainingArguments actually accepts.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

# Training configuration: one epoch, evaluation at the end of each epoch,
# checkpoints under ./bart-lora, no external experiment tracker.
training_args = TrainingArguments(
    output_dir="./bart-lora",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=2e-4,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_dir="./logs",
    report_to="none",
    **{_eval_strategy_kwarg: "epoch"},
)

In [None]:
# Train with Trainer

from transformers import Trainer

import inspect

# Newer transformers releases take the tokenizer through `processing_class`
# and deprecate the `tokenizer` keyword; older releases only know `tokenizer`.
# Because the install cell does not pin a version, pick whichever keyword the
# installed Trainer accepts.
_tokenizer_kwarg = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

# Train on the tokenized train split and evaluate on the validation split,
# using the LoRA-wrapped model and the arguments defined in the previous cell.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    **{_tokenizer_kwarg: tokenizer},
)

trainer.train()

In [None]:
# Save Fine-Tuned LoRA Model

# Write the fine-tuned model and then the tokenizer into the same directory so
# they can be reloaded together.
for artifact in (model, tokenizer):
    artifact.save_pretrained("bart-lora-summary")