1. Installing libraries

In [None]:
!pip install datasets transformers
import numpy as np


2. Loading dataset

In [None]:
from datasets import load_dataset

# Load the XSum dataset. The rest of the notebook indexes the result by split
# name: "train", "validation" and "test".
dataset = load_dataset("xsum")

3. EDA of the XSum dataset


In [None]:
# Print the summary representation of the loaded dataset object.
print(dataset)


In [None]:
# Take the first training example and print its two text fields:
# the source article ("document") and its reference summary ("summary").
sample = dataset["train"][0]

print("DOCUMENT:\n", sample["document"])
print("\nSUMMARY:\n", sample["summary"])


In [None]:
# Fixed-size working subsets: the first N rows of each split.
train_data = dataset["train"].select(range(1000))
val_data = dataset["validation"].select(range(100))
# Only the first 100 of these test rows are used by the baseline evaluation.
test_data = dataset['test'].select(range(1000))

In [None]:

def word_count(text):
    """Return how many whitespace-separated words ``text`` contains."""
    words = text.split()
    return len(words)

# Word-length statistics over the first 5,000 training examples.
# The subset is selected once and each text column is read once, instead of
# re-selecting the subset and iterating over full rows separately for the
# documents and for the summaries.
eda_subset = dataset["train"].select(range(5000))

doc_lengths = [word_count(text) for text in eda_subset["document"]]
summary_lengths = [word_count(text) for text in eda_subset["summary"]]

# Same three-line report for both fields; only the heading and data differ.
for heading, lengths in (
    ("Document length stats (words):", doc_lengths),
    ("\nSummary length stats (words):", summary_lengths),
):
    print(heading)
    print("Min:", min(lengths))
    print("Mean:", int(np.mean(lengths)))
    print("Max:", max(lengths))


In [None]:
# Document word counts for the training subset, then the three percentiles
# of interest computed in a single call.
doc_lengths = [word_count(example["document"]) for example in train_data]
p50, p95, p99 = np.percentile(doc_lengths, [50, 95, 99])

print(f"50th percentile (median): {int(p50)} words")
print(f"95th percentile: {int(p95)} words")
print(f"99th percentile: {int(p99)} words")


Based on the percentile analysis, 99% of the sampled XSum news articles are under ~1,300 words. After tokenization, most of them fit within the 1024-token context window of BART-Large-CNN, so only the longest articles are truncated.
BART-Large-CNN is an encoder–decoder (seq2seq) model, specifically designed for generative tasks like abstractive summarization, making it well-suited for long news articles compared to encoder-only models.

4. Loading the model, establishing a baseline, then fine-tuning it with the LoRA and RS-LoRA methods

In [47]:
pip install transformers datasets peft accelerate evaluate rouge-score nltk




In [48]:
from transformers import BartForConditionalGeneration, BartTokenizer

# Checkpoint identifier shared by the tokenizer and the model.
model_name = "facebook/bart-large-cnn"

# Load the tokenizer and the seq2seq model from that checkpoint.
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)


Baseline inference (before fine-tuning)

In [49]:
def summarize(text, model, tokenizer, max_length=60):
    """Generate a summary of ``text`` with beam search.

    Args:
        text: Source document to summarize.
        model: Seq2seq model exposing ``generate`` and a ``device`` attribute.
        tokenizer: Tokenizer matching ``model``; used to encode the input and
            decode the generated ids.
        max_length: Maximum length (in tokens) of the generated summary.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # Inputs longer than 1024 tokens are truncated by the tokenizer.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=1024
    )
    # Keep the inputs on the same device as the model; otherwise generation
    # fails as soon as the model has been moved to the GPU.
    inputs = inputs.to(model.device)

    # Keyword arguments (rather than a positional tensor) also work when
    # `model` is a PEFT-wrapped model; the attention mask is passed explicitly
    # instead of being left for `generate` to infer.
    summary_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        num_beams=4,
        max_length=max_length
    )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


Tokenization for training

In [50]:
def preprocess(batch):
    """Tokenize XSum rows into encoder inputs and training labels.

    Args:
        batch: Mapping with "document" and "summary" entries. Each entry is a
            list of strings when used with ``map(..., batched=True)``, or a
            single string for one example.

    Returns:
        The tokenized documents (padded/truncated to 1024 tokens) with an added
        "labels" entry holding the summary token ids (padded/truncated to 64
        tokens), where padding positions are replaced by -100.
    """
    inputs = tokenizer(
        batch["document"],
        truncation=True,
        padding="max_length",
        max_length=1024
    )
    outputs = tokenizer(
        batch["summary"],
        truncation=True,
        padding="max_length",
        max_length=64
    )

    def _mask_padding(token_ids, attention_mask):
        # -100 is the index ignored by the cross-entropy loss, so padded
        # positions no longer contribute to the training/eval loss.
        return [
            token_id if keep else -100
            for token_id, keep in zip(token_ids, attention_mask)
        ]

    if isinstance(batch["summary"], str):
        # Single example: the tokenizer returns flat lists.
        inputs["labels"] = _mask_padding(
            outputs["input_ids"], outputs["attention_mask"]
        )
    else:
        inputs["labels"] = [
            _mask_padding(token_ids, attention_mask)
            for token_ids, attention_mask in zip(
                outputs["input_ids"], outputs["attention_mask"]
            )
        ]
    return inputs

# Apply `preprocess` to the training and validation subsets in batched mode.
tokenized_train = train_data.map(preprocess, batched=True)
tokenized_val   = val_data.map(preprocess, batched=True)


Baseline ROUGE evaluation

In [51]:
import torch
import evaluate
import nltk
# Fetch the NLTK "punkt" tokenizer data (no-op if it is already present).
nltk.download("punkt")

# Load the ROUGE metric used to score generated summaries against references.
rouge = evaluate.load("rouge")

# Switch the model to evaluation mode before running inference.
model.eval()

# Use the GPU when one is available, otherwise fall back to the CPU, and move
# the model there; inputs must be placed on the same device before generating.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def generate_summary_baseline(text, max_length=60, num_beams=4):
    """Summarize ``text`` with the notebook-level ``model`` and ``tokenizer``.

    Args:
        text: Source document to summarize.
        max_length: Maximum length (in tokens) of the generated summary.
        num_beams: Beam width used for beam search.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # Inputs longer than 1024 tokens are truncated; the encoded tensors are
    # moved to the same device as the model.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=1024
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        summary_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            num_beams=num_beams,
            max_length=max_length
        )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [52]:
# Score the un-fine-tuned model on the first 100 rows of the test subset.
test_subset = test_data.select(range(100))

# Generated summaries and their references, kept in matching order.
predictions, references = [], []
for sample in test_subset:
    generated = generate_summary_baseline(sample["document"])
    predictions.append(generated)
    references.append(sample["summary"])

baseline_scores = rouge.compute(predictions=predictions, references=references)

print("Baseline ROUGE scores:")
for metric_name, metric_value in baseline_scores.items():
    print(f"{metric_name}: {metric_value:.4f}")


KeyboardInterrupt: 

The pretrained BART-Large-CNN model was first evaluated, without fine-tuning, on a 100-example subset of the XSum test set to establish a baseline. Due to domain differences between CNN/DailyMail and XSum, the zero-shot ROUGE scores were modest (ROUGE-1 ≈ 0.20), highlighting the need for task-specific adaptation through fine-tuning.

In [None]:
# Archive the merged-model directory and download the archive through Colab.
# NOTE(review): "bart_lora_merged" is only written by the merge cell further
# down (merged_model.save_pretrained("bart_lora_merged")), so on a fresh
# top-to-bottom run this cell executes before that directory exists.
!zip -r bart_lora_merged.zip bart_lora_merged
from google.colab import files
files.download("bart_lora_merged.zip")



  adding: bart_lora_merged/ (stored 0%)
  adding: bart_lora_merged/special_tokens_map.json (deflated 85%)
  adding: bart_lora_merged/merges.txt (deflated 53%)
  adding: bart_lora_merged/generation_config.json (deflated 46%)
  adding: bart_lora_merged/tokenizer_config.json (deflated 75%)
  adding: bart_lora_merged/config.json (deflated 62%)
  adding: bart_lora_merged/model.safetensors

In [None]:
# Debugging aid: list running processes whose command line mentions "zip".
!ps aux | grep zip



Apply LoRA

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank 8, alpha 16, dropout 0.05, applied to the modules
# named "q_proj" and "v_proj"; bias parameters are not adapted.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="SEQ_2_SEQ_LM"
)

# Rebinds `model` to the adapter-wrapped model, so re-running this cell would
# wrap an already-wrapped model. Then report how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
from transformers import Trainer, TrainingArguments

import torch  # only needed for the CUDA availability check below

# Training configuration for the LoRA run: one epoch, effective batch size 8
# (batch size 1 x 8 accumulation steps), evaluation and checkpointing once per
# epoch, keeping the checkpoint with the lowest eval loss.
training_args = TrainingArguments(
    output_dir="./bart_lora_xsum",
    eval_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    # Mixed precision needs a GPU; a hard-coded True raises on CPU-only runtimes.
    fp16=torch.cuda.is_available(),
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    # Name the label column explicitly so eval_loss is computed even if the
    # Trainer cannot infer it from the PEFT-wrapped model's signature.
    label_names=["labels"],
    logging_steps=100,
    save_total_limit=2,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer
)

trainer.train()


In [None]:
# Save the trainer's current model and the tokenizer to a sub-directory of the
# LoRA run's output directory.
best_model_path = "./bart_lora_xsum/best_model"
trainer.model.save_pretrained(best_model_path)
tokenizer.save_pretrained(best_model_path)


In [None]:
# Merge the adapter into the base model and save the merged model together with
# the tokenizer. This "bart_lora_merged" directory is the one archived by the
# zip/download cell.
merged_model = model.merge_and_unload()
merged_model.save_pretrained("bart_lora_merged")
tokenizer.save_pretrained("bart_lora_merged")


RS-LoRA (Rank-Stabilized LoRA)

In [None]:
from peft import LoraConfig, get_peft_model

# Reload the original checkpoint so this run starts from the pretrained weights
# rather than from the model already trained with LoRA.
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")

# NOTE(review): unlike the LoRA configuration (lora_alpha=16, lora_dropout=0.05,
# bias="none"), this one uses lora_alpha=8 and leaves lora_dropout/bias at the
# library defaults, so the two runs differ in more than the scaling rule.
rslora_config = LoraConfig(
    r=8,
    lora_alpha=8,       # alpha happens to equal r here; that is not what selects RS-LoRA
    use_rslora=True,    # this flag enables rank-stabilized scaling
    target_modules=["q_proj", "v_proj"],
    task_type="SEQ_2_SEQ_LM"
)

# Wrap the fresh model with the RS-LoRA adapter and report trainable parameters.
model = get_peft_model(model, rslora_config)
model.print_trainable_parameters()


In [None]:
import torch  # only needed for the CUDA availability check below

# Dedicated arguments for the RS-LoRA run. Reusing `training_args` would write
# this run's checkpoints into "./bart_lora_xsum", the LoRA run's output
# directory, mixing the two runs' checkpoints under one save_total_limit.
# The hyperparameters are otherwise the same as for the LoRA run.
rslora_training_args = TrainingArguments(
    output_dir="./bart_rslora_xsum",
    eval_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    # Mixed precision needs a GPU; a hard-coded True raises on CPU-only runtimes.
    fp16=torch.cuda.is_available(),
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    # Name the label column explicitly so eval_loss is computed even if the
    # Trainer cannot infer it from the PEFT-wrapped model's signature.
    label_names=["labels"],
    logging_steps=100,
    save_total_limit=2,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=rslora_training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    tokenizer=tokenizer
)

trainer.train()


In [None]:
# Save the RS-LoRA trainer's model and the tokenizer to their own directory.
trainer.model.save_pretrained("./bart_rslora_xsum")
tokenizer.save_pretrained("./bart_rslora_xsum")
