<a href="https://colab.research.google.com/github/Prianka-Mukhopadhyay/bart_distilbart_pegasus_t5_text_summarization/blob/main/bart_distilbart_pegasus_t5_summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the libraries this notebook depends on
!pip install transformers torch gradio


Importing BART

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration

# Load the tokenizer for the facebook/bart-large-cnn checkpoint.
# `tokenizer` and `model` are module-level names that summarize_text() reads.
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

# Load the pretrained BART model from the same checkpoint as the tokenizer.
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')


In [None]:
# def summarize_text(text):
#     # Encode the text into tokens
#     inputs = tokenizer([text], max_length=1024, return_tensors='pt', truncation=True)

#     # Generate summary IDs
#     summary_ids = model.generate(
#         inputs['input_ids'],
#         max_length=150,
#         min_length=40,
#         length_penalty=2.0,
#         num_beams=4,
#         early_stopping=True
#     )

#     # Decode the generated IDs back into text
#     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
#     return summary


In [None]:
def summarize_text(text):
    """Summarize ``text`` with the module-level BART ``tokenizer`` and ``model``.

    Args:
        text: The passage to summarize. Input longer than 1024 tokens is
            truncated by the tokenizer.

    Returns:
        The decoded summary, or ``""`` when ``text`` is ``None``, empty or
        whitespace only.
    """
    # A blank textbox has nothing to summarize; return early instead of
    # asking the model to generate from an empty prompt.
    if not text or not text.strip():
        return ""

    # Encode the text into tokens
    inputs = tokenizer([text], max_length=1024, return_tensors='pt', truncation=True)

    # Generate summary IDs with beam search
    summary_ids = model.generate(
        inputs['input_ids'],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs['attention_mask'],
        max_length=100,   # shorter summaries
        min_length=20,
        length_penalty=1.0,
        num_beams=6,      # wider beam search than the other helpers
        early_stopping=True
    )

    # Decode the generated IDs back into text
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [None]:
sample_text = """
My favorite book in the Harry Potter series is "Harry Potter and the Prisoner of Azkaban". It's a pivotal book in the series, introducing darker themes and complex characters while still maintaining the whimsical charm of the earlier books. The introduction of Sirius Black and Remus Lupin, along with the concept of dementors, adds depth to the wizarding world and Harry's personal journey. The intricate plot, with its time-travel elements and moral ambiguities, keeps the reader guessing until the very end. It's a book that truly showcases J.K. Rowling's storytelling prowess and her ability to blend fantasy with relatable human emotions
"""
print(summarize_text(sample_text))

Gradio

In [None]:
!pip install gradio --quiet
import gradio as gr


In [None]:
# Define Gradio interface
# Wraps summarize_text() in a one-textbox web form: the textbox content is
# passed to the function and the returned string is shown as plain text.
interface = gr.Interface(
    fn=summarize_text,           # Function to call
    inputs=gr.Textbox(lines=10, placeholder="Paste text here..."),  # Input box
    outputs="text",              # Output is plain text
    title="BART Text Summarizer",
    description="Enter text and get a summary using Hugging Face BART model."
)

# Launch the app
interface.launch()


**Load DistilBART and Tokenizer**

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration

# DistilBART is a smaller, faster version of BART.
# Model: sshleifer/distilbart-cnn-12-6
# Tokenizer converts text into numerical tokens the model can understand.
# Both objects are loaded from the same checkpoint name and are read as
# module-level names by summarize_distilbart().

distilbart_model_name = "sshleifer/distilbart-cnn-12-6"
distilbart_tokenizer = BartTokenizer.from_pretrained(distilbart_model_name)
distilbart_model = BartForConditionalGeneration.from_pretrained(distilbart_model_name)

**Generate Summary with DistilBART**

In [None]:
def summarize_distilbart(text):
    """Summarize ``text`` with the module-level DistilBART tokenizer and model.

    Args:
        text: The passage to summarize. Input longer than 1024 tokens is
            truncated by the tokenizer.

    Returns:
        The decoded summary, or ``""`` when ``text`` is ``None``, empty or
        whitespace only.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # Encode the input text into tokens
    inputs = distilbart_tokenizer([text], max_length=1024, return_tensors='pt', truncation=True)

    # Generate summary IDs with beam search
    summary_ids = distilbart_model.generate(
        inputs['input_ids'],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs['attention_mask'],
        max_length=150,        # max length of summary
        min_length=40,         # minimum length
        length_penalty=2.0,    # > 0 makes beam search favour longer sequences
        num_beams=4,           # beam search size for better quality
        early_stopping=True    # stop once enough finished beams exist
    )

    # Decode generated tokens back into text
    return distilbart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
# =========================
# Step 3: Test DistilBART Summary
# =========================

sample_text = """
My favorite book in the Harry Potter series is "Harry Potter and the Prisoner of Azkaban". It's a pivotal book in the series, introducing darker themes and complex characters while still maintaining the whimsical charm of the earlier books. The introduction of Sirius Black and Remus Lupin, along with the concept of dementors, adds depth to the wizarding world and Harry's personal journey. The intricate plot, with its time-travel elements and moral ambiguities, keeps the reader guessing until the very end. It's a book that truly showcases J.K. Rowling's storytelling prowess and her ability to blend fantasy with relatable human emotions
"""

# Generate summary
distilbart_summary = summarize_distilbart(sample_text)

# Print output
print("=== DistilBART Summary ===")
print(distilbart_summary)


In [None]:
# =========================
# Step 1: Load Pegasus Model and Tokenizer
# =========================

from transformers import PegasusTokenizer, PegasusForConditionalGeneration

# Model trained for summarization tasks
# The tokenizer and model are loaded from the same checkpoint name and are
# read as module-level names by summarize_pegasus().
pegasus_model_name = "google/pegasus-cnn_dailymail"
pegasus_tokenizer = PegasusTokenizer.from_pretrained(pegasus_model_name)
pegasus_model = PegasusForConditionalGeneration.from_pretrained(pegasus_model_name)


In [None]:
# =========================
# Step 2: Summarize Text with Pegasus
# =========================

def summarize_pegasus(text):
    """Summarize ``text`` with the module-level Pegasus tokenizer and model.

    Args:
        text: The passage to summarize. Input longer than 1024 tokens is
            truncated by the tokenizer.

    Returns:
        The decoded summary, or ``""`` when ``text`` is ``None``, empty or
        whitespace only.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # Encode the input text into tokens
    inputs = pegasus_tokenizer([text], max_length=1024, return_tensors='pt', truncation=True)

    # Generate summary IDs
    summary_ids = pegasus_model.generate(
        inputs['input_ids'],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs['attention_mask'],
        max_length=150,        # max length of summary
        min_length=40,         # min length of summary
        length_penalty=2.0,    # > 0 makes beam search favour longer sequences
        num_beams=4,           # beam search for better quality
        early_stopping=True
    )

    # Decode generated IDs back into text
    return pegasus_tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [None]:
# =========================
# Step 3: Test Pegasus Summary
# =========================

sample_text = """
My favorite book in the Harry Potter series is "Harry Potter and the Prisoner of Azkaban". It's a pivotal book in the series, introducing darker themes and complex characters while still maintaining the whimsical charm of the earlier books. The introduction of Sirius Black and Remus Lupin, along with the concept of dementors, adds depth to the wizarding world and Harry's personal journey. The intricate plot, with its time-travel elements and moral ambiguities, keeps the reader guessing until the very end. It's a book that truly showcases J.K. Rowling's storytelling prowess and her ability to blend fantasy with relatable human emotions
"""

pegasus_summary = summarize_pegasus(sample_text)

print("=== Pegasus Summary ===")
print(pegasus_summary)


In [None]:
# =========================
# Step 1: Load T5 Model and Tokenizer
# =========================
# T5 = "Text-to-Text Transfer Transformer"
# For summarization, T5 expects the input to start with the prefix: "summarize: "

from transformers import T5Tokenizer, T5ForConditionalGeneration

# Checkpoint used for both the tokenizer and the model; summarize_t5() reads
# `t5_tokenizer` and `t5_model` as module-level names.
t5_model_name = "t5-base"   # alternatives: "t5-small" (faster), "t5-large" (better but heavier)
t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_name)
t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_name)


In [None]:
# =========================
# Step 2: Summarize Text with T5
# =========================
# Key differences vs BART/Pegasus:
# - We prepend "summarize: " to the input text (task instruction).
# - T5-base typically accepts up to 512 tokens as input (vs ~1024 for BART/Pegasus).

def summarize_t5(text):
    """Summarize ``text`` with the module-level T5 tokenizer and model.

    The text is stripped and prefixed with ``"summarize: "`` before encoding.

    Args:
        text: The passage to summarize. The prefixed input is truncated to
            512 tokens by the tokenizer.

    Returns:
        The decoded summary, or ``""`` when ``text`` is ``None``, empty or
        whitespace only.
    """
    # Without this guard a blank input would be sent as the bare prefix.
    if not text or not text.strip():
        return ""

    # Add the task prefix so T5 knows we're summarizing
    prefixed_text = "summarize: " + text.strip()

    # Tokenize/encode; truncate to fit T5's input limits
    inputs = t5_tokenizer(
        [prefixed_text],
        max_length=512,         # T5-base input limit is usually 512 tokens
        return_tensors='pt',
        truncation=True
    )

    # Generate the summary token IDs
    output_ids = t5_model.generate(
        inputs['input_ids'],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs['attention_mask'],
        max_length=150,         # cap the summary length
        min_length=30,          # encourage non-trivial summaries
        length_penalty=1.0,     # neutral length penalty for T5
        num_beams=4,            # beam search for better quality
        early_stopping=True
    )

    # Decode the tokens back to text
    return t5_tokenizer.decode(output_ids[0], skip_special_tokens=True)


In [None]:
# =========================
# Step 3: Test T5 Summary
# =========================

sample_text = """
My favorite book in the Harry Potter series is "Harry Potter and the Prisoner of Azkaban". It's a pivotal book in the series, introducing darker themes and complex characters while still maintaining the whimsical charm of the earlier books. The introduction of Sirius Black and Remus Lupin, along with the concept of dementors, adds depth to the wizarding world and Harry's personal journey. The intricate plot, with its time-travel elements and moral ambiguities, keeps the reader guessing until the very end. It's a book that truly showcases J.K. Rowling's storytelling prowess and her ability to blend fantasy with relatable human emotions
"""

t5_summary = summarize_t5(sample_text)

print("=== T5 Summary ===")
print(t5_summary)


In [None]:
# =========================
# Step 1: Compare All 4 Models Side by Side
# =========================

def compare_models(text):
    """Run all four summarizers on ``text``, print each result, and return them.

    Args:
        text: The passage handed to every summarizer.

    Returns:
        A dict mapping "BART", "DistilBART", "Pegasus" and "T5" to the summary
        each helper returned.
    """
    def _emit(banner, body):
        # One printed section: banner line, content, then a blank spacer.
        print(banner)
        print(body)
        print("\n")

    _emit("=== Input Text ===", text)

    summaries = {}

    # Each model runs and is reported before the next one starts.
    summaries["BART"] = summarize_text(text)
    _emit("=== BART Summary ===", summaries["BART"])

    summaries["DistilBART"] = summarize_distilbart(text)
    _emit("=== DistilBART Summary ===", summaries["DistilBART"])

    summaries["Pegasus"] = summarize_pegasus(text)
    _emit("=== Pegasus Summary ===", summaries["Pegasus"])

    summaries["T5"] = summarize_t5(text)
    _emit("=== T5 Summary ===", summaries["T5"])

    return summaries

# Example run
sample_text = """
My favorite book in the Harry Potter series is "Harry Potter and the Prisoner of Azkaban". It's a pivotal book in the series, introducing darker themes and complex characters while still maintaining the whimsical charm of the earlier books. The introduction of Sirius Black and Remus Lupin, along with the concept of dementors, adds depth to the wizarding world and Harry's personal journey. The intricate plot, with its time-travel elements and moral ambiguities, keeps the reader guessing until the very end. It's a book that truly showcases J.K. Rowling's storytelling prowess and her ability to blend fantasy with relatable human emotions
"""
results = compare_models(sample_text)


In [None]:
# =========================
# Step 2a: Install Evaluation Library
# =========================
!pip install evaluate


In [None]:
# =========================
# Step 2b: Evaluate ROUGE for One Example
# =========================
!pip install rouge_score
import evaluate
rouge = evaluate.load("rouge")

# ROUGE measures overlap between a prediction and a reference summary OF THE
# SAME TEXT, so the reference must describe `sample_text` (the Harry Potter
# passage) for the score to mean anything.
reference = "Harry Potter and the Prisoner of Azkaban is a pivotal book in the series that introduces darker themes, Sirius Black, Remus Lupin and dementors, and its time-travel plot showcases J.K. Rowling's storytelling."
prediction = results["BART"]   # for example, compare BART's output

# Compute ROUGE
scores = rouge.compute(predictions=[prediction], references=[reference])
print(scores)


In [None]:
# =========================
# Step 3: Compute ROUGE Scores for All Models
# =========================

import evaluate
rouge = evaluate.load("rouge")

def evaluate_models(text, reference):
    """Summarize ``text`` with every model and score each summary with ROUGE.

    Args:
        text: The passage to summarize (forwarded to ``compare_models``).
        reference: The reference summary each prediction is compared against.

    Returns:
        A dict mapping each model name to the result of ``rouge.compute`` for
        that model's summary.
    """
    summaries = compare_models(text)
    return {
        name: rouge.compute(predictions=[candidate], references=[reference])
        for name, candidate in summaries.items()
    }

# Example reference summary (you can write this yourself, or take it from a dataset).
# It must summarize `sample_text` itself: ROUGE compares each model's output
# with this string, so an unrelated reference gives meaningless scores.
reference_summary = "Harry Potter and the Prisoner of Azkaban is a pivotal book in the series that introduces darker themes, Sirius Black, Remus Lupin and dementors, and its time-travel plot showcases J.K. Rowling's storytelling."

# Run evaluation
scores = evaluate_models(sample_text, reference_summary)

# Print results
import pprint
pprint.pprint(scores)


In [None]:
# =========================
# Step 4: Visualize ROUGE Scores
# =========================

import matplotlib.pyplot as plt
import numpy as np

# Model names, in the insertion order of the `scores` dictionary
models = list(scores)   # ["BART", "DistilBART", "Pegasus", "T5"]

# One list of float scores per ROUGE variant, aligned with `models`
rouge1, rouge2, rougeL = (
    [float(scores[m][metric]) for m in models]
    for metric in ('rouge1', 'rouge2', 'rougeL')
)

x = np.arange(len(models))  # one group position per model
width = 0.25  # width of each bar

# Grouped bar chart: three bars per model, offset around the group centre
plt.figure(figsize=(10,6))
for _shift, _values, _label in (
    (-width, rouge1, 'ROUGE-1'),
    (0, rouge2, 'ROUGE-2'),
    (width, rougeL, 'ROUGE-L'),
):
    plt.bar(x + _shift, _values, width, label=_label)

# Axis labelling, title and legend
plt.xticks(x, models)
plt.ylabel("Score")
plt.title("ROUGE Scores Comparison Across Models")
plt.legend()
plt.show()


Fine-tuning the best-performing model

In [None]:
# ======================
# Step 5.1: Install dependencies
# ======================
!pip install datasets rouge_score transformers accelerate -q


In [None]:
# ======================
# Step 5.2: Load dataset
# ======================
from datasets import load_dataset

# Load small subset to keep training light
# The slice syntax in `split` selects the first 1% of each split.
dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:1%]")  # only 1% for demo
valid_dataset = load_dataset("cnn_dailymail", "3.0.0", split="validation[:1%]")

# Show one raw record; preprocess() reads its "article" and "highlights" fields.
print(dataset[0])


In [None]:
# ======================
# Step 5.3: Tokenization
# ======================
from transformers import PegasusTokenizer

model_name = "google/pegasus-cnn_dailymail"
# Use a dedicated name here: the global `tokenizer` is the BART tokenizer that
# summarize_text() reads, and rebinding it to Pegasus would silently break
# every later BART summary (including the live Gradio demo).
finetune_tokenizer = PegasusTokenizer.from_pretrained(model_name)

# Label value that the PyTorch cross-entropy loss skips by default.
LABEL_IGNORE_INDEX = -100

def preprocess(example):
    """Tokenize articles and highlights into fixed-length model inputs and labels.

    Args:
        example: A dataset record or batch with "article" and "highlights"
            entries (a batch of lists when used with ``map(batched=True)``).

    Returns:
        The tokenized article encoding with an added "labels" entry holding the
        highlight token ids. Padding positions in the labels are replaced by
        ``LABEL_IGNORE_INDEX`` so the loss is not computed on padding.
    """
    inputs = finetune_tokenizer(
        example["article"], truncation=True, padding="max_length", max_length=1024
    )
    labels = finetune_tokenizer(
        example["highlights"], truncation=True, padding="max_length", max_length=128
    )

    pad_id = finetune_tokenizer.pad_token_id

    def _mask_padding(token_ids):
        # Swap pad ids for the ignore index, leaving real tokens untouched.
        return [LABEL_IGNORE_INDEX if tok == pad_id else tok for tok in token_ids]

    label_ids = labels["input_ids"]
    if label_ids and isinstance(label_ids[0], (list, tuple)):
        # Batched call: one id sequence per example.
        inputs["labels"] = [_mask_padding(seq) for seq in label_ids]
    else:
        # Single example: a flat id sequence.
        inputs["labels"] = _mask_padding(label_ids)
    return inputs

tokenized_train = dataset.map(preprocess, batched=True)
tokenized_valid = valid_dataset.map(preprocess, batched=True)


In [None]:
# =========================
# Step 1 (fixed): Multi-Model Summarizer with 4 models
# =========================

import gc
import torch

from transformers import (
    BartTokenizer, BartForConditionalGeneration,
    PegasusTokenizer, PegasusForConditionalGeneration,
    T5Tokenizer, T5ForConditionalGeneration,
)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Display names of the supported models, in the order the UI shows them.
# Every entry must also be a key of MODEL_CONFIGS below.
MODEL_KEYS = [
    "BART (facebook/bart-large-cnn)",
    "DistilBART (sshleifer/distilbart-cnn-12-6)",
    "Pegasus (google/pegasus-xsum)",
    "T5 (t5-base)"
]

# Per-model settings read by get_model() and summarize_with():
#   name      - Hugging Face checkpoint id
#   tok_cls   - tokenizer class used to load the checkpoint
#   mdl_cls   - model class used to load the checkpoint
#   prefix    - text prepended to the input before tokenization
#   max_input - truncation length (in tokens) applied when encoding the input
MODEL_CONFIGS = {
    "BART (facebook/bart-large-cnn)": {
        "name": "facebook/bart-large-cnn",
        "tok_cls": BartTokenizer,
        "mdl_cls": BartForConditionalGeneration,
        "prefix": "",
        "max_input": 1024
    },
    "DistilBART (sshleifer/distilbart-cnn-12-6)": {
        "name": "sshleifer/distilbart-cnn-12-6",
        "tok_cls": BartTokenizer,
        "mdl_cls": BartForConditionalGeneration,
        "prefix": "",
        "max_input": 1024
    },
    "Pegasus (google/pegasus-xsum)": {
        "name": "google/pegasus-xsum",
        "tok_cls": PegasusTokenizer,
        "mdl_cls": PegasusForConditionalGeneration,
        "prefix": "",
        # The XSum checkpoint is configured for 512 input positions (the
        # CNN/DailyMail one allows 1024); truncating at 1024 lets long
        # articles run past the model's position table.
        "max_input": 512
    },
    "T5 (t5-base)": {
        "name": "t5-base",
        "tok_cls": T5Tokenizer,
        "mdl_cls": T5ForConditionalGeneration,
        "prefix": "summarize: ",  # T5 needs explicit instruction
        "max_input": 512
    }
}

_LOADED = {}

def get_model(model_key):
    """Return the ``(tokenizer, model)`` pair for ``model_key``, loading it on first use.

    A pair already stored in ``_LOADED`` is returned as-is. Otherwise the
    tokenizer and model are loaded from the checkpoint named in
    ``MODEL_CONFIGS``, the model is moved to ``DEVICE`` and put in eval mode,
    and the pair is stored in ``_LOADED``.
    """
    cached_pair = _LOADED.get(model_key)
    if cached_pair is not None:
        return cached_pair

    spec = MODEL_CONFIGS[model_key]
    checkpoint = spec["name"]
    tokenizer_obj = spec["tok_cls"].from_pretrained(checkpoint)
    model_obj = spec["mdl_cls"].from_pretrained(checkpoint)
    model_obj.to(DEVICE)
    model_obj.eval()

    fresh_pair = (tokenizer_obj, model_obj)
    _LOADED[model_key] = fresh_pair
    return fresh_pair

@torch.inference_mode()
def summarize_with(model_key, text, max_length=150, min_length=30, num_beams=4, length_penalty=1.0):
    """Summarize ``text`` with the model registered under ``model_key``.

    Args:
        model_key: A key of ``MODEL_CONFIGS``.
        text: The passage to summarize; it is stripped and given the model's
            configured prefix before tokenization.
        max_length: Upper bound on the generated length (cast to ``int``).
        min_length: Lower bound on the generated length (cast to ``int`` and
            clamped so it never exceeds ``max_length``).
        num_beams: Beam-search width (cast to ``int``).
        length_penalty: Beam-search length penalty (cast to ``float``).

    Returns:
        The decoded summary, or ``""`` when ``text`` is ``None``, empty or
        whitespace only.

    Raises:
        KeyError: If ``model_key`` is not in ``MODEL_CONFIGS`` (non-blank text).
    """
    # Nothing to summarize: avoid loading a model just to process a blank.
    if not text or not text.strip():
        return ""

    cfg = MODEL_CONFIGS[model_key]
    tok, mdl = get_model(model_key)
    prefixed = cfg.get("prefix", "") + text.strip()
    enc = tok([prefixed], max_length=cfg["max_input"], truncation=True, return_tensors="pt")
    enc = {k: v.to(DEVICE) for k, v in enc.items()}

    # The UI sliders are independent, so min_length can arrive larger than
    # max_length; clamp it to keep the length constraints satisfiable.
    max_length = int(max_length)
    min_length = min(int(min_length), max_length)

    out_ids = mdl.generate(
        enc["input_ids"],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=enc.get("attention_mask"),
        max_length=max_length,
        min_length=min_length,
        num_beams=int(num_beams),
        length_penalty=float(length_penalty),
        early_stopping=True
    )
    return tok.decode(out_ids[0], skip_special_tokens=True)

def summarize_all(text, max_length=150, min_length=30, num_beams=4, length_penalty=1.0, memory_efficient=True):
    """Summarize ``text`` with every model in ``MODEL_KEYS``, one after another.

    Args:
        text: The passage to summarize.
        max_length, min_length, num_beams, length_penalty: Generation settings
            forwarded unchanged to ``summarize_with``.
        memory_efficient: When true, each model is dropped from ``_LOADED``
            right after use, followed by a garbage-collection pass and, if
            CUDA is available, a CUDA cache flush.

    Returns:
        A dict mapping each model key to its summary.
    """
    summaries = {}
    for model_key in MODEL_KEYS:
        summaries[model_key] = summarize_with(
            model_key, text, max_length, min_length, num_beams, length_penalty
        )

        if not memory_efficient or model_key not in _LOADED:
            continue

        # Drop the cached pair before the next model is loaded.
        del _LOADED[model_key]
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return summaries


In [None]:
# =========================
# Step 2 (fixed): Gradio UI with 4 models
# =========================

!pip install gradio evaluate --quiet

import gradio as gr
import evaluate

rouge = evaluate.load("rouge")

def compare_and_score(text, reference, max_length, min_length, num_beams, length_penalty):
    """Gradio callback: summarize ``text`` with every model and optionally score it.

    Args:
        text: The article from the input textbox.
        reference: Optional reference summary; ROUGE is computed only when it
            is non-blank.
        max_length, min_length, num_beams, length_penalty: Generation settings
            forwarded to ``summarize_all``.

    Returns:
        A tuple with one summary per entry of ``MODEL_KEYS`` (in that order)
        followed by a dict of ROUGE scores per model. Blank ``text`` yields
        empty strings and an empty dict.
    """
    if not (text and text.strip()):
        blanks = [""] * len(MODEL_KEYS)
        return (*blanks, {})

    summaries = summarize_all(
        text,
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        length_penalty=length_penalty,
        memory_efficient=True
    )
    ordered = [summaries[name] for name in MODEL_KEYS]

    rouge_by_model = {}
    if reference and reference.strip():
        for name in MODEL_KEYS:
            raw = rouge.compute(predictions=[summaries[name]], references=[reference])
            # Cast each metric to a plain float before handing it to the JSON output.
            rouge_by_model[name] = {metric: float(value) for metric, value in raw.items()}

    return (*ordered, rouge_by_model)

# Build the comparison UI: one input textbox, collapsible generation settings,
# an optional reference box, and one output textbox per model plus a JSON
# panel for the ROUGE scores.
with gr.Blocks(title="Multi-Model Text Summarizer") as demo:
    gr.Markdown("## 📝 Compare 4 Summarization Models (BART, DistilBART, Pegasus, T5)")
    text_in = gr.Textbox(lines=10, label="Input Text", placeholder="Paste an article here...")

    # Generation settings; their values are passed straight to compare_and_score().
    with gr.Accordion("Advanced Settings", open=False):
        max_len = gr.Slider(40, 300, value=150, step=10, label="max_length")
        min_len = gr.Slider(10, 150, value=30, step=5, label="min_length")
        beams = gr.Slider(1, 8, value=4, step=1, label="num_beams")
        lpen = gr.Slider(0.1, 2.5, value=1.0, step=0.1, label="length_penalty")

    ref_in = gr.Textbox(lines=3, label="(Optional) Reference Summary for ROUGE")

    run_btn = gr.Button("Compare Models")

    # Output boxes are labelled with MODEL_KEYS, in the same order in which
    # compare_and_score() returns the summaries.
    out_bart       = gr.Textbox(label=MODEL_KEYS[0])
    out_distilbart = gr.Textbox(label=MODEL_KEYS[1])
    out_pegasus    = gr.Textbox(label=MODEL_KEYS[2])
    out_t5         = gr.Textbox(label=MODEL_KEYS[3])
    metrics_json   = gr.JSON(label="ROUGE Scores")

    # The input order must match compare_and_score()'s parameter order.
    run_btn.click(
        fn=compare_and_score,
        inputs=[text_in, ref_in, max_len, min_len, beams, lpen],
        outputs=[out_bart, out_distilbart, out_pegasus, out_t5, metrics_json]
    )

demo.launch()


In [None]:
# # ======================
# # Step 5.4: Fine-tune model (with W&B disabled)
# # ======================
# import os
# os.environ["WANDB_DISABLED"] = "true"

# from transformers import PegasusForConditionalGeneration, Trainer, TrainingArguments

# model = PegasusForConditionalGeneration.from_pretrained(model_name)

# training_args = TrainingArguments(
#     output_dir="./results",
#     eval_strategy="epoch",
#     learning_rate=2e-5,
#     per_device_train_batch_size=2,
#     per_device_eval_batch_size=2,
#     num_train_epochs=1,  # keep small for demo
#     weight_decay=0.01,
#     save_total_limit=1,
#     logging_dir="./logs",
#     logging_steps=10,
#     push_to_hub=False,
#     report_to="none"  # also disables other loggers
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=tokenized_train,
#     eval_dataset=tokenized_valid,
# )

# trainer.train()


In [None]:
# Write the requirements.txt file used when the app is installed elsewhere
# (e.g. `pip install -r requirements.txt`).
with open("requirements.txt", "w") as f:
    f.write("transformers>=4.30.0\n")
    f.write("torch\n")
    f.write("gradio\n")
    f.write("evaluate\n")
    # evaluate.load("rouge") needs the rouge_score package at runtime.
    f.write("rouge_score\n")
    # PegasusTokenizer and T5Tokenizer are SentencePiece-based tokenizers.
    f.write("sentencepiece\n")


In [None]:
# readme_text = """
# ---
# title: Multi-Model Text Summarizer
# emoji: 📝
# colorFrom: blue
# colorTo: green
# sdk: gradio
# sdk_version: "3.42"
# app_file: app.py
# pinned: true
# ---


# # 📝 Multi-Model Text Summarizer


# This project compares **4 state-of-the-art summarization models** using Hugging Face Transformers and Gradio:

# - **BART (facebook/bart-large-cnn)**
# - **DistilBART (sshleifer/distilbart-cnn-12-6)**
# - **Pegasus (google/pegasus-xsum)**
# - **T5 (t5-base)**

# ## 🚀 Features
# - Input any long text (e.g., articles, reports).
# - Generate summaries from all 4 models.
# - Compare results side by side.
# - (Optional) Provide a reference summary and compute **ROUGE scores**.

# ## 🖥️ Tech Stack
# - Python
# - Hugging Face Transformers
# - PyTorch
# - Gradio (for UI)
# - Evaluate (for metrics)

# ## 📦 Setup
# To run locally:

#     pip install -r requirements.txt
#     python app.py

# ## 🌐 Deployment
# This app can run locally or be deployed on **Hugging Face Spaces**.
# """

# with open("README.md", "w") as f:
#     f.write(readme_text)


In [None]:
# README for the Hugging Face Space. The YAML metadata block must be the very
# first thing in the file, so the string starts directly with "---" (no
# leading blank line).
readme_text = """---
title: Multi-Model Text Summarizer
emoji: 📝
colorFrom: blue
colorTo: green
sdk: gradio
app_file: app.py
pinned: true
---

# 📝 Multi-Model Text Summarizer

This project compares **4 state-of-the-art summarization models** using Hugging Face Transformers and Gradio:

- **BART (facebook/bart-large-cnn)**
- **DistilBART (sshleifer/distilbart-cnn-12-6)**
- **Pegasus (google/pegasus-xsum)**
- **T5 (t5-base)**

## 🚀 Features
- Input any long text (e.g., articles, reports).
- Generate summaries from all 4 models.
- Compare results side by side.
- (Optional) Provide a reference summary and compute **ROUGE scores**.

## 🖥️ Tech Stack
- Python
- Hugging Face Transformers
- PyTorch
- Gradio (for UI)
- Evaluate (for metrics)

## 📦 Setup
To run locally:

    pip install -r requirements.txt
    python app.py

## 🌐 Deployment
This app can run locally or be deployed on **Hugging Face Spaces**.
"""

# The text contains emoji, so write it as UTF-8 regardless of platform default.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme_text)


In [None]:
# ======================
# app.py - Main Gradio UI
# ======================

import gradio as gr
from transformers import BartTokenizer, BartForConditionalGeneration, PegasusForConditionalGeneration, PegasusTokenizer, T5Tokenizer, T5ForConditionalGeneration
import torch

# ----------------------
# Load Models & Tokenizers
# ----------------------
# One tokenizer/model pair per checkpoint, loaded eagerly when this cell runs.
# NOTE: `distilbart_*`, `pegasus_*` and `t5_*` rebind names that earlier cells
# also define; the Pegasus pair here comes from google/pegasus-xsum.
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")

distilbart_tokenizer = BartTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
distilbart_model = BartForConditionalGeneration.from_pretrained("sshleifer/distilbart-cnn-12-6")

pegasus_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")

# ----------------------
# Helper function for summarization
# ----------------------
def summarize(text, model, tokenizer, prefix=""):
    """Summarize ``text`` with the given ``model``/``tokenizer`` pair.

    Args:
        text: The passage to summarize.
        model: An object exposing ``generate(input_ids, ...)``.
        tokenizer: The matching tokenizer (callable, with ``decode``).
        prefix: Text prepended to ``text`` before tokenization (e.g. the T5
            task instruction).

    Returns:
        The decoded summary, or ``""`` when ``text`` is ``None``, empty or
        whitespace only.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # Truncate to the tokenizer's own advertised limit, capped at 1024, so
    # checkpoints with a shorter input window are not fed over-long inputs.
    input_limit = min(1024, getattr(tokenizer, "model_max_length", 1024) or 1024)

    inputs = tokenizer(
        [prefix + text],
        max_length=input_limit,
        return_tensors="pt",
        truncation=True
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so generate() does not have to infer it.
        attention_mask=inputs["attention_mask"],
        max_length=150,
        min_length=40,
        num_beams=4,
        early_stopping=True
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# ----------------------
# Gradio App
# ----------------------
def multi_model_summarizer(text):
    """Summarize ``text`` with all four models for the Gradio interface.

    Returns:
        A 4-tuple of summaries in the order BART, DistilBART, Pegasus, T5.
        The interface declares four output components, so the values must be
        returned positionally; a dict keyed by model name cannot be mapped
        onto them.
    """
    return (
        summarize(text, bart_model, bart_tokenizer),
        summarize(text, distilbart_model, distilbart_tokenizer),
        summarize(text, pegasus_model, pegasus_tokenizer),
        summarize(text, t5_model, t5_tokenizer, prefix="summarize: ")
    )

# Single-input form with one labelled output textbox per model; the function's
# return values are matched to `outputs` by position.
demo = gr.Interface(
    fn=multi_model_summarizer,
    inputs=gr.Textbox(lines=8, placeholder="Paste text here..."),
    outputs=[
        gr.Textbox(label="BART Summary"),
        gr.Textbox(label="DistilBART Summary"),
        gr.Textbox(label="Pegasus Summary"),
        gr.Textbox(label="T5 Summary")
    ],
    title="📝 Multi-Model Text Summarizer",
    description="Compare summaries from BART, DistilBART, Pegasus, and T5."
)

# Start the app only when this code runs as the main module.
if __name__ == "__main__":
    demo.launch()


In [None]:
# Source of the deployable app.py, kept as a string so the notebook can write
# it to disk. Inside the string only `#` comments are used, because a
# triple-quoted docstring would end the enclosing literal.
app_code = """# ======================
# app.py - Main Gradio UI
# ======================

import gradio as gr
from transformers import BartTokenizer, BartForConditionalGeneration, PegasusForConditionalGeneration, PegasusTokenizer, T5Tokenizer, T5ForConditionalGeneration
import torch

# ----------------------
# Load Models & Tokenizers
# ----------------------
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
bart_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")

distilbart_tokenizer = BartTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
distilbart_model = BartForConditionalGeneration.from_pretrained("sshleifer/distilbart-cnn-12-6")

pegasus_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
pegasus_model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")

t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")
t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")

# ----------------------
# Helper function for summarization
# ----------------------
def summarize(text, model, tokenizer, prefix=""):
    # Blank input: nothing to summarize.
    if not text or not text.strip():
        return ""
    # Truncate to the tokenizer's own advertised limit (capped at 1024) so
    # checkpoints with a shorter input window are not fed over-long inputs.
    input_limit = min(1024, getattr(tokenizer, "model_max_length", 1024) or 1024)
    inputs = tokenizer(
        [prefix + text],
        max_length=input_limit,
        return_tensors="pt",
        truncation=True
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=150,
        min_length=40,
        num_beams=4,
        early_stopping=True
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# ----------------------
# Gradio App
# ----------------------
def multi_model_summarizer(text):
    return (
        summarize(text, bart_model, bart_tokenizer),
        summarize(text, distilbart_model, distilbart_tokenizer),
        summarize(text, pegasus_model, pegasus_tokenizer),
        summarize(text, t5_model, t5_tokenizer, prefix="summarize: ")
    )

demo = gr.Interface(
    fn=multi_model_summarizer,
    inputs=gr.Textbox(lines=8, placeholder="Paste text here..."),
    outputs=[
        gr.Textbox(label="BART Summary"),
        gr.Textbox(label="DistilBART Summary"),
        gr.Textbox(label="Pegasus Summary"),
        gr.Textbox(label="T5 Summary")
    ],
    title="📝 Multi-Model Text Summarizer",
    description="Compare summaries from BART, DistilBART, Pegasus, and T5."
)

if __name__ == "__main__":
    demo.launch()
"""

# write to file (the source contains an emoji, so force UTF-8)
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)


In [None]:
!pip install huggingface_hub --quiet
from huggingface_hub import login

# This will ask you for your Hugging Face token
login()


In [None]:
!git clone https://huggingface.co/spaces/priankaM/multi-model-text-summarizer


In [None]:
!cp /content/app.py /content/multi-model-text-summarizer/
!cp /content/requirements.txt /content/multi-model-text-summarizer/
!cp /content/README.md /content/multi-model-text-summarizer/


In [None]:
%cd /content/multi-model-text-summarizer/
!git add .
!git commit -m "Initial commit of multi-model summarizer"
!git push


In [None]:
!git config --global user.email "your_email@example.com"
!git config --global user.name "Y"


In [None]:
!git add README.md
!git commit -m "Fix Space metadata YAML"
!git push


In [None]:
%cd /content/multi-model-text-summarizer


In [None]:
!git init


In [None]:
!git config --global user.email "your-email@example.com"
!git config --global user.name "your-username"


In [None]:
!git remote add origin https://huggingface.co/spaces/priankaM/multi-model-text-summarizer


In [None]:
!git add .
!git commit -m "Initial commit: multi-model summarizer placeholders"


In [None]:
!git push origin main


In [None]:
# Save app.py
# Placeholder version of the Space app: it wires up the four output boxes
# without loading any model. Inside the string only `#` comments are used,
# because a triple-quoted docstring would end the enclosing literal.
app_code = """
import gradio as gr
from transformers import BartTokenizer, BartForConditionalGeneration, PegasusForConditionalGeneration, T5ForConditionalGeneration, T5Tokenizer

# Load models and tokenizers here (we won't run them locally, just define)
# ...

def summarize_text(text):
    # Dummy function for Space; actual inference happens in the Space
    # The interface below declares four output boxes, so return one value
    # per box rather than a single string.
    placeholder = "Summary placeholder"
    return (placeholder, placeholder, placeholder, placeholder)

iface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(lines=10, placeholder="Paste text here..."),
    outputs=[gr.Textbox(label="BART Summary"),
             gr.Textbox(label="DistilBART Summary"),
             gr.Textbox(label="Pegasus Summary"),
             gr.Textbox(label="T5 Summary")],
    title="Multi-Model Text Summarizer",
    description="Compare summaries from 4 models"
)

if __name__ == "__main__":
    iface.launch()
"""

with open("app.py", "w") as f:
    f.write(app_code)


# Save README.md with correct YAML front matter
readme_text = """---
title: Multi-Model Text Summarizer
emoji: 📝
colorFrom: blue
colorTo: green
sdk: gradio
app_file: app.py
pinned: true
---

# Multi-Model Text Summarizer
Compare 4 state-of-the-art summarization models in Hugging Face Spaces.
"""

with open("README.md", "w") as f:
    f.write(readme_text)

# Save requirements.txt
requirements = """gradio
transformers
torch
"""
with open("requirements.txt", "w") as f:
    f.write(requirements)
