In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the read-only Kaggle
# input directory, then print the paths one per line.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### 🚀 PEGASUS Model Overview

- **🧠 Type:** Encoder-Decoder (Sequence-to-Sequence) Transformer  
- **📚 Pretraining:** [PEGASUS](https://huggingface.co/papers/1912.08777) is pre-trained on massive unlabeled text corpora with self-supervised objectives designed for abstractive summarization.  
- **🎯 Objective Functions:**  
  - 📝 Masked Language Modeling (MLM)  
  - 🔍 Gap Sentence Generation (GSG) — whole sentences are masked and the model predicts them  
- **⚡ Fine-Tuning:** Performs well even on small datasets (as few as 1,000 examples)  
- **💾 Checkpoints:** Original PEGASUS checkpoints are available under the Google organization on the Hugging Face Hub  


### 🛠️ Tools & Technologies Used

- **🐍 Python** – Main programming language for the project  
- **🤗 Transformers Library** – `transformers` by Hugging Face, used to load pre-trained PEGASUS models and pipelines  
- **🚀 PEGASUS Model** – Pre-trained encoder-decoder Transformer for abstractive summarization (model: `google/pegasus-cnn_dailymail`)  
- **🎨 Gradio** – Creates a simple and interactive web interface for input and output  
- **💻 Kaggle Notebooks** – Environment used to run and test the app interactively  


In [3]:
# -------------------------------
# 1. Import Libraries
# -------------------------------


from transformers import pipeline

# -------------------------------
# 2. Load Pretrained Summarization Model
# -------------------------------

# PEGASUS checkpoint trained on the CNN/DailyMail dataset — a good fit for
# news-style summaries.
PEGASUS_CHECKPOINT = "google/pegasus-cnn_dailymail"
summarizer = pipeline(task="summarization", model=PEGASUS_CHECKPOINT)



Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


In [16]:
def summarize_text(text, *, max_length=200, min_length=30):
    """Return an abstractive summary of ``text`` using the global ``summarizer``.

    Args:
        text: Raw input text, e.g. the contents of the Gradio input textbox.
        max_length: Upper bound on the generated summary length, forwarded to
            the pipeline call. Keyword-only; defaults to 200.
        min_length: Lower bound on the generated summary length, forwarded to
            the pipeline call. Keyword-only; defaults to 30.

    Returns:
        The ``summary_text`` field of the first pipeline result, or an empty
        string when ``text`` is ``None``, empty, or whitespace-only.
    """
    # Nothing to summarize: skip the model call instead of feeding it an
    # empty prompt.
    if text is None or not text.strip():
        return ""

    # truncation=True asks the tokenizer to clip inputs that exceed the
    # model's maximum input length rather than passing them through
    # unshortened, so long pasted articles are still summarized.
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,  # deterministic decoding, no sampling
        truncation=True,
    )
    return summary[0]['summary_text']

In [18]:
# -------------------------------
# 3. Create Gradio Interface
# -------------------------------
import gradio as gr
import re

# Build the web UI: one multi-line text box in, one text box out, with
# summarize_text as the callback that turns the input into the summary.
iface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(
        lines=15,
        placeholder="📝 Enter your text here...",
        label="Input Text"
    ),
    outputs=gr.Textbox(
        lines=10,
        placeholder="Summary will appear here...",
        label="Summary"
    ),
    title="🚀 PEGASUS Summarizer",
    # The description is Markdown; the two trailing spaces on the first two
    # lines are intentional hard line breaks.
    description="""
    **Summarize long text quickly!** ✨  
    Enter any text and get an **abstractive summary** using the [PEGASUS](https://huggingface.co/papers/1912.08777) model.  
    Works well even for **long paragraphs and articles**. 📰📚
    """,
)

# -------------------------------
# 4. Launch the App
# -------------------------------
# NOTE: on a hosted notebook Gradio reports that it switches to share=True on
# its own, which publishes a temporary public URL; pass share=False to
# launch() explicitly to opt out.
iface.launch()

* Running on local URL:  http://127.0.0.1:7866
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://4d592fdae74dda92e5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


