In [1]:
!pip install gradio transformers sumy wordcloud textstat python-docx PyPDF2 rouge-score docx2txt --quiet

#  Required Imports
import gradio as gr
from transformers import pipeline
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from rouge_score import rouge_scorer
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import textstat
import docx2txt
import PyPDF2
import tempfile
import os

# 🔍 Load Transformer Models
# (UI label, Hugging Face checkpoint) pairs, in the order they are listed
# in the model dropdown.
MODEL_CHECKPOINTS = (
    ("BART (facebook/bart-large-cnn)", "facebook/bart-large-cnn"),
    ("T5 Small (t5-small)", "t5-small"),
    ("Pegasus (google/pegasus-xsum)", "google/pegasus-xsum"),
    ("DistilBART (sshleifer/distilbart-cnn-12-6)", "sshleifer/distilbart-cnn-12-6"),
)

# Every pipeline is built eagerly here, so all four checkpoints are loaded
# as soon as this statement runs.
models = {
    label: pipeline("summarization", model=checkpoint)
    for label, checkpoint in MODEL_CHECKPOINTS
}

#  Extractive Summary using LSA
def extractive_summary(text, num_sentences=3):
    """Build an extractive summary of ``text`` with sumy's LSA summarizer.

    Args:
        text: Plain text to summarize.
        num_sentences: Number of sentences requested from the summarizer.

    Returns:
        The sentences chosen by the summarizer, joined by single spaces.
    """
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    lsa = LsaSummarizer()
    chosen = [str(sentence) for sentence in lsa(document, num_sentences)]
    return " ".join(chosen)

# 🧮 ROUGE Score Calculation
def compute_rouge(generated, reference):
    """Score ``generated`` against ``reference`` with ROUGE-1 and ROUGE-L.

    Args:
        generated: The summary produced by the app.
        reference: The reference summary to compare against.

    Returns:
        A dict with keys ``"ROUGE-1"`` and ``"ROUGE-L"`` holding the
        F-measure of each metric rounded to four decimal places.
    """
    # Maps the scorer's metric names to the labels shown to the user.
    metric_labels = {"rouge1": "ROUGE-1", "rougeL": "ROUGE-L"}

    scorer = rouge_scorer.RougeScorer(list(metric_labels), use_stemmer=True)
    # The reference is passed first and the generated text second.
    results = scorer.score(reference, generated)

    return {
        label: round(results[metric].fmeasure, 4)
        for metric, label in metric_labels.items()
    }

# WordCloud Generator
def generate_wordcloud(summary):
    """Render a word cloud of ``summary`` into a temporary PNG file.

    Args:
        summary: Text whose words are drawn in the cloud.

    Returns:
        The path of the PNG file, or ``None`` when ``summary`` is empty or
        whitespace-only (there is nothing to draw). The file is created
        with ``delete=False``, so it is not removed automatically.
    """
    # Bail out before touching WordCloud: it cannot build a cloud from no words.
    if not summary or not summary.strip():
        return None

    wc = WordCloud(width=400, height=200, background_color='white').generate(summary)

    # Only the path is needed; the context manager closes the handle right
    # away so no file descriptor stays open after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
        image_path = tmp_img.name

    fig = plt.figure(figsize=(5, 3))
    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        fig.savefig(image_path, bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    return image_path

#  File Reader (PDF/DOCX/TXT)
def read_file(file):
    """Extract the text content of an uploaded PDF, DOCX or TXT file.

    Args:
        file: Either an object exposing the file's path as ``.name`` or a
            plain path string. ``None`` (nothing uploaded) is accepted.

    Returns:
        The extracted text. An empty string is returned when ``file`` is
        ``None`` or its extension is not one of ``.pdf``, ``.docx``, ``.txt``
        (matched case-insensitively).
    """
    if file is None:
        return ""

    # Accept both upload objects (path in ``.name``) and bare path strings.
    path = getattr(file, "name", file)
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        with open(path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Pages without extractable text contribute an empty string
            # instead of breaking the join.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
    if ext == ".docx":
        # docx2txt opens the file itself from the path.
        return docx2txt.process(path)
    if ext == ".txt":
        with open(path, "rb") as f:
            # Undecodable bytes are replaced rather than aborting the upload.
            return f.read().decode("utf-8", errors="replace")
    return ""

#  Main Summarizer Function
def summarize_text(text, model_choice, min_len, max_len, reference, is_extractive):
    """Summarize ``text`` and compute the accompanying metrics and artifacts.

    Args:
        text: The article to summarize.
        model_choice: Key into ``models`` selecting the abstractive model;
            ignored when ``is_extractive`` is true.
        min_len: Minimum summary length passed to the abstractive model.
        max_len: Maximum summary length passed to the abstractive model.
        reference: Optional reference summary; when non-blank, ROUGE scores
            are computed against it.
        is_extractive: Use the LSA extractive summarizer instead of a
            transformer model.

    Returns:
        A 5-tuple ``(summary, rouge, readability, wordcloud_path,
        download_path)`` where ``rouge`` is the stringified score dict or
        ``"No reference provided"``, ``readability`` is the Flesch Reading
        Ease of the summary, ``wordcloud_path`` is the word-cloud image path
        (``None`` if the summary is blank) and ``download_path`` is a
        temporary ``.txt`` file containing the summary.

    Raises:
        gr.Error: If ``text`` is blank, or an abstractive summary is
            requested without a valid ``model_choice``.
    """
    if not text or not text.strip():
        raise gr.Error("Please provide some text to summarize.")

    if is_extractive:
        summary = extractive_summary(text)
    else:
        if model_choice not in models:
            raise gr.Error("Please choose a summarization model.")
        # The two sliders overlap, so the minimum may exceed the maximum;
        # clamp it so the generation call gets a consistent range.
        max_len = int(max_len)
        min_len = min(int(min_len), max_len)
        summarizer = models[model_choice]
        summary = summarizer(
            text,
            min_length=min_len,
            max_length=max_len,
            do_sample=False,
            # Cut inputs that are longer than the model's input limit.
            truncation=True,
        )[0]['summary_text']

    has_reference = bool(reference and reference.strip())
    rouge_score = compute_rouge(summary, reference) if has_reference else "No reference provided"
    readability = textstat.flesch_reading_ease(summary)
    # Skip the word cloud when there is no text to draw.
    wordcloud_path = generate_wordcloud(summary) if summary.strip() else None

    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as f:
        f.write(summary.encode("utf-8"))
        download_path = f.name

    return summary, str(rouge_score), readability, wordcloud_path, download_path

# 🗂 File Upload Handler
def upload_and_extract(file):
    """Gradio callback for the upload tab: return the text read from ``file``.

    Args:
        file: The uploaded file as handed over by the ``gr.File`` input.

    Returns:
        Whatever ``read_file`` extracts from the upload.
    """
    extracted_text = read_file(file)
    return extracted_text

# 💻 Gradio UI
# Summarization tab: the inputs are passed positionally to summarize_text and
# the outputs receive its 5-tuple return value in the same order.
iface = gr.Interface(
    fn=summarize_text,
    inputs=[
        gr.Textbox(label="Input Text", lines=10, placeholder="Paste or upload your article below..."),
        # An explicit initial value guarantees summarize_text always receives
        # a valid model key, regardless of the Dropdown's own default.
        gr.Dropdown(list(models.keys()), label="Choose Model", value=next(iter(models), None)),
        gr.Slider(minimum=20, maximum=250, step=10, label="Minimum Summary Length", value=50),
        gr.Slider(minimum=50, maximum=512, step=10, label="Maximum Summary Length", value=150),
        gr.Textbox(label="Optional Reference Summary (for ROUGE Evaluation)", lines=4),
        gr.Checkbox(label="Use Extractive Summary instead of Abstractive")
    ],
    outputs=[
        gr.Textbox(label="Generated Summary"),
        gr.Textbox(label="ROUGE Score"),
        gr.Number(label="Readability Score (Flesch Reading Ease)"),
        gr.Image(label="Word Cloud"),
        gr.File(label="Download Summary (.txt)")
    ],
    title=" Advanced Text Summarizer",
    description="Multi-Model Summarization, ROUGE, WordCloud, Readability & File Upload (PDF/DOCX/TXT)"
)

# Upload tab: a single file input whose extracted text is shown in a textbox.
upload_input = gr.File(label="Upload a Text, PDF, or DOCX File")
extracted_output = gr.Textbox(label="Extracted Text")
upload_interface = gr.Interface(
    fn=upload_and_extract,
    inputs=upload_input,
    outputs=extracted_output,
)

# Combine both interfaces into one tabbed app (upload tab first) and start it.
tab_interfaces = [upload_interface, iface]
tab_titles = ["📤 Upload File", "📝 Summarize Text"]
gr.TabbedInterface(tab_interfaces, tab_titles).launch()


  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.3/97.3 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m939.4/939.4 kB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m70.4 MB/s[0m eta [36m0:00:00[0m
[?25h

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Device set to use cpu


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f7c04d0f987fed39c8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


