In [2]:
!pip install -q gradio transformers sentencepiece pdfplumber python-docx langdetect

import gradio as gr
import pdfplumber
import docx
import os
from transformers import pipeline
from langdetect import detect
from io import BytesIO

# Model caching system
MODEL_CACHE = {}

def get_summarizer(model_name):
    """Get cached model or load new one"""
    if model_name not in MODEL_CACHE:
        MODEL_CACHE[model_name] = pipeline("summarization", model=model_name)
    return MODEL_CACHE[model_name]

def get_translator():
    """Get cached translator"""
    if "translator" not in MODEL_CACHE:
        MODEL_CACHE["translator"] = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
    return MODEL_CACHE["translator"]

def extract_text(file):
    """Extract text from different file types"""
    if file.name.endswith('.pdf'):
        with pdfplumber.open(file.name) as pdf:
            return "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
    elif file.name.endswith('.docx'):
        doc = docx.Document(file.name)
        return "\n".join([para.text for para in doc.paragraphs if para.text])
    else:  # .txt
        with open(file.name, 'r', encoding='utf-8') as f:
            return f.read()

def translate_to_english(text):
    """Detect language and translate to English if needed"""
    try:
        if len(text) > 20 and detect(text) != 'en':
            return get_translator()(text)[0]['translation_text']
    except:
        pass
    return text

def summarize_text(text, max_length=150, min_length=30, model_choice="facebook/bart-large-cnn", translate=False):
    """Summarize text with optional translation"""
    # Translate if requested
    if translate:
        text = translate_to_english(text)

    # Get the model
    summarizer = get_summarizer(model_choice)

    # Handle long text by chunking
    if len(text) > 10000:
        chunks = [text[i:i+4000] for i in range(0, len(text), 4000)]
        summaries = []
        for chunk in chunks:
            result = summarizer(
                chunk,
                max_length=max(60, max_length//len(chunks)),
                min_length=min(30, min_length//len(chunks)),
                do_sample=False
            )
            summaries.append(result[0]['summary_text'])
        return " ".join(summaries)

    # Generate summary for normal text
    results = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False
    )
    return results[0]['summary_text']

def process_files(files, max_length, min_length, model_choice, translate):
    """Process multiple files and return summaries"""
    summaries = []
    for file in files:
        try:
            text = extract_text(file)
            if not text.strip():
                raise ValueError("No text extracted from file")

            summary = summarize_text(text, max_length, min_length, model_choice, translate)
            filename = os.path.basename(file.name)
            summaries.append(f"📄 **{filename}**\n{summary}\n{'-'*50}")
        except Exception as e:
            summaries.append(f"❌ Error processing {file.name}: {str(e)}")
    return "\n\n".join(summaries)

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🚀 Advanced LLM Text Summarizer")
    gr.Markdown("Supports PDF/DOCX files, multi-document processing, and automatic translation!")

    with gr.Tab("📝 Text Input"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text", lines=8,
                                       placeholder="Paste your text here...")

                with gr.Accordion("🌍 Translation & Language", open=False):
                    translate_check = gr.Checkbox(label="Translate to English (for non-English texts)", value=False)

                with gr.Accordion("⚙️ Advanced Options", open=False):
                    model_select = gr.Dropdown(
                        choices=[
                            ("BART (Recommended)", "facebook/bart-large-cnn"),
                            ("DistilBART (Faster)", "sshleifer/distilbart-cnn-12-6"),
                            ("PEGASUS (Abstractive)", "google/pegasus-xsum")
                        ],
                        value="facebook/bart-large-cnn",
                        label="Model"
                    )
                    max_length = gr.Slider(50, 300, value=150,
                                          label="Max Summary Length")
                    min_length = gr.Slider(10, 150, value=30,
                                          label="Min Summary Length")

                submit_btn = gr.Button("Generate Summary", variant="primary")

            with gr.Column():
                output_summary = gr.Textbox(label="Summary", lines=8, interactive=False)
                with gr.Accordion("📊 Summary Analysis", open=False):
                    char_count = gr.Number(label="Original Characters", interactive=False)
                    summary_char_count = gr.Number(label="Summary Characters", interactive=False)
                    reduction_pct = gr.Number(label="Reduction (%)", interactive=False)

    with gr.Tab("📁 File Upload"):
        with gr.Row():
            with gr.Column():
                file_upload = gr.File(label="Upload Files",
                                     file_types=[".txt", ".pdf", ".docx"],
                                     file_count="multiple")

                with gr.Row():
                    with gr.Column():
                        translate_check_files = gr.Checkbox(label="Translate non-English files", value=True)
                    with gr.Column():
                        process_btn = gr.Button("Process Files", variant="primary")

                with gr.Accordion("⚙️ Summary Settings", open=False):
                    model_select_files = gr.Dropdown(
                        choices=[
                            ("BART (Recommended)", "facebook/bart-large-cnn"),
                            ("DistilBART (Faster)", "sshleifer/distilbart-cnn-12-6"),
                            ("PEGASUS (Abstractive)", "google/pegasus-xsum")
                        ],
                        value="facebook/bart-large-cnn",
                        label="Model"
                    )
                    max_length_files = gr.Slider(50, 300, value=150,
                                                label="Max Summary Length")
                    min_length_files = gr.Slider(10, 150, value=30,
                                                label="Min Summary Length")

            with gr.Column():
                file_summaries = gr.Textbox(label="Summaries", lines=15, interactive=False)
                file_count = gr.Label(label="Files Processed")

    # Text tab events
    submit_btn.click(
        fn=summarize_text,
        inputs=[input_text, max_length, min_length, model_select, translate_check],
        outputs=output_summary
    )

    input_text.change(
        fn=lambda x: len(x),
        inputs=input_text,
        outputs=char_count
    )

    output_summary.change(
        fn=lambda s, o: (len(s), (1 - len(s)/len(o))*100) if o and len(o) > 0 else (0, 0),
        inputs=[output_summary, input_text],
        outputs=[summary_char_count, reduction_pct]
    )

    # File tab events
    process_btn.click(
        fn=process_files,
        inputs=[file_upload, max_length_files, min_length_files, model_select_files, translate_check_files],
        outputs=file_summaries
    )

    file_upload.change(
        fn=lambda files: f"📦 {len(files)} file(s) ready for processing" if files else "No files uploaded",
        inputs=file_upload,
        outputs=file_count
    )

    # Examples
    gr.Examples(
        examples=[
            ["Artificial intelligence (AI) is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans. Leading AI textbooks define the field as the study of intelligent agents: any system that perceives its environment and takes actions that maximize its chance of achieving its goals. AI applications include advanced web search engines (e.g., Google Search), recommendation systems (used by YouTube, Amazon, and Netflix), understanding human speech (such as Siri and Alexa), self-driving cars (e.g., Waymo), generative or creative tools (ChatGPT and AI art), automated decision-making, and competing at the highest level in strategic game systems (such as chess and Go)."],
            ["La inteligencia artificial (IA) es la inteligencia llevada a cabo por máquinas. En ciencias de la computación, una máquina «inteligente» ideal es un agente flexible que percibe su entorno y lleva a cabo acciones que maximicen sus posibilidades de éxito en algún objetivo o tarea. Coloquialmente, el término inteligencia artificial se aplica cuando una máquina imita las funciones «cognitivas» que los humanos asocian con otras mentes humanas, como por ejemplo: percibir, razonar, aprender y resolver problemas."]
        ],
        inputs=input_text,
        label="Example Texts"
    )

# Launch the app
demo.launch(debug=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://a38b9a848ff971ea67.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Device set to use cuda:0


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Device set to use cuda:0
Your max_length is set to 150, but your input_length is only 113. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://a38b9a848ff971ea67.gradio.live




In [3]:
# Updated and Advanced Gradio Text Summarizer
!pip install -q gradio transformers sentencepiece pdfplumber python-docx langdetect nltk

import gradio as gr
import pdfplumber
import docx
import os
import nltk
import gc
from transformers import pipeline
from langdetect import detect
from nltk.tokenize import sent_tokenize
from io import BytesIO
nltk.download('punkt')

MODEL_CACHE = {}

# Smart chunking with sentence boundary
def smart_chunk(text, max_tokens=500):
    sentences = sent_tokenize(text)
    chunks, chunk, count = [], [], 0
    for sentence in sentences:
        tokens = len(sentence.split())
        if count + tokens > max_tokens:
            chunks.append(" ".join(chunk))
            chunk, count = [], 0
        chunk.append(sentence)
        count += tokens
    if chunk:
        chunks.append(" ".join(chunk))
    return chunks

def get_summarizer(model_name):
    if model_name not in MODEL_CACHE:
        MODEL_CACHE[model_name] = pipeline("summarization", model=model_name)
    return MODEL_CACHE[model_name]

def get_translator():
    if "translator" not in MODEL_CACHE:
        MODEL_CACHE["translator"] = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
    return MODEL_CACHE["translator"]

def extract_text(file):
    if file.name.endswith('.pdf'):
        with pdfplumber.open(file.name) as pdf:
            return "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
    elif file.name.endswith('.docx'):
        doc = docx.Document(file.name)
        return "\n".join([para.text for para in doc.paragraphs if para.text])
    else:
        with open(file.name, 'r', encoding='utf-8') as f:
            return f.read()

def translate_to_english(text):
    try:
        if len(text) > 20 and detect(text) != 'en':
            return get_translator()(text)[0]['translation_text']
    except:
        pass
    return text

def summarize_text(text, max_length=150, min_length=30, model_choice="facebook/bart-large-cnn", translate=False):
    if translate:
        text = translate_to_english(text)
    summarizer = get_summarizer(model_choice)
    if len(text) > 1000:
        chunks = smart_chunk(text, max_tokens=400)
        summaries = []
        for chunk in chunks:
            result = summarizer(chunk, max_length=max(60, max_length//len(chunks)), min_length=min(30, min_length//len(chunks)), do_sample=False)
            summaries.append(result[0]['summary_text'])
        return " ".join(summaries)
    results = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
    return results[0]['summary_text']

def process_files(files, max_length, min_length, model_choice, translate):
    summaries = []
    for file in files:
        try:
            text = extract_text(file)
            if not text.strip():
                raise ValueError("No text extracted from file")
            summary = summarize_text(text, max_length, min_length, model_choice, translate)
            filename = os.path.basename(file.name)
            summaries.append(f"\U0001F4C4 **{filename}**\n{summary}\n{'-'*50}")
        except Exception as e:
            summaries.append(f"\u274C Error processing {file.name}: {str(e)}")
    return "\n\n".join(summaries)

def generate_download(summary):
    filename = "summary_output.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(summary)
    return filename

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# \U0001F680 Advanced LLM Text Summarizer")
    gr.Markdown("Supports PDF/DOCX/TXT files, multilingual translation, smart chunking, and downloadable output!")

    with gr.Tab("\U0001F4DD Text Input"):
        input_text = gr.Textbox(label="Input Text", lines=8, placeholder="Paste your text here...")
        translate_check = gr.Checkbox(label="Translate to English", value=False)
        model_select = gr.Dropdown([
            ("BART (Recommended)", "facebook/bart-large-cnn"),
            ("DistilBART (Faster)", "sshleifer/distilbart-cnn-12-6"),
            ("PEGASUS (Abstractive)", "google/pegasus-xsum")
        ], value="facebook/bart-large-cnn", label="Summarization Model")
        max_length = gr.Slider(50, 300, value=150, label="Max Summary Length")
        min_length = gr.Slider(10, 150, value=30, label="Min Summary Length")
        submit_btn = gr.Button("Generate Summary")
        output_summary = gr.Textbox(label="Summary", lines=8, interactive=False)
        download_btn = gr.Button("Download Summary")
        download_file = gr.File(label="Download Link")

    with gr.Tab("\U0001F4C1 File Upload"):
        file_upload = gr.File(label="Upload Files", file_types=[".txt", ".pdf", ".docx"], file_count="multiple")
        translate_check_files = gr.Checkbox(label="Translate non-English files", value=True)
        model_select_files = model_select
        max_length_files = max_length
        min_length_files = min_length
        process_btn = gr.Button("Process Files")
        file_summaries = gr.Textbox(label="Summaries", lines=15, interactive=False)

    submit_btn.click(
        fn=summarize_text,
        inputs=[input_text, max_length, min_length, model_select, translate_check],
        outputs=output_summary
    )
    download_btn.click(
        fn=generate_download,
        inputs=output_summary,
        outputs=download_file
    )
    process_btn.click(
        fn=process_files,
        inputs=[file_upload, max_length_files, min_length_files, model_select_files, translate_check_files],
        outputs=file_summaries
    )

demo.launch(debug=True)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://6bd181602ac8eeeeac.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Device set to use cuda:0
Your max_length is set to 150, but your input_length is only 133. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=66)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://6bd181602ac8eeeeac.gradio.live




In [8]:
!pip install -q gradio>=3.0 transformers sentencepiece pdfplumber python-docx langdetect nltk pandas

import gradio as gr
import pdfplumber
import docx
import os
import nltk
import pandas as pd
import torch
from transformers import pipeline
from langdetect import detect
from nltk.tokenize import sent_tokenize
from io import BytesIO

nltk.download('punkt')

# --- Constants ---
MODEL_OPTIONS = [
    ("BART (Balanced)", "facebook/bart-large-cnn"),
    ("DistilBART (Fast)", "sshleifer/distilbart-cnn-12-6"),
    ("PEGASUS (Abstractive)", "google/pegasus-xsum"),
    ("T5 (Technical)", "t5-small")
]

THEME = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    font=[gr.themes.GoogleFont("Open Sans"), "sans-serif"]
)

# --- Core Functions ---
class SummarizationEngine:
    def __init__(self):
        self.model_cache = {}
        self.translation_cache = {}

    def get_model(self, model_name):
        if model_name not in self.model_cache:
            self.model_cache[model_name] = pipeline(
                "summarization",
                model=model_name,
                device=0 if torch.cuda.is_available() else -1
            )
        return self.model_cache[model_name]

    def get_translator(self):
        if "translator" not in self.translation_cache:
            self.translation_cache["translator"] = pipeline(
                "translation",
                model="Helsinki-NLP/opus-mt-mul-en",
                device=0 if torch.cuda.is_available() else -1
            )
        return self.translation_cache["translator"]

engine = SummarizationEngine()

def smart_chunk(text, max_tokens=500):
    """Intelligent text chunking preserving sentence boundaries"""
    sentences = sent_tokenize(text)
    chunks, current_chunk, current_length = [], [], 0

    for sentence in sentences:
        sentence_length = len(sentence.split())
        if current_length + sentence_length > max_tokens and current_chunk:
            chunks.append(" ".join(current_chunk))
            current_chunk, current_length = [], 0
        current_chunk.append(sentence)
        current_length += sentence_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

# --- Processing Functions ---
def process_document(file):
    """Handle multiple file types with error recovery"""
    try:
        if file.name.endswith('.pdf'):
            with pdfplumber.open(file.name) as pdf:
                return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
        elif file.name.endswith('.docx'):
            doc = docx.Document(file.name)
            return "\n".join(para.text for para in doc.paragraphs if para.text)
        else:
            with open(file.name, 'r', encoding='utf-8') as f:
                return f.read()
    except Exception as e:
        raise ValueError(f"Document processing error: {str(e)}")

def generate_summary(text, params):
    """Main summarization workflow with quality controls"""
    # Pre-processing
    if params['translate'] and detect(text) != 'en':
        text = engine.get_translator()(text)[0]['translation_text']

    # Model selection
    summarizer = engine.get_model(params['model'])

    # Chunking strategy for long documents
    if len(text.split()) > 1000:
        chunks = smart_chunk(text)
        summaries = []
        for chunk in chunks:
            output = summarizer(
                chunk,
                max_length=max(60, params['max_length']//len(chunks)),
                min_length=min(30, params['min_length']//len(chunks))),
                do_sample=False
            )
            summaries.append(output[0]['summary_text'])
        return " ".join(summaries)

    # Standard summarization
    results = summarizer(
        text,
        max_length=params['max_length'],
        min_length=params['min_length'],
        do_sample=False
    )
    return results[0]['summary_text']

# --- UI Components ---
def create_control_panel():
    """Generate consistent control panels for both tabs"""
    with gr.Accordion("Processing Parameters", open=True):
        model = gr.Dropdown(
            choices=MODEL_OPTIONS,
            value="facebook/bart-large-cnn",
            label="Summarization Model",
            interactive=True
        )
        max_len = gr.Slider(
            minimum=50, maximum=500, value=150, step=10,
            label="Maximum Summary Length"
        )
        min_len = gr.Slider(
            minimum=10, maximum=150, value=30, step=5,
            label="Minimum Summary Length"
        )
        translate = gr.Checkbox(
            label="Auto-translate to English",
            value=False
        )
    return model, max_len, min_len, translate

def create_analytics(summary, original):
    """Generate metrics dashboard compatible with all Gradio versions"""
    with gr.Accordion("Analytics Dashboard", open=False):
        with gr.Row():
            # Using HTML components for wider compatibility
            gr.HTML("""
            <div style="
                background: white;
                padding: 15px;
                border-radius: 5px;
                margin: 5px;
                width: 100%;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            ">
                <h4 style="margin-top: 0;">Text Analytics</h4>
                <table style="width: 100%; border-collapse: collapse;">
                    <tr>
                        <td style="padding: 5px; border-bottom: 1px solid #eee;"><strong>Metric</strong></td>
                        <td style="padding: 5px; border-bottom: 1px solid #eee;"><strong>Original</strong></td>
                        <td style="padding: 5px; border-bottom: 1px solid #eee;"><strong>Summary</strong></td>
                    </tr>
                    <tr>
                        <td style="padding: 5px;">Words</td>
                        <td style="padding: 5px;">{}</td>
                        <td style="padding: 5px;">{}</td>
                    </tr>
                    <tr>
                        <td style="padding: 5px;">Characters</td>
                        <td style="padding: 5px;">{}</td>
                        <td style="padding: 5px;">{}</td>
                    </tr>
                    <tr>
                        <td style="padding: 5px;">Compression</td>
                        <td style="padding: 5px;" colspan="2">{:.1%}</td>
                    </tr>
                </table>
            </div>
            """.format(
                len(original.split()),
                len(summary.split()),
                len(original),
                len(summary),
                1 - len(summary)/len(original) if original else 0
            ))

# --- Main Interface ---
with gr.Blocks(theme=THEME, title="Enterprise Text Summarizer") as app:
    # Header Section
    gr.Markdown("""
    <div style="text-align: center;">
        <h1 style="margin-bottom: 0;">📝 Enterprise Text Summarization Suite</h1>
        <p style="color: #666; margin-top: 0;">AI-powered document processing with quality controls and analytics</p>
    </div>
    """)

    # Tab Interface
    with gr.Tabs():
        with gr.Tab("Single Document", id="single"):
            with gr.Row():
                with gr.Column(scale=2):
                    input_text = gr.TextArea(
                        label="Input Content",
                        placeholder="Paste text or upload files below...",
                        lines=10,
                        max_lines=20
                    )
                    file_input = gr.File(
                        label="Or Upload Document",
                        file_types=[".txt", ".pdf", ".docx"],
                        file_count="single"
                    )
                    model, max_len, min_len, translate = create_control_panel()
                    submit_btn = gr.Button("Process Document", variant="primary")

                with gr.Column(scale=1):
                    output_summary = gr.TextArea(
                        label="Generated Summary",
                        interactive=False,
                        lines=10
                    )
                    with gr.Group():
                        download_btn = gr.Button("Export Summary")
                        download = gr.File(interactive=False)
                    create_analytics(output_summary, input_text)

        with gr.Tab("Batch Processing", id="batch"):
            with gr.Row():
                with gr.Column():
                    batch_files = gr.File(
                        label="Upload Multiple Documents",
                        file_types=[".txt", ".pdf", ".docx"],
                        file_count="multiple"
                    )
                    batch_model, batch_max, batch_min, batch_trans = create_control_panel()
                    process_btn = gr.Button("Process Batch", variant="primary")

                with gr.Column():
                    batch_output = gr.Dataframe(
                        headers=["File", "Summary", "Status"],
                        datatype=["str", "str", "str"],
                        interactive=False
                    )
                    batch_download = gr.File(interactive=False)

    # Event Handling
    submit_btn.click(
        fn=generate_summary,
        inputs=[input_text, {
            "model": model,
            "max_length": max_len,
            "min_length": min_len,
            "translate": translate
        }],
        outputs=output_summary
    )

    file_input.change(
        fn=lambda f: process_document(f) if f else "",
        inputs=file_input,
        outputs=input_text
    )

# Launch Configuration
app.launch()

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 131)

In [14]:
!pip install -q gradio>=3.0 transformers sentencepiece pdfplumber python-docx langdetect nltk pandas

import gradio as gr
import pdfplumber
import docx
import os
import nltk
import pandas as pd
import torch
from transformers import pipeline
from langdetect import detect
from nltk.tokenize import sent_tokenize
from io import BytesIO

nltk.download('punkt')

# --- Constants ---
MODEL_OPTIONS = [
    ("BART (Balanced)", "facebook/bart-large-cnn"),
    ("DistilBART (Fast)", "sshleifer/distilbart-cnn-12-6"),
    ("PEGASUS (Abstractive)", "google/pegasus-xsum"),
    ("T5 (Technical)", "t5-small")
]

THEME = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    font=[gr.themes.GoogleFont("Open Sans"), "sans-serif"]
)

# --- Core Functions ---
class SummarizationEngine:
    def __init__(self):
        self.model_cache = {}
        self.translation_cache = {}

    def get_model(self, model_name):
        if model_name not in self.model_cache:
            self.model_cache[model_name] = pipeline(
                "summarization",
                model=model_name,
                device=0 if torch.cuda.is_available() else -1
            )
        return self.model_cache[model_name]

    def get_translator(self):
        if "translator" not in self.translation_cache:
            self.translation_cache["translator"] = pipeline(
                "translation",
                model="Helsinki-NLP/opus-mt-mul-en",
                device=0 if torch.cuda.is_available() else -1
            )
        return self.translation_cache["translator"]

engine = SummarizationEngine()

def smart_chunk(text, max_tokens=500):
    """Intelligent text chunking preserving sentence boundaries"""
    sentences = sent_tokenize(text)
    chunks, current_chunk, current_length = [], [], 0

    for sentence in sentences:
        sentence_length = len(sentence.split())
        if current_length + sentence_length > max_tokens and current_chunk:
            chunks.append(" ".join(current_chunk))
            current_chunk, current_length = [], 0
        current_chunk.append(sentence)
        current_length += sentence_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

# --- Processing Functions ---
def process_document(file):
    """Handle multiple file types with error recovery"""
    try:
        if file.name.endswith('.pdf'):
            with pdfplumber.open(file.name) as pdf:
                return "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
        elif file.name.endswith('.docx'):
            doc = docx.Document(file.name)
            return "\n".join(para.text for para in doc.paragraphs if para.text)
        else:
            with open(file.name, 'r', encoding='utf-8') as f:
                return f.read()
    except Exception as e:
        raise ValueError(f"Document processing error: {str(e)}")

def generate_summary(text, model_name, max_length, min_length, translate):
    """Main summarization workflow with quality controls"""
    # Pre-processing
    if translate and detect(text) != 'en':
        text = engine.get_translator()(text)[0]['translation_text']

    # Model selection
    summarizer = engine.get_model(model_name)

    # Chunking strategy for long documents
    if len(text.split()) > 1000:
        chunks = smart_chunk(text)
        summaries = []
        for chunk in chunks:
            output = summarizer(
                chunk,
                max_length=max(60, max_length//len(chunks)),
                min_length=min(30, min_length//len(chunks)),
                do_sample=False
            )
            summaries.append(output[0]['summary_text'])
        return " ".join(summaries)

    # Standard summarization
    results = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False
    )
    return results[0]['summary_text']

# --- UI Components ---
def create_control_panel():
    """Generate consistent control panels for both tabs"""
    with gr.Accordion("Processing Parameters", open=True):
        model = gr.Dropdown(
            choices=MODEL_OPTIONS,
            value="facebook/bart-large-cnn",
            label="Summarization Model",
            interactive=True
        )
        max_len = gr.Slider(
            minimum=50, maximum=500, value=150, step=10,
            label="Maximum Summary Length"
        )
        min_len = gr.Slider(
            minimum=10, maximum=150, value=30, step=5,
            label="Minimum Summary Length"
        )
        translate = gr.Checkbox(
            label="Auto-translate to English",
            value=False
        )
    return model, max_len, min_len, translate

def create_analytics(summary, original):
    """Generate metrics dashboard compatible with all Gradio versions"""
    with gr.Accordion("Analytics Dashboard", open=False):
        with gr.Row():
            # Using HTML components for wider compatibility
            gr.HTML("""
            <div style="
                background: white;
                padding: 15px;
                border-radius: 5px;
                margin: 5px;
                width: 100%;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            ">
                <h4 style="margin-top: 0;">Text Analytics</h4>
                <table style="width: 100%; border-collapse: collapse;">
                    <tr>
                        <td style="padding: 5px; border-bottom: 1px solid #eee;"><strong>Metric</strong></td>
                        <td style="padding: 5px; border-bottom: 1px solid #eee;"><strong>Original</strong></td>
                        <td style="padding: 5px; border-bottom: 1px solid #eee;"><strong>Summary</strong></td>
                    </tr>
                    <tr>
                        <td style="padding: 5px;">Words</td>
                        <td style="padding: 5px;">{}</td>
                        <td style="padding: 5px;">{}</td>
                    </tr>
                    <tr>
                        <td style="padding: 5px;">Characters</td>
                        <td style="padding: 5px;">{}</td>
                        <td style="padding: 5px;">{}</td>
                    </tr>
                    <tr>
                        <td style="padding: 5px;">Compression</td>
                        <td style="padding: 5px;" colspan="2">{:.1%}</td>
                    </tr>
                </table>
            </div>
            """.format(
                len(original.split()) if original else 0,
                len(summary.split()) if summary else 0,
                len(original) if original else 0,
                len(summary) if summary else 0,
                1 - len(summary)/len(original) if original and len(original) > 0 else 0
            ))

# --- Main Interface ---
with gr.Blocks(theme=THEME, title="Enterprise Text Summarizer") as app:
    # Header Section
    gr.Markdown("""
    <div style="text-align: center;">
        <h1 style="margin-bottom: 0;">📝 Enterprise Text Summarization Suite</h1>
        <p style="color: #666; margin-top: 0;">AI-powered document processing with quality controls and analytics</p>
    </div>
    """)

    # Tab Interface
    with gr.Tabs():
        with gr.Tab("Single Document", id="single"):
            with gr.Row():
                with gr.Column(scale=2):
                    input_text = gr.TextArea(
                        label="Input Content",
                        placeholder="Paste text or upload files below...",
                        lines=10,
                        max_lines=20
                    )
                    file_input = gr.File(
                        label="Or Upload Document",
                        file_types=[".txt", ".pdf", ".docx"],
                        file_count="single"
                    )
                    model, max_len, min_len, translate = create_control_panel()
                    submit_btn = gr.Button("Process Document", variant="primary")

                with gr.Column(scale=1):
                    output_summary = gr.TextArea(
                        label="Generated Summary",
                        interactive=False,
                        lines=10
                    )
                    with gr.Group():
                        download_btn = gr.Button("Export Summary")
                        download = gr.File(interactive=False)
                    analytics_output = gr.HTML(label="Analytics") # Add a HTML component to display analytics

    # Event Handling
    submit_btn.click(
        fn=generate_summary,
        inputs=[input_text, model, max_len, min_len, translate],
        outputs=output_summary
    ).then(
        fn=create_analytics, # Call create_analytics after summarization
        inputs=[output_summary, input_text], # Pass the values of the components
        outputs=analytics_output # Output to the new HTML component
    )

    file_input.change(
        fn=lambda f: process_document(f) if f else "",
        inputs=file_input,
        outputs=input_text
    )

    download_btn.click(
        fn=lambda text: BytesIO(text.encode('utf-8')) if text else None,
        inputs=output_summary,
        outputs=download,
        api_name="download_summary"
    )


# Launch Configuration
app.launch(debug)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d2755c23037cb205ae.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


