<a href="https://colab.research.google.com/github/Priya-gawhane/Text-Summarizer/blob/main/Text_Summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torch accelerate sentencepiece
!pip install gradio
!pip install nltk
!pip install requests beautifulsoup4
!pip install readability-lxml
!pip install html2text


Collecting readability-lxml
  Downloading readability_lxml-0.8.4.1-py3-none-any.whl.metadata (4.0 kB)
Collecting cssselect (from readability-lxml)
  Downloading cssselect-1.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting lxml_html_clean (from lxml[html_clean]->readability-lxml)
  Downloading lxml_html_clean-0.4.2-py3-none-any.whl.metadata (2.4 kB)
Downloading readability_lxml-0.8.4.1-py3-none-any.whl (19 kB)
Downloading cssselect-1.3.0-py3-none-any.whl (18 kB)
Downloading lxml_html_clean-0.4.2-py3-none-any.whl (14 kB)
Installing collected packages: lxml_html_clean, cssselect, readability-lxml
Successfully installed cssselect-1.3.0 lxml_html_clean-0.4.2 readability-lxml-0.8.4.1
Collecting html2text
  Downloading html2text-2025.4.15-py3-none-any.whl.metadata (4.1 kB)
Downloading html2text-2025.4.15-py3-none-any.whl (34 kB)
Installing collected packages: html2text
Successfully installed html2text-2025.4.15


In [None]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
import nltk
import requests
from bs4 import BeautifulSoup
import re
import warnings
import html2text
from urllib.parse import urlparse
warnings.filterwarnings('ignore')

# Download NLTK data.
# 'punkt_tab' is requested first and 'punkt' is also fetched as a fallback for
# older NLTK versions. Each resource is attempted on its own so that a failure
# on one does not skip the other.
for resource in ('punkt_tab', 'punkt'):
    try:
        nltk.download(resource, quiet=True)
    except Exception as exc:
        # Best effort only: the sentence-splitting helpers in this notebook
        # fall back to a regex when the tokenizer data cannot be loaded.
        print(f"NLTK resource '{resource}' could not be downloaded: {exc}")

nltk.download('stopwords', quiet=True)

True

In [None]:
class TextSummarizer:
    """Bundle three Hugging Face summarization pipelines with text helpers.

    The instance holds one ``pipeline("summarization", ...)`` per supported
    model (BART, T5, Pegasus) and offers helpers to clean text, split it into
    chunks, summarize it, and extract article text from a web page.
    """

    def __init__(self):
        """Load the BART, T5 and Pegasus summarization pipelines."""
        print("🚀 Initializing summarization models...")

        # Decide device and dtype once so that every pipeline agrees with the
        # status line printed at the end. Half precision is only used on GPU.
        use_gpu = torch.cuda.is_available()
        dtype = torch.float16 if use_gpu else torch.float32
        device = 0 if use_gpu else -1  # pipeline convention: -1 selects the CPU

        # Model 1: BART (Facebook's model - good for general summarization)
        print("Loading BART model...")
        self.bart_summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            torch_dtype=dtype,
            device=device
        )

        # Model 2: T5 (Google's model - versatile)
        print("Loading T5 model...")
        self.t5_summarizer = pipeline(
            "summarization",
            model="t5-small",
            torch_dtype=dtype,
            device=device
        )

        # Model 3: Pegasus (Google's model - specifically for summarization)
        print("Loading Pegasus model...")
        self.pegasus_summarizer = pipeline(
            "summarization",
            model="google/pegasus-xsum",
            torch_dtype=dtype,
            device=device
        )

        print("✅ All models loaded successfully!")

        # Report which device the pipelines were placed on
        device_label = "GPU" if use_gpu else "CPU"
        print(f"🔧 Running on: {device_label}")

    def preprocess_text(self, text):
        """Clean the input text before it is handed to a model.

        URLs (``http(s)://...`` and ``www....``) and e-mail-like tokens are
        removed, then every run of whitespace is collapsed to a single space
        and the result is stripped.

        Args:
            text: Raw input string.

        Returns:
            The cleaned string.
        """
        # Remove URLs. The scheme/"www." anchors keep ordinary words that
        # merely contain "http" or "www" from being deleted.
        text = re.sub(r'https?://\S+|\bwww\.\S+', ' ', text)

        # Remove email addresses
        text = re.sub(r'\S+@\S+', ' ', text)

        # Collapse whitespace last so the removals above leave no double spaces
        return re.sub(r'\s+', ' ', text).strip()

    def chunk_text(self, text, max_length=1024):
        """Split text into sentence-aligned chunks.

        Sentences are packed greedily: a sentence is appended to the current
        chunk while the chunk stays shorter than ``max_length``; otherwise a
        new chunk is started. A single sentence longer than ``max_length``
        becomes a chunk of its own.

        Args:
            text: Text to split.
            max_length: Size limit per chunk, measured in characters.

        Returns:
            List of chunk strings (empty when no sentence is found).
        """
        try:
            sentences = nltk.sent_tokenize(text)
        except (LookupError, OSError):
            # Fallback: split by sentence-ending punctuation
            sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]

        chunks = []
        current = []
        current_len = 0  # characters in the current chunk, counting one separator per sentence

        for sentence in sentences:
            if current and current_len + len(sentence) >= max_length:
                chunks.append(" ".join(current))
                current = []
                current_len = 0
            current.append(sentence)
            current_len += len(sentence) + 1

        if current:
            chunks.append(" ".join(current))

        return chunks

    def _run_summarizer(self, summarizer, text, max_length, min_length):
        """Run one pipeline call and return the generated summary string."""
        result = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            # Ask the pipeline to clip inputs that exceed the model's maximum
            # input length rather than passing them through unbounded.
            truncation=True
        )
        return result[0]['summary_text']

    def summarize_with_model(self, text, model_name="bart", max_length=150, min_length=50):
        """Summarize text using the specified model.

        Args:
            text: Raw text to summarize; it is cleaned with ``preprocess_text``.
            model_name: ``"bart"``, ``"t5"`` or ``"pegasus"``; any other value
                falls back to BART.
            max_length: Upper length bound passed to the pipeline.
            min_length: Lower length bound passed to the pipeline.

        Returns:
            The summary string. Inputs with fewer than 50 words yield a message
            starting with "⚠️"; any exception yields a message starting with
            "❌" (callers test for these prefixes).
        """
        try:
            # Preprocess text
            clean_text = self.preprocess_text(text)
            word_count = len(clean_text.split())

            if word_count < 50:
                return "⚠️ Text is too short for meaningful summarization. Please provide longer content."

            # Select model (unknown names fall back to BART)
            attribute = {
                "bart": "bart_summarizer",
                "t5": "t5_summarizer",
                "pegasus": "pegasus_summarizer",
            }.get(model_name, "bart_summarizer")
            summarizer = getattr(self, attribute)

            # Handle long texts by chunking
            if word_count > 800:
                chunks = self.chunk_text(clean_text, max_length=800)

                if chunks:
                    # Spread the length budget over the chunks
                    chunk_max = max_length // len(chunks) + 50
                    chunk_min = min_length // len(chunks) + 10

                    summaries = [
                        self._run_summarizer(summarizer, chunk, chunk_max, chunk_min)
                        for chunk in chunks
                        if len(chunk.split()) > 30  # Only summarize substantial chunks
                    ]

                    if summaries:
                        # Combine chunk summaries
                        combined_summary = " ".join(summaries)

                        # Final summarization if combined text is still long
                        if len(combined_summary.split()) > max_length:
                            return self._run_summarizer(
                                summarizer, combined_summary, max_length, min_length
                            )
                        return combined_summary

                # No chunk was substantial enough: fall through to a direct
                # call instead of returning an empty summary.

            # Direct summarization for shorter texts
            return self._run_summarizer(summarizer, clean_text, max_length, min_length)

        except Exception as e:
            return f"❌ Error during summarization: {str(e)}"

    def extract_from_url(self, url):
        """Extract text content from a URL using BeautifulSoup and html2text.

        Args:
            url: Address of the page to fetch.

        Returns:
            ``(text, title)`` on success. On failure the first element is a
            message starting with "❌" and the second element is ``"Error"``.
        """
        try:
            # Add headers to mimic a real browser
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }

            # Get the webpage
            # NOTE(review): the URL is fetched exactly as supplied by the
            # caller; if this is reachable by untrusted users, consider
            # restricting which hosts may be requested.
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            # Parse with BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')

            # Remove unwanted elements
            for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'advertisement']):
                element.decompose()

            # Try to find the main content area, else use the whole document
            main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile(r'content|article|post', re.I))
            content_soup = main_content if main_content else soup

            # Extract title
            title_element = soup.find('title')
            title = title_element.get_text().strip() if title_element else "Article"

            # Prefer the <h1> text when it is shorter than the <title>; an
            # empty <h1> must not wipe out the title.
            h1_element = soup.find('h1')
            if h1_element:
                h1_text = h1_element.get_text().strip()
                if h1_text and len(h1_text) < len(title):
                    title = h1_text

            # Convert HTML to text using html2text
            h = html2text.HTML2Text()
            h.ignore_links = True
            h.ignore_images = True
            h.body_width = 0  # Don't wrap lines

            text = h.handle(str(content_soup))

            # Clean up the text: skip headers and short lines
            clean_lines = [
                line
                for line in (raw.strip() for raw in text.split('\n'))
                if line and not line.startswith('#') and len(line) > 20
            ]

            cleaned_text = ' '.join(clean_lines)

            # Remove excessive whitespace
            cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()

            if len(cleaned_text) < 100:
                return "❌ Could not extract sufficient content from this URL. The page might be protected or contain mainly non-text content.", "Error"

            return cleaned_text, title

        except requests.exceptions.RequestException as e:
            return f"❌ Error accessing URL: {str(e)}", "Error"
        except Exception as e:
            return f"❌ Error extracting from URL: {str(e)}", "Error"

# Initialize the summarizer.
# This module-level instance is the one referenced by name in summarize_text,
# batch_summarize_urls and compare_models; constructing it loads all three models.
summarizer = TextSummarizer()

🚀 Initializing summarization models...
Loading BART model...


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


Loading T5 model...


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


Loading Pegasus model...


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Device set to use cpu


✅ All models loaded successfully!
🔧 Running on: CPU


In [None]:
def get_text_stats(text):
    """Build a Markdown snippet with word, character and sentence counts.

    Sentences are counted with NLTK; if the tokenizer cannot be loaded
    (LookupError/OSError) a punctuation-based split is used instead.
    """
    word_count = len(text.split())
    char_count = len(text)

    try:
        sentence_count = len(nltk.sent_tokenize(text))
    except (LookupError, OSError):
        # Fallback: every non-blank piece between sentence-ending marks counts once
        pieces = re.split(r'[.!?]+', text)
        sentence_count = sum(1 for piece in pieces if piece.strip())

    report_lines = [
        "📊 **Text Statistics:**",
        f"- Words: {word_count:,}",
        f"- Characters: {char_count:,}",
        f"- Sentences: {sentence_count:,}",
    ]
    return "\n".join(report_lines)

def summarize_text(text, model_choice, summary_length, url_input=""):
    """Main function to handle text summarization.

    Args:
        text: Text to summarize; ignored when ``url_input`` is given.
        model_choice: Model label (e.g. "BART"); lower-cased before use.
        summary_length: One of the length labels defined in ``length_configs``.
        url_input: Optional URL whose extracted content replaces ``text``.

    Returns:
        A 3-tuple ``(summary, original_stats, summary_stats)`` of strings.
        The stats strings are empty when there is nothing to summarize or the
        URL could not be extracted; ``summary_stats`` is also empty when the
        summarizer returned an error ("❌") or warning ("⚠️") message.
    """
    # Treat missing values like empty strings instead of crashing on .strip()
    text = text or ""
    url = (url_input or "").strip()

    # Handle URL input
    if url:
        print("🌐 Extracting text from URL...")
        extracted_text, title = summarizer.extract_from_url(url)
        if extracted_text.startswith("❌"):
            return extracted_text, "", ""
        text = extracted_text
        source_info = f"📰 **Source:** {title}\n🔗 **URL:** {url}\n\n"
    else:
        source_info = ""

    if not text.strip():
        return "⚠️ Please provide text to summarize or enter a valid URL.", "", ""

    # Set summary length parameters
    length_configs = {
        "Short (50-100 words)": {"max_length": 100, "min_length": 30},
        "Medium (100-200 words)": {"max_length": 200, "min_length": 80},
        "Long (200-300 words)": {"max_length": 300, "min_length": 150}
    }

    config = length_configs[summary_length]

    # Generate summary
    print(f"🤖 Generating summary using {model_choice}...")
    summary = summarizer.summarize_with_model(
        text,
        model_name=model_choice.lower(),
        max_length=config["max_length"],
        min_length=config["min_length"]
    )

    # Get statistics
    original_stats = get_text_stats(text)

    # Error and warning messages are not summaries, so neither statistics nor
    # a compression ratio are reported for them.
    summary_stats = ""
    if not summary.startswith(("❌", "⚠️")):
        summary_stats = get_text_stats(summary)

        # Calculate compression ratio (text is non-blank here, so words >= 1)
        original_words = len(text.split())
        summary_words = len(summary.split())
        compression_ratio = round((1 - summary_words/original_words) * 100, 1)
        summary_stats += f"\n- Compression: {compression_ratio}% reduction"

    return source_info + summary, original_stats, summary_stats

In [None]:
# Custom CSS for better styling.
# Passed to gr.Blocks(css=css) by both interface builders; the .main-header and
# .stats-box classes are attached to components through elem_classes.
css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.main-header {
    text-align: center;
    margin-bottom: 30px;
}
.stats-box {
    background-color: #f0f0f0;
    padding: 15px;
    border-radius: 10px;
    margin: 10px 0;
}
"""

# Create the Gradio interface
def create_interface():
    """Build the basic single-page Gradio UI and return it without launching.

    The page has an input column (URL box, text box, model and length
    dropdowns, a summarize button and three example buttons) and an output
    column (summary box plus two statistics panels). The summarize button is
    wired to ``summarize_text``.

    Returns:
        The ``gr.Blocks`` object; the caller is expected to call ``launch()``.
    """
    with gr.Blocks(css=css, title="Text Summarizer") as demo:

        # Header
        gr.Markdown("""
        # 📝 Advanced Text Summarizer

        **Powered by State-of-the-Art AI Models**

        This tool can summarize long articles, documents, research papers, or web pages using multiple AI models.
        Simply paste your text or provide a URL, choose your preferred model and summary length, then get instant results!
        """, elem_classes=["main-header"])

        with gr.Row():
            with gr.Column(scale=2):
                # Input section
                gr.Markdown("## 📥 Input")

                url_input = gr.Textbox(
                    label="🔗 URL (Optional)",
                    placeholder="https://example.com/article - Leave empty if pasting text below",
                    lines=1
                )

                text_input = gr.Textbox(
                    label="📄 Text to Summarize",
                    placeholder="Paste your article, document, or long text here...",
                    lines=10,
                    max_lines=15
                )

                with gr.Row():
                    # The choice is lower-cased by summarize_text before the model lookup
                    model_choice = gr.Dropdown(
                        choices=["BART", "T5", "Pegasus"],
                        label="🤖 AI Model",
                        value="BART",
                        info="BART: General purpose | T5: Versatile | Pegasus: News-focused"
                    )

                    # These labels must match the keys of length_configs in summarize_text
                    summary_length = gr.Dropdown(
                        choices=["Short (50-100 words)", "Medium (100-200 words)", "Long (200-300 words)"],
                        label="📏 Summary Length",
                        value="Medium (100-200 words)"
                    )

                summarize_btn = gr.Button("✨ Generate Summary", variant="primary", size="lg")

                # Example buttons
                gr.Markdown("### 📚 Try These Examples:")
                with gr.Row():
                    example_news = gr.Button("📰 News Article", size="sm")
                    example_research = gr.Button("🔬 Research Abstract", size="sm")
                    example_story = gr.Button("📖 Short Story", size="sm")

            with gr.Column(scale=2):
                # Output section
                gr.Markdown("## 📤 Output")

                summary_output = gr.Textbox(
                    label="📋 Generated Summary",
                    lines=8,
                    max_lines=12,
                    show_copy_button=True
                )

                with gr.Row():
                    with gr.Column():
                        original_stats = gr.Markdown(
                            label="Original Text Stats",
                            elem_classes=["stats-box"]
                        )

                    with gr.Column():
                        summary_stats = gr.Markdown(
                            label="Summary Stats",
                            elem_classes=["stats-box"]
                        )

        # Example texts loaded into the text box by the example buttons
        news_example = """
        Artificial Intelligence has revolutionized numerous industries over the past decade, transforming how businesses operate and how people interact with technology. From healthcare diagnostics to autonomous vehicles, AI applications have become increasingly sophisticated and widespread. Machine learning algorithms now power recommendation systems on streaming platforms, enable real-time language translation, and assist doctors in medical imaging analysis. The technology has also raised important questions about employment, privacy, and ethical considerations. Major tech companies have invested billions of dollars in AI research and development, leading to breakthrough innovations in natural language processing, computer vision, and robotics. As AI continues to evolve, experts predict it will play an even more significant role in shaping the future of work, education, and daily life. However, challenges remain in ensuring AI systems are fair, transparent, and beneficial for all members of society.
        """

        research_example = """
        The study examined the effects of climate change on marine ecosystems over a 20-year period from 2000 to 2020. Researchers analyzed temperature data, ocean acidity levels, and marine biodiversity indicators across 50 monitoring stations worldwide. Results showed a significant increase in ocean temperatures of 0.8°C on average, with corresponding changes in fish migration patterns and coral reef health. The research team found that 23% of coral reefs showed signs of severe bleaching, while fish populations shifted northward by an average of 150 kilometers. Ocean acidity increased by 15%, affecting shellfish and other calcifying organisms. The study concludes that immediate action is needed to mitigate further damage to marine ecosystems. Recommendations include reducing carbon emissions, establishing marine protected areas, and implementing sustainable fishing practices. The research was conducted by an international team of marine biologists and oceanographers from universities in the United States, Australia, and Norway.
        """

        story_example = """
        Sarah had always been fascinated by the old lighthouse that stood at the edge of her hometown's rocky coastline. Every evening, she would walk along the shore and gaze up at its weathered white tower, wondering about the stories it could tell. The lighthouse had been abandoned for decades, but local legends spoke of a mysterious light that still appeared on foggy nights. One particularly misty evening, Sarah decided to investigate. As she approached the lighthouse, she noticed a warm glow emanating from the lantern room at the top. The rusty door at the base creaked open at her touch, revealing a spiral staircase that seemed to stretch endlessly upward. With each step, the light grew brighter, and she could hear the faint sound of ocean waves that seemed to come from within the walls themselves. When she finally reached the top, Sarah discovered an elderly man tending to an ornate brass lamp. He smiled warmly and explained that he was the lighthouse's last keeper, maintaining the beacon not for ships, but for lost souls seeking their way home. Sarah realized she had found exactly what she had been searching for all along.
        """

        # Event handlers for example buttons: each one writes its sample into text_input
        example_news.click(
            lambda: news_example,
            outputs=text_input
        )

        example_research.click(
            lambda: research_example,
            outputs=text_input
        )

        example_story.click(
            lambda: story_example,
            outputs=text_input
        )

        # Main summarization event; input order matches summarize_text's parameters
        summarize_btn.click(
            fn=summarize_text,
            inputs=[text_input, model_choice, summary_length, url_input],
            outputs=[summary_output, original_stats, summary_stats]
        )

        # Footer
        gr.Markdown("""
        ---
        ### 💡 Tips for Better Summaries:
        - **Longer texts** (200+ words) work best for summarization
        - **BART** is great for news articles and general content
        - **T5** works well with technical and academic texts
        - **Pegasus** excels with news and journalistic content
        - For **web articles**, just paste the URL and leave the text box empty

        ### 🛠️ How it Works:
        This tool uses transformer-based neural networks trained on millions of documents to understand context and extract key information. The models generate abstractive summaries, meaning they create new sentences rather than just extracting existing ones.
        """)

    return demo


In [None]:
def batch_summarize_urls(urls_text, model_choice, summary_length):
    """Summarize multiple URLs at once.

    Args:
        urls_text: URLs separated by newlines; blank lines are ignored.
        model_choice: Model label (e.g. "BART"); lower-cased before use.
        summary_length: Length label chosen in the UI. Unknown labels fall
            back to the previous fixed limits (max 150 / min 50).

    Returns:
        One Markdown-formatted string with a section per URL, separated by
        ``---`` rules; an empty string when no URL was supplied.
    """
    # Same label -> limits mapping that the single-document path uses, so the
    # length dropdown on the batch tab actually affects the output.
    length_configs = {
        "Short (50-100 words)": {"max_length": 100, "min_length": 30},
        "Medium (100-200 words)": {"max_length": 200, "min_length": 80},
        "Long (200-300 words)": {"max_length": 300, "min_length": 150}
    }
    config = length_configs.get(summary_length, {"max_length": 150, "min_length": 50})

    urls = [url.strip() for url in (urls_text or "").split('\n') if url.strip()]
    results = []

    for i, url in enumerate(urls, 1):
        print(f"Processing URL {i}/{len(urls)}: {url}")
        try:
            extracted_text, title = summarizer.extract_from_url(url)
            if not extracted_text.startswith("❌"):
                summary = summarizer.summarize_with_model(
                    extracted_text,
                    model_name=model_choice.lower(),
                    max_length=config["max_length"],
                    min_length=config["min_length"]
                )
                results.append(f"**{title}**\n{url}\n\n{summary}\n\n---\n")
            else:
                results.append(f"**Error with {url}**\n{extracted_text}\n\n---\n")
        except Exception as e:
            # Keep going: one failing URL must not abort the rest of the batch
            results.append(f"**Error with {url}**\nFailed to process: {str(e)}\n\n---\n")

    return "\n".join(results)

def compare_models(text):
    """Summarize the same text with BART, T5 and Pegasus.

    Returns a 3-tuple of strings in the order (BART, T5, Pegasus). For blank
    input the first element carries a prompt message and the others are empty.
    """
    if text.strip() == "":
        return "Please provide text to compare", "", ""

    # Run the models in a fixed order so results line up with the output boxes
    model_order = ("bart", "t5", "pegasus")
    return tuple(
        summarizer.summarize_with_model(text, model_key, max_length=150, min_length=50)
        for model_key in model_order
    )

# =============================================================================
# CELL 7: Create Advanced Interface with Additional Features
# =============================================================================

def create_advanced_interface():
    """Build the tabbed Gradio UI and return it without launching.

    Tabs:
        - Single Document: wired to ``summarize_text``.
        - Batch URLs: wired to ``batch_summarize_urls``.
        - Compare Models: wired to ``compare_models``.
        - Help & Info: static Markdown.

    Returns:
        The ``gr.Blocks`` object; the caller is expected to call ``launch()``.
    """
    with gr.Blocks(css=css, title="Advanced Text Summarizer") as demo:

        gr.Markdown("""
        # 🚀 Advanced Text Summarizer Suite

        **Multiple AI Models | Batch Processing | Model Comparison**
        """, elem_classes=["main-header"])

        with gr.Tabs():
            # Main Summarizer Tab
            with gr.TabItem("📝 Single Document"):
                with gr.Row():
                    with gr.Column():
                        url_input = gr.Textbox(label="🔗 URL (Optional)", placeholder="https://example.com/article")
                        text_input = gr.Textbox(label="📄 Text", placeholder="Paste your text here...", lines=8)

                        with gr.Row():
                            model_choice = gr.Dropdown(
                                choices=["BART", "T5", "Pegasus"],
                                label="🤖 Model",
                                value="BART"
                            )
                            # These labels must match the keys of length_configs in summarize_text
                            summary_length = gr.Dropdown(
                                choices=["Short (50-100 words)", "Medium (100-200 words)", "Long (200-300 words)"],
                                label="📏 Length",
                                value="Medium (100-200 words)"
                            )

                        summarize_btn = gr.Button("✨ Summarize", variant="primary")

                    with gr.Column():
                        summary_output = gr.Textbox(label="📋 Summary", lines=6, show_copy_button=True)
                        original_stats = gr.Markdown()
                        summary_stats = gr.Markdown()

                # Input order matches summarize_text's parameters
                summarize_btn.click(
                    fn=summarize_text,
                    inputs=[text_input, model_choice, summary_length, url_input],
                    outputs=[summary_output, original_stats, summary_stats]
                )

            # Batch Processing Tab
            with gr.TabItem("📚 Batch URLs"):
                gr.Markdown("### Process Multiple URLs at Once")

                urls_input = gr.Textbox(
                    label="🔗 URLs (one per line)",
                    placeholder="https://example1.com\nhttps://example2.com\nhttps://example3.com",
                    lines=6
                )

                with gr.Row():
                    batch_model = gr.Dropdown(
                        choices=["BART", "T5", "Pegasus"],
                        label="🤖 Model",
                        value="BART"
                    )
                    batch_length = gr.Dropdown(
                        choices=["Short (50-100 words)", "Medium (100-200 words)", "Long (200-300 words)"],
                        label="📏 Length",
                        value="Short (50-100 words)"
                    )

                batch_btn = gr.Button("🚀 Process Batch", variant="primary")
                batch_output = gr.Textbox(label="📋 Batch Results", lines=15, show_copy_button=True)

                # batch_length is handed to batch_summarize_urls as its summary_length argument
                batch_btn.click(
                    fn=batch_summarize_urls,
                    inputs=[urls_input, batch_model, batch_length],
                    outputs=batch_output
                )

            # Model Comparison Tab
            with gr.TabItem("🔍 Compare Models"):
                gr.Markdown("### See How Different Models Summarize the Same Text")

                compare_text = gr.Textbox(
                    label="📄 Text to Compare",
                    placeholder="Paste text to see how different models summarize it...",
                    lines=6
                )

                compare_btn = gr.Button("🔄 Compare Models", variant="primary")

                with gr.Row():
                    bart_result = gr.Textbox(label="🤖 BART Summary", lines=4, show_copy_button=True)
                    t5_result = gr.Textbox(label="🤖 T5 Summary", lines=4, show_copy_button=True)
                    pegasus_result = gr.Textbox(label="🤖 Pegasus Summary", lines=4, show_copy_button=True)

                # Output order matches the (BART, T5, Pegasus) tuple returned by compare_models
                compare_btn.click(
                    fn=compare_models,
                    inputs=compare_text,
                    outputs=[bart_result, t5_result, pegasus_result]
                )

            # Help Tab (static text only, no event handlers)
            with gr.TabItem("❓ Help & Info"):
                gr.Markdown("""
                ##  How to Use This Tool

                ###  Single Document
                1. **Option A:** Paste text directly into the text box
                2. **Option B:** Enter a URL to automatically extract and summarize web content
                3. Choose your preferred AI model and summary length
                4. Click "Summarize" to generate your summary

                ### Batch Processing
                - Enter multiple URLs (one per line) to summarize several articles at once
                - Great for research, news monitoring, or content curation

                ###  Model Comparison
                - Compare how different AI models summarize the same text
                - Helps you choose the best model for your specific content type

                ##  AI Models Explained

                **BART (Facebook AI)**
                - Best for: General content, news articles, blog posts
                - Strengths: Balanced and coherent summaries

                **T5 (Google AI)**
                - Best for: Technical content, academic papers, complex texts
                - Strengths: Handles complex language and technical terms well

                **Pegasus (Google AI)**
                - Best for: News articles, journalistic content
                - Strengths: Trained specifically on news summarization

                ##  Tips for Best Results

                - **Minimum length:** Provide at least 200 words for meaningful summaries
                - **URL extraction:** Works with most news sites, blogs, and articles
                - **Technical content:** Use T5 for scientific or technical documents
                - **Multiple articles:** Use batch processing for research efficiency
                - **Experiment:** Try different models to see which works best for your content

                ## Limitations

                - Very short texts may not summarize well
                - Some websites may block automated content extraction
                - Processing time varies based on text length and model complexity
                - Generated summaries should be reviewed for accuracy

                ## Technical Details

                This tool uses pre-trained transformer models from Hugging Face:
                - Models run on GPU when available for faster processing
                - Text is automatically chunked for very long documents
                - Content is cleaned and preprocessed for optimal results
                """)

    return demo


In [None]:
# Build the basic interface first, then announce and start it
print(" Creating Text Summarizer Interface...")
demo = create_interface()

# Status messages shown right before the server starts
for notice in (
    " Launching the application...",
    " The interface will open in a new tab/window",
    " You can also access advanced features in the 'Advanced Interface' section below",
):
    print(notice)

# Launch settings collected in one place
launch_options = {
    "share": True,         # Creates a public link you can share
    "debug": True,         # Enable debug mode for development
    "height": 800,         # Set interface height
    "favicon_path": None,
}
demo.launch(**launch_options)

In [None]:
# Build the tabbed interface and start it with a public share link
print(" Creating Advanced Interface with Extra Features...")
advanced_demo = create_advanced_interface()
advanced_launch_options = {"share": True, "debug": True, "height": 900}
advanced_demo.launch(**advanced_launch_options)

🔥 Creating Advanced Interface with Extra Features...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://7b4f0fe4d31ec1840e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


🌐 Extracting text from URL...
Processing URL 1/1: https://theconversation.com/the-paradox-of-pluralism-how-college-shapes-students-views-of-other-religions-261901


Your max_length is set to 150, but your input_length is only 118. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=59)
Your max_length is set to 150, but your input_length is only 125. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=62)
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Your max_length is set to 150, but your input_length is only 111. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)


🌐 Extracting text from URL...
🤖 Generating summary using BART...
🌐 Extracting text from URL...
🤖 Generating summary using BART...
