<a href="https://colab.research.google.com/github/SakilMalek/Article-Summarizer/blob/main/Text_Summarization_tool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [107]:
import shutil
# Delete the extracted 'punkt' tokenizer directory under /root/nltk_data.
# ignore_errors=True makes this a no-op when the directory does not exist.
# NOTE(review): presumably this clears a stale or corrupt copy before the
# download cell runs -- confirm it is still needed.
shutil.rmtree('/root/nltk_data/tokenizers/punkt', ignore_errors=True)


In [108]:
# Fetch the NLTK resources this notebook relies on, then smoke-test the
# sentence tokenizer on a short sample.
import nltk

for resource_name in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource_name)

# Also search the system-wide data directory when resolving NLTK resources.
nltk.data.path.append('/usr/share/nltk_data')
from nltk.tokenize import sent_tokenize

text = "This is a test. Let's check if it works!"
sentences = sent_tokenize(text)
print(sentences)

['This is a test.', "Let's check if it works!"]


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [109]:
# Install required packages
!pip install -q streamlit pyngrok nltk transformers sumy newspaper3k
!npm install -g localtunnel  # Alternative tunnel option

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K
changed 22 packages in 1s
[1G[0K⠴[1G[0K
[1G[0K⠴[1G[0K3 packages are looking for funding
[1G[0K⠴[1G[0K  run `npm fund` for details
[1G[0K⠴[1G[0K

In [110]:
# Make sure newspaper3k 0.2.8 specifically is the installed version.
!pip install newspaper3k==0.2.8



In [111]:
# Write the Streamlit app to a file
%%writefile article_summarizer.py
import streamlit as st
from nltk.tokenize import word_tokenize, sent_tokenize
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from transformers import pipeline
from newspaper import Article
import nltk
import time

# Text summarization functions
def sumy_summarizer(text, summarizer_type='lsa', sentences_count=5):
    """Extractive summarization using the Sumy library.

    Args:
        text: Plain-text document to summarize.
        summarizer_type: Algorithm name, case-insensitive. One of 'lsa',
            'lex' (alias 'lexrank'), 'luhn' or 'textrank'.
        sentences_count: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces.

    Raises:
        ValueError: If ``summarizer_type`` is not a supported algorithm name.
    """
    # Normalise the name and accept 'lexrank' as an alias for 'lex': the UI
    # label "Extractive (LexRank)" yields 'lexrank', which previously matched
    # no branch and left `summarizer` unbound.
    algorithm = str(summarizer_type).strip().lower()
    algorithm = {'lexrank': 'lex'}.get(algorithm, algorithm)

    supported = ('lsa', 'lex', 'luhn', 'textrank')
    if algorithm not in supported:
        raise ValueError(
            f"Unknown summarizer_type {summarizer_type!r}; "
            f"expected one of {', '.join(supported)}"
        )

    summarizer_classes = {
        'lsa': LsaSummarizer,
        'lex': LexRankSummarizer,
        'luhn': LuhnSummarizer,
        'textrank': TextRankSummarizer,
    }
    summarizer = summarizer_classes[algorithm]()

    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    summary = summarizer(parser.document, sentences_count)
    return ' '.join(str(sentence) for sentence in summary)

@st.cache_resource
def _load_bart_summarizer():
    """Build the BART summarization pipeline once and reuse it across reruns."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def abstractive_summarizer(text, max_length=130, min_length=30):
    """Abstractive summarization using the BART model.

    Args:
        text: Document to summarize.
        max_length: Upper bound passed to the pipeline for the summary length.
        min_length: Lower bound passed to the pipeline for the summary length.

    Returns:
        The generated summary string ('summary_text' of the first result).
    """
    # Loading the model is expensive, so the pipeline is cached rather than
    # rebuilt on every call.
    summarizer = _load_bart_summarizer()
    # truncation=True clips inputs that exceed the model's maximum input
    # length instead of letting the call fail on long articles.
    result = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        truncation=True,
    )
    return result[0]['summary_text']

def get_article_text(url):
    """Return the body text of the news article located at ``url``."""
    page = Article(url)
    # The article has to be downloaded and then parsed before `.text` is read.
    page.download()
    page.parse()
    body = page.text
    return body

# Streamlit UI
def main():
    """Render the Streamlit UI: collect article text, summarize it, show stats.

    The article text comes either from a text area or from a URL fetched with
    ``get_article_text``. The user then picks a summarization method and a
    length, and the summary is shown together with word-count statistics.
    """
    st.set_page_config(page_title="Article Summarizer", layout="wide")

    st.title("📝 Article Summarization Tool")
    st.markdown("""
    Summarize lengthy articles using NLP techniques.
    Paste text directly or provide a URL.
    """)

    # Input options
    input_method = st.radio("Input method:", ("Text", "URL"), horizontal=True)

    text = ""
    if input_method == "Text":
        text = st.text_area("Paste your article text here:", height=200)
    else:
        url = st.text_input("Enter article URL:")
        if url:
            with st.spinner("Fetching article..."):
                try:
                    text = get_article_text(url)
                    st.text_area("Extracted Text:", text, height=200)
                    if not text.strip():
                        # Tell the user why no summarization options appear.
                        st.warning("No article text could be extracted from that URL.")
                except Exception as e:
                    st.error(f"Error: {str(e)}")

    # Explicit label -> sumy_summarizer key mapping. Deriving the key from the
    # label text produced "lexrank", which sumy_summarizer does not recognise.
    extractive_methods = {
        "Extractive (LSA)": "lsa",
        "Extractive (LexRank)": "lex",
        "Extractive (Luhn)": "luhn",
        "Extractive (TextRank)": "textrank",
    }
    abstractive_label = "Abstractive (BART)"

    # Whitespace-only input has no words, so treat it as empty; this also
    # keeps the reduction ratio below from dividing by zero.
    if text and text.strip():
        # Summarization options
        st.subheader("Summarization Options")
        col1, col2 = st.columns(2)

        with col1:
            summary_type = st.selectbox(
                "Summary type:",
                tuple(extractive_methods) + (abstractive_label,)
            )

        with col2:
            length = st.slider(
                "Summary length:",
                min_value=1, max_value=10, value=5
            )

        if st.button("Generate Summary"):
            with st.spinner("Generating summary..."):
                try:
                    start_time = time.time()

                    if summary_type in extractive_methods:
                        method = extractive_methods[summary_type]
                        # For extractive methods the slider is a sentence count.
                        summary = sumy_summarizer(text, method, length)
                        st.subheader("📋 Extractive Summary")
                    else:
                        # For BART the slider is scaled to a max_length of
                        # 30 per step.
                        summary = abstractive_summarizer(text, max_length=length*30)
                        st.subheader("✨ Abstractive Summary")

                    st.write(summary)

                    # Stats
                    orig_words = len(word_tokenize(text))
                    summ_words = len(word_tokenize(summary))
                    if orig_words:
                        ratio = (orig_words - summ_words)/orig_words * 100
                    else:
                        ratio = 0.0

                    st.success(f"""
                    **Summary Statistics**
                    - Original: {orig_words} words
                    - Summary: {summ_words} words
                    - Reduced by: {ratio:.1f}%
                    - Time taken: {time.time()-start_time:.2f}s
                    """)

                except Exception as e:
                    st.error(f"Error: {str(e)}")

# Build the UI only when this file is executed as a script (it is launched
# with `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    main()

Overwriting article_summarizer.py


In [113]:
# Run the app with ngrok in Colab
from pyngrok import ngrok
import threading
import subprocess
import time
import requests

# Kill existing processes
!pkill -f streamlit
!pkill -f ngrok

# Function to run Streamlit
def run_streamlit():
    subprocess.run(['streamlit', 'run', 'article_summarizer.py', '--server.port', '8501'])

# Start Streamlit in background thread
thread = threading.Thread(target=run_streamlit, daemon=True)
thread.start()

# Wait for server to start
time.sleep(5)

# Set up ngrok tunnel
ngrok.kill()
ngrok.set_auth_token("2n3gPegMzGBqsMDg66r03QloPW1_yUnGcmD6nmDfoLRRAfFE")  # Replace with your token
public_url = ngrok.connect(addr='8501', bind_tls=True)

# Display the public URL
print("✨ Your app is running at:")
print(f"🔗 {public_url.public_url}")

# Alternative localtunnel option (uncomment if ngrok fails)
# !npx localtunnel --port 8501

✨ Your app is running at:
🔗 https://7af9-35-197-129-239.ngrok-free.app


In [121]:
# Export this notebook to a plain Python script.
# Save the notebook first, and replace the filename below with the actual
# notebook filename; the name is case-sensitive.
# NOTE(review): the recorded output below is nbconvert's usage text rather than
# a conversion log, which suggests the file was not found in the working
# directory -- confirm the path before relying on this cell.
!jupyter nbconvert Text_Summarization_tool.ipynb --to python

This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePr