In [14]:
# 🔧 Install dependencies
!pip install -q streamlit transformers torch keybert sentence-transformers pyngrok


In [15]:
%%writefile summarizer_app.py
import streamlit as st
from transformers import pipeline
import torch
import re
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer

# Page Configuration
st.set_page_config(
    page_title="✨ AI Text Summarizer & Keyword Extractor",
    layout="centered",
    initial_sidebar_state="expanded"
)

# Custom CSS for colorful UI
st.markdown("""
    <style>
        .stApp {
            background-color: #fff8f0;
            font-family: 'Segoe UI', sans-serif;
        }
        .main-title {
            font-size: 2.2rem;
            font-weight: bold;
            color: #cc0066;
            text-align: center;
        }
        .subtitle {
            color: #4f4f4f;
            font-size: 1rem;
            text-align: center;
            margin-bottom: 20px;
        }
        .highlight-box {
            background-color: #ffe6f0;
            padding: 1rem;
            border-radius: 12px;
            box-shadow: 0 0 10px rgba(0,0,0,0.05);
        }
        .stButton button {
            background-color: #ff66b2;
            color: white;
            font-weight: bold;
            border-radius: 8px;
            padding: 0.5rem 1rem;
            transition: 0.3s;
        }
        .stButton button:hover {
            background-color: #cc0066;
            color: #ffffff;
        }
    </style>
""", unsafe_allow_html=True)

# Title and Subtitle
st.markdown('<div class="main-title">📝 AI Text Summarizer</div>', unsafe_allow_html=True)
st.markdown('<div class="subtitle">Summarize long texts & extract key insights effortlessly with just one click!</div>', unsafe_allow_html=True)

# Set device
device = 0 if torch.cuda.is_available() else -1

# Clean input text
def clean_text(text):
    return re.sub(r'\s+', ' ', text.strip())

# Initialize session states
if "summary_text" not in st.session_state:
    st.session_state.summary_text = ""
if "input_text_clean" not in st.session_state:
    st.session_state.input_text_clean = ""

# Model Selection
with st.expander("🔍 Choose a Summarization Model"):
    model_option = st.radio(
        "Select Model:",
        ("T5 (General purpose)", "BART (Balanced & robust)", "PEGASUS (Best for abstractive summarization)")
    )

# Set model name
if model_option == "T5 (General purpose)":
    model_name = "t5-large"
elif model_option == "BART (Balanced & robust)":
    model_name = "facebook/bart-large-cnn"
else:
    model_name = "google/pegasus-xsum"

# Text input area
st.markdown("### 📄 Paste Your Paragraph Below:")
input_text = st.text_area(
    "",
    height=250,
    placeholder="Paste or type your paragraph here..."
)

# Summarize
if st.button("✨ Summarize Text"):
    if not input_text.strip():
        st.warning("⚠️ Please enter a paragraph to summarize.")
    else:
        with st.spinner(f"🔄 Summarizing using {model_option.split()[0]} model..."):
            summarizer = pipeline("summarization", model=model_name, device=device)
            st.session_state.input_text_clean = clean_text(input_text)
            word_count = len(st.session_state.input_text_clean.split())

            if model_name == "google/pegasus-xsum":
                max_len = 60
                min_len = 20
            else:
                max_len = max(30, min(int(word_count * 0.6), 200))
                min_len = max(10, min(int(word_count * 0.4), max_len - 10))

            summary = summarizer(
                st.session_state.input_text_clean,
                max_length=max_len,
                min_length=min_len,
                do_sample=False,
                clean_up_tokenization_spaces=True
            )

            st.session_state.summary_text = summary[0]['summary_text'].strip().capitalize()

# Show Summary
if st.session_state.summary_text:
    st.markdown("### ✅ Summarized Result")
    with st.container():
        st.markdown(
            f"<div class='highlight-box'><b>Original Word Count:</b> {len(st.session_state.input_text_clean.split())}<br><b>Summary Word Count:</b> {len(st.session_state.summary_text.split())}</div>",
            unsafe_allow_html=True
        )
        st.success(st.session_state.summary_text)

    # Keyword Extraction
    if st.button("🔑 Extract Keywords"):
        with st.spinner("🔍 Finding top keywords..."):
            embed_model = SentenceTransformer("all-mpnet-base-v2")
            kw_model = KeyBERT(model=embed_model)
            keywords = kw_model.extract_keywords(
                st.session_state.input_text_clean,
                keyphrase_ngram_range=(1, 2),
                stop_words='english',
                use_mmr=True,
                diversity=0.7,
                top_n=5  # Get only top 5 keywords
            )

        st.markdown("### 🧠 Top 5 Keywords")
        for i, (kw, score) in enumerate(keywords, 1):
            st.markdown(f"**{i}. {kw}** — _Score:_ `{round(score, 2)}`")


Overwriting summarizer_app.py


In [16]:
!ngrok config add-authtoken 2vf2gmW1fBlKA4ILjWtm9PwxjdD_ew3NGY6JbPAx8iP2c7Pi

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
  from pyngrok import ngrok
  import os

  # Kill any previous Streamlit processes
  !pkill streamlit

  # Create the tunnel for port 8501 (default for Streamlit)
  public_url = ngrok.connect(8501)
  print("🚀 Streamlit is live at:", public_url)

  # Run the Streamlit app in the background
  !streamlit run summarizer_app.py &


🚀 Streamlit is live at: NgrokTunnel: "https://a394-34-27-5-152.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.27.5.152:8501[0m
[0m
2025-04-13 08:46:12.382683: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744533972.469560   58371 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744533972.494928   58371 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been regis