In [1]:
# 1. Install the requirements
!pip install -q transformers spacy vaderSentiment scikit-learn gradio
!python -m spacy download en_core_web_sm

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m58.4 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
# 2. Import Libraries
from transformers import pipeline
import spacy
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
import gradio as gr
import re

In [3]:
# 3. Load NLP Models
# These objects are created once at module level; the helper functions in the
# later cells read `summarizer` and `analyzer` as globals.

# Hugging Face summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# spaCy English pipeline installed by the download step in cell 1.
# NOTE(review): `nlp` is not referenced by any later cell visible in this notebook.
nlp = spacy.load("en_core_web_sm")
# VADER sentiment analyzer used by analyze_sentiment().
analyzer = SentimentIntensityAnalyzer()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


In [4]:
# 4. Text Cleaning Function
def clean_text(text):
    """Normalize whitespace in `text`.

    Every run of whitespace characters is replaced by a single space, then
    leading and trailing whitespace is removed.
    """
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()

In [5]:
# 5. Text Summarization
def generate_summary(text, max_len=130, min_len=30):
    """Summarize `text` with the module-level `summarizer` pipeline.

    Args:
        text: Raw input text; its whitespace is normalized with `clean_text`
            before it is passed to the pipeline.
        max_len: Forwarded to the pipeline as `max_length`.
        min_len: Forwarded to the pipeline as `min_length`.

    Returns:
        The `summary_text` string of the first result returned by the pipeline.
    """
    cleaned = clean_text(text)
    summary = summarizer(
        cleaned,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        # Ask the tokenizer to truncate over-long input; without this, text
        # longer than the model's maximum sequence length makes the call fail
        # instead of summarizing the part that fits.
        truncation=True,
    )
    return summary[0]['summary_text']

In [6]:
# 6. Keyword Extraction
def extract_keywords(text, num_keywords=5):
    """Return the highest-scoring TF-IDF terms of `text`.

    The text is treated as a single document and vectorized with English
    stop words removed.

    Args:
        text: The document to extract keywords from.
        num_keywords: Maximum number of terms to return.

    Returns:
        A list of at most `num_keywords` terms ordered by descending score.
        An empty list is returned when the vectorizer finds no usable terms.
    """
    vec = TfidfVectorizer(stop_words='english')
    try:
        X = vec.fit_transform([text])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when nothing is
        # left after tokenization and stop-word removal, e.g. text made only of
        # stop words or punctuation. There are simply no keywords in that case.
        return []
    scores = zip(vec.get_feature_names_out(), X.toarray()[0])
    # sorted() is stable, so equally scored terms keep the vectorizer's order.
    ranked = sorted(scores, key=lambda pair: -pair[1])
    return [term for term, _ in ranked[:num_keywords]]

In [7]:
# 7. Sentiment Analysis
def analyze_sentiment(text):
    """Score `text` with the module-level `analyzer`.

    Returns:
        Whatever `analyzer.polarity_scores` produces for `text`, unchanged.
    """
    scores = analyzer.polarity_scores(text)
    return scores

In [8]:
# 8. Simple Topic Classification
def classify_topic(text):
    """Assign a coarse topic label to `text` by case-insensitive substring match.

    The keywords are checked in a fixed order and the first one found in the
    lowercased text decides the label; "General" is returned when none match.
    """
    # (keyword, label) pairs in priority order.
    topic_rules = (
        ("health", "Health"),
        ("finance", "Finance"),
        ("sports", "Sports"),
        ("education", "Education"),
    )
    haystack = text.lower()
    for keyword, label in topic_rules:
        if keyword in haystack:
            return label
    return "General"

In [9]:
# 9. Summary Compression Score
def summary_score(summary, original):
    """Return the summary-to-original length ratio, rounded to two decimals.

    Args:
        summary: The generated summary text.
        original: The text the summary was produced from.

    Returns:
        len(summary) / len(original) rounded to 2 decimal places, or 0.0 when
        `original` is empty (there is nothing to compare against).
    """
    if not original:
        # Avoid ZeroDivisionError for an empty original.
        return 0.0
    return round(len(summary) / len(original), 2)

In [10]:
# 10. Combine Everything with Gradio UI
def analyze_text(text):
    """Run the full analysis on `text` and return the values for the UI.

    Returns:
        A 5-tuple ``(summary, keywords, sentiment, topic, score)``. When the
        stripped text has fewer than 50 characters, no analysis is run and a
        placeholder tuple with an explanatory message is returned instead.
    """
    is_too_short = len(text.strip()) < 50
    if is_too_short:
        return "Text too short for summarization.", [], {}, "", 0.0

    digest = generate_summary(text)
    # Tuple elements are evaluated left to right, so the helpers run in the
    # same order as the output fields.
    return (
        digest,
        extract_keywords(text),
        analyze_sentiment(text),
        classify_topic(text),
        summary_score(digest, text),
    )

In [11]:
# 11. Launch the Gradio Interface
# Builds a two-column Blocks layout: the text input and button on the left,
# the five analysis outputs on the right.

with gr.Blocks() as demo:
    # Page title and one-line description.
    gr.Markdown("# 📄 Text Summarizer")
    gr.Markdown("**Summarize, analyze sentiment, extract keywords, and identify the topic of your text**")

    with gr.Row():
        # Left column: user input.
        with gr.Column():
            input_text = gr.Textbox(lines=12, label="Enter your text")
            submit_btn = gr.Button("Analyze")

        # Right column: one component per value returned by analyze_text.
        with gr.Column():
            summary_output = gr.Textbox(label="📝 Summary")
            # NOTE(review): analyze_text returns the keywords as a list, while this
            # component is a Textbox — confirm the list is rendered as intended.
            keyword_output = gr.Textbox(label="🔑 Keywords")
            sentiment_output = gr.Label(label="😊 Sentiment")
            topic_output = gr.Textbox(label="📚 Topic")
            score_output = gr.Number(label="📉 Summary Compression Score")

    # The order of `outputs` must match the order of the tuple returned by
    # analyze_text: (summary, keywords, sentiment, topic, score).
    submit_btn.click(fn=analyze_text, inputs=input_text, outputs=[
        summary_output, keyword_output, sentiment_output,
        topic_output, score_output
    ])

# Start the app. In the recorded run below (a hosted Colab notebook) Gradio
# enabled share=True automatically and printed a public URL.
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ef2a2b23c6954dc36e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


