# ***Text Summarization and Analysis Using LLM and Gradio***

In [1]:
!pip install gradio transformers wordcloud

Collecting gradio
  Downloading gradio-4.41.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.112.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m646.3 kB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gra

In [2]:
import gradio as gr
from transformers import pipeline
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud, STOPWORDS
from collections import Counter
import re
import tempfile
import logging

In [3]:
# Enable logging
logging.basicConfig(level=logging.INFO)

In [4]:
# Load the summarization and sentiment analysis models once at startup
logging.info("Loading models...")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
sentiment_analyzer = pipeline("sentiment-analysis")
logging.info("Models loaded successfully.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [5]:
# Function to clean and preprocess text
def preprocess_text(text):
    logging.info("Preprocessing text...")
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    logging.info("Text preprocessed.")
    return text

In [6]:
# Function to generate word cloud and save it as a file
def generate_wordcloud(text, stopwords=None):
    logging.info("Generating word cloud...")
    if stopwords is None:
        stopwords = set(STOPWORDS)
    wordcloud = WordCloud(width=800, height=400, background_color='white', stopwords=stopwords).generate(text)
    _, temp_file = tempfile.mkstemp(suffix='.png')
    wordcloud.to_file(temp_file)
    logging.info("Word cloud generated.")
    return temp_file

In [7]:
# Function to perform text summarization
def summarize_text(text, max_length=130, min_length=30):
    logging.info("Summarizing text...")
    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
    logging.info("Text summarized.")
    return summary

In [14]:
# Function to perform text analysis
def analyze_text(text):
    logging.info("Analyzing text...")
    # Tokenization and counting
    tokens = text.split()
    token_count = len(tokens)

    # Word frequency analysis
    word_freq = Counter(tokens)

    # Generate and save word cloud
    wordcloud_file = generate_wordcloud(' '.join(tokens))

    # Plot word frequency distribution
    freq_dist = list(word_freq.values())
    _, temp_file = tempfile.mkstemp(suffix='.png')
    plt.figure(figsize=(10, 5))
    sns.histplot(freq_dist, bins=10, kde=True , palette='viridus')
    plt.title('Word Frequency Distribution')
    plt.xlabel('Frequency')
    plt.ylabel('Count')
    plt.savefig(temp_file)
    plt.close()
    logging.info("Text analyzed.")
    return wordcloud_file, temp_file, f"Total Tokens: {token_count}\nTop 5 Most Common Words: {word_freq.most_common(5)}"

In [15]:
# Function to perform sentiment analysis
def sentiment_analysis(text):
    logging.info("Analyzing sentiment...")
    sentiment = sentiment_analyzer(text)
    sentiment_label = sentiment[0]['label']
    sentiment_score = sentiment[0]['score']
    logging.info(f"Sentiment analyzed: {sentiment_label} (Score: {sentiment_score:.2f})")
    return f"Sentiment: {sentiment_label} (Score: {sentiment_score:.2f})"


In [16]:
# Gradio interface function
def gradio_interface(text, summary_max_length, summary_min_length):
    try:
        if not text.strip():
            return "Input text cannot be empty.", None, None, None, None, None

        preprocessed_text = preprocess_text(text)

        # Sentiment analysis before summarization
        sentiment_before = sentiment_analysis(preprocessed_text)

        # Summarization
        summary = summarize_text(preprocessed_text, max_length=summary_max_length, min_length=summary_min_length)

        # Analysis after summarization
        wordcloud_file_after, freq_dist_file_after, analysis_after = analyze_text(summary)

        # Sentiment analysis after summarization
        sentiment_after = sentiment_analysis(summary)

        return (summary, analysis_after, wordcloud_file_after, freq_dist_file_after,
                sentiment_before, sentiment_after)

    except Exception as e:
        logging.error(f"An error occurred: {e}")
        return f"An error occurred: {str(e)}", None, None, None, None, None


In [17]:
# Gradio app
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(lines=5, label="Input Text"),
        gr.Slider(minimum=50, maximum=300, value=130, label="Summary Maximum Length"),
        gr.Slider(minimum=10, maximum=100, value=30, label="Summary Minimum Length")
    ],
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Analysis after summarization"),
        gr.Image(label="Wordcloud after summarization"),
        gr.Image(label="Frequency distribution after summarization"),
        gr.Textbox(label="Sentiment before summarization"),
        gr.Textbox(label="Sentiment after summarization")
    ],
    title="Text Summarization and Analysis",
    description="Enter text to generate a summary and analyze word frequencies, sentiment after summarization."
)

iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://addac222a296648f38.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [19]:
gr.close_all()

Closing server running on port: 7860
Closing server running on port: 7860
