In [2]:
!pip install gradio transformers beautifulsoup4 requests nltk huggingface_hub
!pip install gradio
!pip install gTTs

Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [10]:
#exp 3 for audio
#sentiment % with metadata ui
from collections import Counter
import gradio as gr
from transformers import pipeline
import requests
from bs4 import BeautifulSoup
import re
import random
import time
from nltk.corpus import stopwords
import nltk
from huggingface_hub import login
import os

# Download stopwords
# extract_keywords_from_text() reads this corpus through stopwords.words('english').
nltk.download('stopwords')

# Load the models
# Each pipeline is created once at module level and reused by the functions below.
# Summarizer: called by generate_summary().
summarizer = pipeline("summarization", model="facebook/bart-large-cnn",batch_size=2)
# Sentiment classifier: extract_news_data() lower-cases the returned label and keeps the score.
sentiment_analyzer = pipeline("sentiment-analysis", model="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")
# Translator: translate_to_hindi() supplies the src_lang/tgt_lang codes on every call.
translator = pipeline("translation_en_to_hi", model="facebook/nllb-200-distilled-600M")

# List of news URLs to scrape
# fetch_news_and_sentiment() only scrapes the entries whose URL text contains the
# selected company name (case-insensitive substring match).
news_urls = [
    "https://www.tokenpost.com/news/business/14511",
    "https://www.cbsnews.com/news/tesla-cybertruck-recall-loose-trim-panel-elon-musk/",
    "https://nypost.com/2025/03/21/media/sean-hannity-rips-jimmy-kimmel-for-tesla-arson-jokes-asks-bob-iger-if-hes-proud/",
    "https://www.cbsnews.com/news/stocks-up-tesla-stock-price-elon-musk/",
    "https://www.cbsnews.com/news/tesla-violence-protest-elon-musk/",
    "https://news.abplive.com/business/tesla-partners-with-tata-group-companies-to-strengthen-ev-supply-chain-in-india-report-1759658",
    "https://www.aljazeera.com/podcasts/2025/3/21/tesla-takedown-how-elon-musks-trump-alliance-is-triggering-backlash",
    "https://abcnews.go.com/Politics/trump-suggests-tesla-vandals-prison-el-salvador/story?id=120019715",
    "https://www.ndtv.com/world-news/will-elon-musk-resign-as-tesla-ceo-longtime-investor-ross-gerber-wants-him-to-7974551",
    "https://www.tokenpost.com/news/business/12924",
    "https://www.tokenpost.com/news/investing/13749",
    "https://abcnews4.com/news/nation-world/jasmine-crockett-demands-musk-be-taken-down-during-telsa-protest-call",
    "https://timesofindia.indiatimes.com/technology/tech-news/tesla-says-smile-youre-on-camera-after-the-addition-of-new-feature-indias-blinkit-responds/articleshow/119256364.cms"
]

# List of user agents
# extract_news_data() picks one entry at random for the User-Agent header of each
# request; with a single entry every request currently sends the same value.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
]

def extract_news_data(url, timeout=15):
    """Scrape one news article and enrich it with a summary, sentiment and translation.

    Args:
        url: Absolute URL of the article page.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled server cannot hang the whole run.

    Returns:
        A dict with the keys 'title', 'summary', 'hindi_summary', 'sentiment',
        'sentiment_score', 'publication_date', 'keywords', 'source' and 'url',
        or None when the page yields no paragraph text or any step raises
        (the error is printed rather than propagated).
    """
    try:
        headers = {"User-Agent": random.choice(user_agents)}
        # Random 1-5 second pause before each request.
        time.sleep(random.randint(1, 5))
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract title (single lookup instead of searching the tree twice)
        title_tag = soup.find('title')
        title = title_tag.get_text(strip=True) if title_tag is not None else 'No Title'

        # Extract article body
        article_body = ' '.join(p.get_text(strip=True) for p in soup.find_all('p'))
        if not article_body.strip():
            # Without text there is nothing to summarise, and an empty string
            # would still receive an arbitrary sentiment label.
            print(f"Error scraping {url}: no article text found")
            return None

        # Generate summary
        final_summary = generate_summary(article_body)

        # Analyze sentiment; the slice keeps the first 512 characters of the summary
        sentiment_result = sentiment_analyzer(final_summary[:512])
        sentiment = sentiment_result[0]['label'].lower()
        sentiment_score = sentiment_result[0]['score']

        # Translate the same 512-character prefix of the summary
        hindi_summary = translate_to_hindi(final_summary[:512])

        # Extract metadata
        pub_date = extract_publication_date(soup)
        keywords = extract_keywords(soup)
        # Third '/'-separated piece of an absolute URL is the host name
        source = url.split('/')[2]

        return {
            'title': title,
            'summary': final_summary,
            'hindi_summary': hindi_summary,
            'sentiment': sentiment,
            'sentiment_score': sentiment_score,
            'publication_date': pub_date,
            'keywords': keywords,
            'source': source,
            'url': url
        }
    except Exception as e:
        print(f"Error scraping {url}: {str(e)}")
        return None

def generate_summary(text):
    """Summarise article text with the module-level ``summarizer`` pipeline.

    Args:
        text: Plain article text.

    Returns:
        ``text`` unchanged when it has fewer than 50 whitespace-separated
        words, otherwise the generated summary. Returns the string
        "Error generating summary" if the pipeline raises (the error is printed).
    """
    if len(text.split()) < 50:
        return text
    try:
        # truncation=True clips inputs that exceed the model's maximum input
        # length; without it long articles make the pipeline raise.
        summary = summarizer(
            text,
            max_length=200,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        print(f"Error summarizing text: {str(e)}")
        return "Error generating summary"

def translate_to_hindi(text):
    """Translate English text to Hindi using the module-level ``translator``.

    Args:
        text: English text to translate.

    Returns:
        The 'translation_text' of the first pipeline result, or the string
        "Translation failed" if anything raises (the error is printed).
    """
    language_pair = {"src_lang": "eng_Latn", "tgt_lang": "hin_Deva"}
    try:
        first_result = translator(text, **language_pair)[0]
        return first_result['translation_text']
    except Exception as error:
        print(f"Error translating text: {str(error)}")
        return "Translation failed"

def _date_value_of(element):
    """Return the date text carried by ``element``, or '' if there is none."""
    if element is None:
        return ''
    if element.has_attr('content'):
        return element['content'].strip()
    return element.get_text(strip=True)


def extract_publication_date(soup):
    """Find the article's publication date in a parsed page.

    Candidate attribute filters are tried in order. For each one a ``<meta>``
    tag is preferred, then any element matching the same attributes. An
    element whose value is empty is skipped so that later candidates still get
    a chance.

    Args:
        soup: Parsed document exposing ``find(name, attrs=...)``.

    Returns:
        The first non-empty ``content`` attribute or element text found,
        or 'Unknown' when no candidate yields a value.
    """
    date_tags = [
        {'property': 'article:published_time'},
        {'name': 'date'},
        {'itemprop': 'datePublished'},
        {'class': 'date'},
        {'class': 'timestamp'}
    ]

    for tag in date_tags:
        # `or` keeps the second lookup lazy: it only runs if the <meta> lookup
        # found nothing usable.
        value = (
            _date_value_of(soup.find('meta', attrs=tag))
            or _date_value_of(soup.find(attrs=tag))
        )
        if value:
            return value
    return 'Unknown'

def extract_keywords(soup):
    """Collect keywords for an article from its meta tags or, failing that, its text.

    Meta tags are tried in order; the first one whose ``content`` yields at
    least one non-blank, comma-separated keyword wins. A tag that exists but is
    empty is skipped rather than ending the search with an empty list.

    Args:
        soup: Parsed document exposing ``find`` and ``find_all``.

    Returns:
        A list of keyword strings. When no meta tag provides any, the result of
        ``extract_keywords_from_text`` on the page title plus paragraph text.
    """
    keyword_meta_tags = [
        {'name': 'keywords'},
        {'name': 'news_keywords'},
        {'property': 'article:tag'},
        {'name': 'parsely-tags'}
    ]

    for meta_tag in keyword_meta_tags:
        keyword_meta = soup.find('meta', attrs=meta_tag)
        if keyword_meta is None or 'content' not in keyword_meta.attrs:
            continue
        keywords = [kw.strip() for kw in keyword_meta['content'].split(',') if kw.strip()]
        if keywords:
            return keywords

    # Fallback to extracting from text
    title_tag = soup.find('title')
    title = title_tag.get_text(strip=True) if title_tag is not None else ''
    body = ' '.join(p.get_text(strip=True) for p in soup.find_all('p'))
    return extract_keywords_from_text(f"{title} {body}")

def extract_keywords_from_text(text, num_keywords=5):
    """Return the most frequent non-stopword words in ``text``.

    The text is lower-cased and split into word tokens; tokens that are not
    purely alphabetic or that appear in the English stopword list are dropped.

    Args:
        text: Text to mine for keywords.
        num_keywords: Maximum number of keywords to return.

    Returns:
        Up to ``num_keywords`` words, most frequent first.
    """
    ignored = set(stopwords.words('english'))
    tally = Counter(
        token
        for token in re.findall(r'\b\w+\b', text.lower())
        if token.isalpha() and token not in ignored
    )
    return [token for token, _count in tally.most_common(num_keywords)]

def analyze_sentiment_distribution(articles):
    """Build a plain-text report of how often each sentiment label occurs.

    Args:
        articles: Sequence of dicts, each with a 'sentiment' key.

    Returns:
        "No articles to analyze" for an empty sequence; otherwise a multi-line
        report with the article total and, per label in first-seen order, its
        count and percentage to one decimal place.
    """
    tally = Counter(entry['sentiment'] for entry in articles)

    article_total = len(articles)
    if article_total == 0:
        return "No articles to analyze"

    lines = [
        "\nSentiment Analysis Report:\n",
        f"Total Articles: {article_total}\n",
    ]
    for label, hits in tally.items():
        share = (hits / article_total) * 100
        lines.append(f"{label.capitalize()}: {hits} ({share:.1f}%)\n")

    return "".join(lines)

def fetch_news_and_sentiment(company_name):
    """Scrape the configured articles for a company and format a text report.

    Only URLs in ``news_urls`` that contain the company name (case-insensitive
    substring match) are scraped.

    Args:
        company_name: Company to look for. None or a blank string is rejected
            up front: None would raise on ``.lower()`` and an empty string
            would match, and therefore scrape, every URL.

    Returns:
        A message string when no company is given or no article could be
        scraped; otherwise one formatted section per article followed by the
        output of ``analyze_sentiment_distribution``.
    """
    # NOTE(review): the UI may pass None when nothing is selected in the dropdown.
    if not company_name or not company_name.strip():
        return "Please select a company name."

    needle = company_name.strip().lower()
    articles = []
    for url in news_urls:
        if needle in url.lower():  # Simple filter for demo
            news_data = extract_news_data(url)
            if news_data:
                articles.append(news_data)

    if not articles:
        return "No relevant articles found for the specified company."

    # Generate output: collect the pieces and join once instead of repeated +=
    separator = "-" * 80 + "\n"
    parts = []
    for article in articles:
        keywords = ', '.join(article['keywords']) if article['keywords'] else 'N/A'
        parts.append(
            f"\nTitle: {article['title']}\n"
            f"Source: {article['source']}\n"
            f"Date: {article['publication_date']}\n"
            f"Sentiment: {article['sentiment']} (confidence: {article['sentiment_score']:.2f})\n"
            f"Summary: {article['summary']}\n"
            f"Hindi Summary: {article['hindi_summary']}\n"
            f"Keywords: {keywords}\n"
            f"URL: {article['url']}\n"
        )
        parts.append(separator)

    # Add sentiment analysis
    parts.append(analyze_sentiment_distribution(articles))

    return "".join(parts)

# Choices offered in the dropdown; the selected string is what
# fetch_news_and_sentiment() matches against news_urls.
company_name = ["Tesla"]

# Create Gradio interface with a dropdown
# The selected company is passed to fetch_news_and_sentiment() and the text it
# returns is displayed in the "Sentiment Report" textbox.
iface = gr.Interface(
    fn=fetch_news_and_sentiment,
    inputs=gr.Dropdown(
        label="Company Name",
        choices=company_name,),
    outputs=gr.Textbox(label="Sentiment Report"),
    title="News Summarization and Text-to-Speech Application",
    description="Select a company name to fetch news articles and generate a sentiment report."
)

# Launch the interface
iface.launch()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Device set to use cpu
Device set to use cpu
Device set to use cpu


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://69487ae3b18396d10d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# HuggingFace Space Link

https://huggingface.co/spaces/Srishtipriya/news-summarization-tts-app_2