In [1]:
!pip install gradio duckduckgo-search newspaper3k transformers


Collecting duckduckgo-search
  Downloading duckduckgo_search-8.1.1-py3-none-any.whl.metadata (16 kB)
Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting primp>=0.15.0 (from duckduckgo-search)
  Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting cssselect>=0.9.2 (from newspaper3k)
  Downloading cssselect-1.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting feedparser>=5.2.1 (from newspaper3k)
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting tldextract>=2.0.1 (from newspaper3k)
  Downloading tldextract-5.3.0-py3-none-any.whl.metadata (11 kB)
Collecting feedfinder2>=0.0.4 (from newspaper3k)
  Downloading feedfinder2-0.0.4.tar.gz (3.3 kB)
  Preparing metadata (setup.py) ... done
Collecting jieba3k>=0.35.1 (from newspaper3k)
  Downloading jieba3k-0.35.1.zip (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [

In [2]:
from duckduckgo_search import DDGS

def search_news_links(company_name, max_results=5):
    """Look up recent news pages for a company via DuckDuckGo text search.

    Args:
        company_name: Name of the company to search for.
        max_results: Upper bound on the number of search hits requested.

    Returns:
        A list with the "href" value of every hit that has one, in the order
        the search returned them.
    """
    search_phrase = f"{company_name} latest news"
    with DDGS() as client:
        # Materialise the hits while the client is still open.
        hits = list(client.text(search_phrase, max_results=max_results))

    links = []
    for hit in hits:
        # Skip hits that carry no link.
        if "href" in hit:
            links.append(hit["href"])
    return links

# Test it (optional):
# print(search_news_links("Apple"))


In [5]:
!pip install newspaper3k lxml_html_clean


Collecting lxml_html_clean
  Downloading lxml_html_clean-0.4.2-py3-none-any.whl.metadata (2.4 kB)
Downloading lxml_html_clean-0.4.2-py3-none-any.whl (14 kB)
Installing collected packages: lxml_html_clean
Successfully installed lxml_html_clean-0.4.2


In [6]:
from newspaper import Article

def extract_news_content(url):
    """Download an article and return its extracted body text.

    Args:
        url: Address of the article to fetch.

    Returns:
        The article text with surrounding whitespace removed, or an empty
        string when ``url`` is empty or the article cannot be downloaded or
        parsed.
    """
    if not url:
        return ""
    try:
        article = Article(url)
        article.download()
        article.parse()
        text = article.text
    except Exception:
        # Best-effort scraping: an unreachable or unparsable page counts as
        # "no content". Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate so the cell can still be
        # interrupted.
        return ""
    # Normalise so whitespace-only pages are reported as empty content.
    return (text or "").strip()


In [7]:
from transformers import pipeline

# Build the summarization pipeline once, at cell execution time, from an
# open-source DistilBART checkpoint; summarize_text() below uses this object.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)

def summarize_text(text, max_chars=1000, max_length=120, min_length=30):
    """Summarize a piece of text with the module-level ``summarizer``.

    Args:
        text: Text to summarize.
        max_chars: Only the first ``max_chars`` characters of ``text`` are
            passed to the summarizer.
        max_length: ``max_length`` value forwarded to the summarizer.
        min_length: ``min_length`` value forwarded to the summarizer.

    Returns:
        The ``summary_text`` of the first result returned by the summarizer,
        or an empty string when ``text`` is empty or only whitespace.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""
    # Slicing is a no-op for inputs already shorter than the limit.
    clipped = text[:max_chars]
    results = summarizer(
        clipped,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    return results[0]['summary_text']


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


In [8]:
import random

def is_greeting(text):
    """Tell whether the message contains a greeting word or phrase.

    Matching is case-insensitive and anchored on word boundaries, so a
    greeting only counts when it appears as a whole word or phrase. Plain
    substring matching would classify company names such as "Chipotle" or
    "Hitachi" as greetings because they contain "hi".

    Args:
        text: The user's message.

    Returns:
        True if any known greeting occurs in ``text`` as a whole word/phrase,
        False otherwise (including for empty input).
    """
    import re  # local import keeps this cell self-contained

    if not text:
        return False
    greetings = ["hi", "hello", "hey", "how are you", "good morning", "good evening"]
    pattern = r"\b(?:" + "|".join(re.escape(greet) for greet in greetings) + r")\b"
    return re.search(pattern, text.lower()) is not None

def handle_greeting():
    """Return one of the canned greeting replies, chosen at random."""
    return random.choice(
        [
            "Hey there!",
            "Hello! 👋",
            "Hi, how can I help you today?",
        ]
    )

def get_news_summary(company_name, style="Formal"):
    """Build a summary digest of recent news about a company.

    Searches for news links, extracts each article's text, summarizes it and
    formats the result according to ``style``.

    Args:
        company_name: Name of the company to look up.
        style: "Casual" prefixes each summary with a conversational lead-in,
            "Bullet Points" puts each sentence on its own bullet; any other
            value (including the default "Formal") leaves the summary as is.

    Returns:
        The formatted summaries, each followed by a "Read more" link and
        separated by blank lines. If nothing can be produced, a short
        explanatory message is returned instead of an empty string.
    """
    # A blank query would otherwise search for just " latest news".
    if not company_name or not company_name.strip():
        return "Please enter a company name so I can look up its news."

    links = search_news_links(company_name)
    if not links:
        return "Sorry, I couldn't find any news. Maybe check the spelling?"

    summaries = []
    for url in links:
        content = extract_news_content(url)
        if not content:
            continue  # article could not be fetched or had no text
        summary = summarize_text(content)
        if not summary:
            continue  # avoid emitting a bare "Read more" link
        if style == "Casual":
            summary = f"So guess what? Here's something new about {company_name}: {summary}"
        elif style == "Bullet Points":
            summary = "• " + summary.replace(". ", ".\n• ")
        summaries.append(f"{summary}\n[Read more]({url})")

    # Links were found but none yielded usable text: say so instead of
    # returning an empty response.
    if not summaries:
        return "Sorry, I found some articles but couldn't read any of them. Please try again later."

    return "\n\n".join(summaries)


In [9]:
import gradio as gr

def chatbot_interface(user_input, style):
    """Route a chat message to a greeting reply or a news digest.

    Args:
        user_input: Text typed by the user.
        style: Summary style forwarded to ``get_news_summary``.

    Returns:
        A greeting reply when the input is recognised as a greeting,
        otherwise the news summary for the input.
    """
    # Guard clause: anything that is not a greeting is treated as a company query.
    if not is_greeting(user_input):
        return get_news_summary(user_input, style)
    return handle_greeting()

# Summary styles offered in the UI; the selected value is passed to
# chatbot_interface as its `style` argument.
style_options = ["Formal", "Casual", "Bullet Points"]

# Build the two-input / text-output interface and start serving it.
gr.Interface(
    title="📡 Smart Company News Chatbot 🤖",
    description="Get the latest news about any company with AI summaries!",
    fn=chatbot_interface,
    inputs=[
        gr.Textbox(label="Ask about any company..."),
        gr.Radio(style_options, label="Choose your style"),
    ],
    outputs="text",
).launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4f290665fc308d9953.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


