In [4]:
import os
import requests
from datetime import datetime, timedelta
from dotenv import load_dotenv

load_dotenv()

NEWS_API_KEY = os.getenv("NEWS_API_KEY")
BASE_URL = "https://newsapi.org/v2/everything"

def fetch_news(
    query: str,
    days: int = 3,
    language: str = "en",
    page_size: int = 20
):
    """
    Fetch news articles from NewsAPI based on a query.
    """
    if not NEWS_API_KEY:
        raise ValueError("NEWS_API_KEY not found in environment variables")

    from_date = (datetime.utcnow() - timedelta(days=days)).strftime("%Y-%m-%d")

    params = {
        "q": query,
        "from": from_date,
        "sortBy": "relevancy",
        "language": language,
        "pageSize": page_size,
        "apiKey": NEWS_API_KEY
    }

    response = requests.get(BASE_URL, params=params)
    response.raise_for_status()

    data = response.json()
    return data.get("articles", [])

In [5]:
def normalize_articles(articles):
    """
    Keeping the field only relevant for further processing
    """
    normalized = []
    for article in articles:
        content = article.get("content") or article.get("description")
        if not content:
            continue

        normalized.append({
            "title": article.get("title"),
            "source": article.get("source", {}).get("name"),
            "published_at": article.get("publishedAt"),
            "url": article.get("url"),
            "text": content
        })
    return normalized

In [6]:
raw_articles = fetch_news("artificial Intelligence")
raw_articles


[{'source': {'id': None, 'name': 'Yahoo Entertainment'},
  'author': None,
  'title': '1 Unstoppable Artificial Intelligence (AI) Stock to Buy Before It Soars More Than 30% in 2026, According to a Wall Street Analyst',
  'description': None,
  'url': 'https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_c345afd1-235d-4da5-9e7f-6eb8783fad2c',
  'urlToImage': None,
  'publishedAt': '2026-01-19T21:25:00Z',
  'content': "If you click 'Accept all', we and our partners, including 245 who are part of the IAB Transparency &amp; Consent Framework, will also store and / or access information on a device (in other words, us… [+1046 chars]"},
 {'source': {'id': None, 'name': 'BBC News'},
  'author': None,
  'title': "Chris Pratt on new film Mercy: I asked to be locked into an executioner's chair",
  'description': 'The Marvel star plays a detective with 90 minutes to prove to an AI judge he did not murder his wife.',
  'url': 'https://www.bbc.com/news/articles/cy05drxx2nwo',
  'urlTo

In [7]:
len(raw_articles)

20

In [9]:
docs = []
for article in raw_articles:
    docs.append({'Title': article['title'],
                 'Content': article['content'],
                 'Description': article['description'],
                 'Source': article['source']['name'],
                 "Url": article['url']})
    
docs

[{'Title': '1 Unstoppable Artificial Intelligence (AI) Stock to Buy Before It Soars More Than 30% in 2026, According to a Wall Street Analyst',
  'Content': "If you click 'Accept all', we and our partners, including 245 who are part of the IAB Transparency &amp; Consent Framework, will also store and / or access information on a device (in other words, us… [+1046 chars]",
  'Description': None,
  'Source': 'Yahoo Entertainment',
  'Url': 'https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_c345afd1-235d-4da5-9e7f-6eb8783fad2c'},
 {'Title': "Chris Pratt on new film Mercy: I asked to be locked into an executioner's chair",
  'Content': "Helen BushbyCulture reporter\r\nChris Pratt plays a police officer with just 90 minutes to prove to an AI judge he did not murder his wife\r\nBeing locked barefoot in an executioner's chair sounds uncomf… [+5772 chars]",
  'Description': 'The Marvel star plays a detective with 90 minutes to prove to an AI judge he did not murder his wife.',

In [10]:
if __name__ == "__main__":
    topic = "Global Warming and Climate Change"
    raw_articles = fetch_news(topic)
    clean_articles = normalize_articles(raw_articles)

    print(f"Fetched {len(clean_articles)} articles\n")

    for i, article in enumerate(clean_articles[:3], 1):
        print(f"{i}. {article['title']}")
        print(f"   Source: {article['source']}")
        print(f"   URL: {article['url']}\n")

Fetched 19 articles

1. Superintelligence Is Not the Answer to Climate Change
   Source: Project Syndicate
   URL: https://www.project-syndicate.org/commentary/ai-superintelligence-will-not-solve-climate-crisis-once-tipping-points-crossed-by-alex-friedman-1-2026-01

2. A huge iceberg becomes a deadly trap for penguins
   Source: Popular Science
   URL: http://www.popsci.com/environment/iceberg-traps-penguins-antarctica/

3. What Can We Do for the 250 Million and Counting Displaced by the Environmental Crisis?
   Source: Nakedcapitalism.com
   URL: https://www.nakedcapitalism.com/2026/01/what-can-we-do-for-the-250-million-and-counting-displaced-by-the-environmental-crisis.html



In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    separators=["\n\n", "\n", " ", ""]
)

In [12]:
from langchain_core.documents import Document
    
def articles_to_documents(articles, topic):
    docs = []

    for article in articles:
        splits = text_splitter.split_text(article["text"])

        for split in splits:
            docs.append(
                Document(
                    page_content=split,
                    metadata={
                        "title": article["title"],
                        "source": article["source"],
                        "url": article["url"],
                        "topic": topic,
                        "date": article["published_at"][:10]
                    }
                )
            )

    return docs 



In [13]:
topic = "Indian income tax policy"

# Convert articles -> chunked Documents
documents = articles_to_documents(
    articles=clean_articles,   # output from NewsAPI normalization
    topic=topic
)

print(f"Total document chunks created: {len(documents)}")


Total document chunks created: 19


In [15]:
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="mxbai-embed-large"
)

test_vec = embeddings.embed_query("Income tax changes in India")
len(test_vec)



1024

In [1]:
from langchain_community.vectorstores import Chroma

# Create / load persistent ChromaDB
vectorstore = Chroma.from_documents(
    documents=documents,              # output from articles_to_documents(...)
    embedding=embeddings,              # OllamaEmbeddings (mxbai-embed-large)
    persist_directory="./chroma_db"    # folder on disk
)

# Persist to disk
vectorstore.persist()

print(f"Stored {len(documents)} chunks in ChromaDB")



KeyboardInterrupt



In [79]:
retriever = vectorstore.as_retriever(
    search_type = "similarity",
    search_kwargs={"k": 4}   # top-4 most relevant chunks
)

In [80]:
topic = "Weather Patterns"
intent = "impact on farmers growing onions"

query = f"{topic}. Focus on {intent}."


In [81]:
docs = retriever.invoke(query)

print(f"Retrieved {len(docs)} chunks")


Retrieved 4 chunks


In [82]:
docs

[Document(metadata={'title': 'It’s a Gas', 'date': '2025-12-25', 'url': 'http://www.nybooks.com/articles/2026/01/15/its-a-gas-the-story-of-co2-mckibben/', 'source': 'The New York Review of Books', 'topic': 'Indian income tax policy'}, page_content='I’m writing this in the last days of the northern hemisphere’s autumn in 2025. Over recent weeks we’ve seen a hurricane hit Jamaica with wind speeds a few hundred feet above sea level topping 250 mil… [+17629 chars]'),
 Document(metadata={'source': 'NPR', 'url': 'https://www.npr.org/2025/12/24/nx-s1-5620997/2025-hurricane-season-storm-climate-melissa-category-5', 'topic': 'Indian income tax policy', 'title': "What does climate change look like? This year's hurricane season is one example", 'date': '2025-12-24'}, page_content='The 2025 hurricane season was a study in contrasts.\r\nIn one way, it felt very quiet in the United States. No storms made landfall in the U.S. for the first time since 2015. And, for about three weeks… [+2789 chars]'),

In [83]:
def build_context(docs, max_chars=3500):
    context = ""
    for doc in docs:
        if len(context) + len(doc.page_content) > max_chars:
            break
        context += doc.page_content + "\n\n"
    return context

context = build_context(docs)
print(f"Context length (chars): {len(context)}")

Context length (chars): 867


In [84]:
from ollama import Client

client = Client(host="http://localhost:11434")


In [85]:
def generate_summary(context, topic, intent):
    prompt = f"""
You are a financial news analyst.

Summarize the latest updates related to:
Topic: {topic}
Audience: {intent}

Using ONLY the context below, provide:
1. Key updates (bullet points)
2. Who is affected
3. Why it matters
4. One-line TL;DR

Context:
{context}
"""

    response = client.chat(
        model="llama3.1",
        messages=[{"role": "user", "content": prompt}],
        options={"temperature": 0.2}
    )

    return response["message"]["content"]


In [86]:
summary = generate_summary(
    context=context,
    topic=topic,
    intent=intent
)

print(summary)


**Key Updates:**

* A hurricane hit Jamaica with wind speeds exceeding 250 mph in recent weeks.
* The 2025 hurricane season was relatively quiet in the US, with no storms making landfall for the first time since 2015.
* Climate change is causing unusual weather patterns, including a warm and sunny Christmas across the US.

**Who is Affected:**

* Farmers growing onions in regions affected by the hurricane (e.g. Jamaica) or experiencing unusual weather patterns due to climate change.

**Why it Matters:**

* Unpredictable weather conditions can impact onion yields and quality, affecting farmers' incomes and food supplies.
* Climate change may lead to more frequent and severe weather events, exacerbating these challenges for farmers.

**One-line TL;DR:**
Unusual weather patterns in 2025, including a hurricane in Jamaica and a warm Christmas across the US, pose significant risks to onion farmers due to potential crop damage and yield disruptions.
