# Financial News Summary Pipeline (Production Ready)

- openai>=1.0.0 client
- DDGS timelimit fixed (no d14)


In [2]:

%pip -q install -U openai ddgs langchain langchain-openai langchain-community

import getpass, re, hashlib
from datetime import datetime, timezone, timedelta
from ddgs import DDGS
from openai import OpenAI as OpenAIClient

api_key = getpass.getpass("Enter your OpenAI API Key: ")
client = OpenAIClient(api_key=api_key)


Note: you may need to restart the kernel to use updated packages.


In [3]:

def _norm_text(x: str) -> str:
    return re.sub(r"\s+", " ", (x or "")).strip()

def _hash_item(item: dict) -> str:
    raw = (item.get("title","") + "|" + item.get("href","") + "|" + item.get("body","")).encode("utf-8", errors="ignore")
    return hashlib.md5(raw).hexdigest()

def obtener_noticias(activo: str, max_items: int = 10, recency_days: int = 14, region: str = "wt-wt"):
    """
    ddgs.news timelimit ONLY supports: 'd' (day), 'w' (week), 'm' (month).
    For arbitrary recency_days (e.g., 14), we query 'm' and then filter by date.
    """
    q = f'{activo} (results OR earnings OR guidance OR dividend OR buyback OR outlook OR rating)'

    # Map recency_days -> ddgs timelimit bucket
    if recency_days <= 1:
        timelimit = "d"
    elif recency_days <= 7:
        timelimit = "w"
    else:
        timelimit = "m"

    results = []
    with DDGS() as ddgs:
        for r in ddgs.news(
            q,
            region=region,
            safesearch="off",
            timelimit=timelimit,
            max_results=max_items * 3
        ):
            results.append(r)

    def _parse_date(s):
        try:
            return datetime.fromisoformat((s or "").replace("Z", "+00:00"))
        except Exception:
            return None

    cutoff = datetime.now(timezone.utc) - timedelta(days=recency_days)

    out, seen = [], set()
    for r in results:
        item = {
            "title": _norm_text(r.get("title")),
            "date": _norm_text(r.get("date")),
            "source": _norm_text(r.get("source")),
            "url": _norm_text(r.get("url") or r.get("href")),
            "snippet": _norm_text(r.get("body") or r.get("snippet")),
        }
        if not item["title"] or not item["url"]:
            continue

        dt = _parse_date(item["date"])
        if dt is not None and dt < cutoff:
            continue

        h = _hash_item({"title": item["title"], "href": item["url"], "body": item["snippet"]})
        if h in seen:
            continue
        seen.add(h)
        out.append(item)

    out.sort(key=lambda x: _parse_date(x["date"]) or datetime(1970,1,1,tzinfo=timezone.utc), reverse=True)
    return out[:max_items]


In [4]:

def _tono_from_reco(recomendacion: str) -> str:
    r = (recomendacion or "").lower().strip()
    if r == "sobreponderar":
        return "constructively positive (optimistic but factual)"
    if r == "infraponderar":
        return "cautiously negative (risk-focused but factual)"
    return "neutral and factual"

def generar_resumen(noticias, recomendacion, activo, max_lines=3):
    tono = _tono_from_reco(recomendacion)

    if not noticias:
        return {
            "summary_block": f"No recent news found for {activo}.",
            "sources": [],
            "coverage": 0,
            "flags": ["NO_NEWS"]
        }

    pack = []
    for i, n in enumerate(noticias, 1):
        pack.append(
            f"[{i}] {n['title']}\nSource: {n['source']} | Date: {n['date']}\nURL: {n['url']}\nSnippet: {n['snippet']}"
        )
    context = "\n\n".join(pack)

    system = (
        "You are a financial news summarizer. "
        "Do not invent facts. Use only the provided snippets. "
        "If something is unclear or missing, say so explicitly."
    )

    user = f"""
Asset: {activo}
Tone: {tono}

TASK:
1) Write a {max_lines}-line factual summary in English matching the tone.
2) Then 3-5 bullets of ONLY verifiable facts from the snippets.
3) Then 'Sources:' with the URLs.

NEWS:
{context}
"""

    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
    )

    text = resp.choices[0].message.content.strip()
    urls = [n["url"] for n in noticias[:6] if n.get("url")]

    flags = []
    if len(noticias) < 3:
        flags.append("LOW_COVERAGE")

    return {
        "summary_block": text,
        "sources": urls,
        "coverage": len(noticias),
        "flags": flags
    }


In [5]:

# Example usage
activo = "Iberdrola"
recomendacion = "sobreponderar"

noticias = obtener_noticias(activo, recency_days=14)
out = generar_resumen(noticias, recomendacion, activo)

print(out["summary_block"])
print("Flags:", out["flags"])
print("Sources:", out["sources"][:3])


1) Iberdrola continues to demonstrate its commitment to innovation and sustainability through its subsidiary Avangrid, which recently hosted an Energy Innovation Hackathon. Additionally, S&P Global Ratings has reaffirmed Iberdrola's credit rating at “BBB+” with a stable outlook, reflecting the company's strong financial position.

2) 
- Avangrid, a member of the Iberdrola Group, is preparing its utility companies for the winter storm season.
- Avangrid celebrated future leaders in the energy industry at its fourth annual Energy Innovation Hackathon.
- S&P Global Ratings has reaffirmed Iberdrola's credit rating at “BBB+” with a stable outlook.

Sources:
- https://uk.finance.yahoo.com/news/avangrid-utility-companies-ready-winter-162100852.html
- https://www.lelezard.com/en/news-22054849.html
- https://thecorner.eu/companies/sp-global-ratings-reaffirms-iberdrolas-credit-rating-at-bbb-with-stable-outlook/123342/
Flags: []
Sources: ['https://uk.finance.yahoo.com/news/avangrid-utility-compan

In [7]:
# Example usage
activo = "Iberdrola"
recomendacion = "infraponderar"

noticias = obtener_noticias(activo, recency_days=14)
out = generar_resumen(noticias, recomendacion, activo)

print(out["summary_block"])
print("Flags:", out["flags"])
print("Sources:", out["sources"][:3])


1) Iberdrola faces potential risks as market dynamics shift, with increasing competition and regulatory pressures impacting its operational outlook. Investors should remain cautious about the company's future performance amid these challenges.

2) 
- Iberdrola is experiencing heightened competition in the energy sector.
- Regulatory pressures are becoming more pronounced, affecting operational strategies.
- The company's future performance may be influenced by these external factors.

3) Sources: [The Motley Fool](https://www.fool.com/investing/2025/12/10/ge-vernova-stocks-surges-on-guidance-raise-doublin/), [MarketWatch](https://www.marketwatch.com/story/ge-vernova-raises-outlook-doubles-dividend-on-energy-market-momentum-1e107bdf)
Flags: ['LOW_COVERAGE']
Sources: ['https://www.fool.com/investing/2025/12/10/ge-vernova-stocks-surges-on-guidance-raise-doublin/', 'https://www.marketwatch.com/story/ge-vernova-raises-outlook-doubles-dividend-on-energy-market-momentum-1e107bdf']


In [8]:
# Example usage
activo = "Santander"
recomendacion = "Sobreponderar"

noticias = obtener_noticias(activo, recency_days=14)
out = generar_resumen(noticias, recomendacion, activo)

print(out["summary_block"])
print("Flags:", out["flags"])
print("Sources:", out["sources"][:3])

No recent news found for Santander.
Flags: ['NO_NEWS']
Sources: []


In [10]:
# Example usage
activo = "Talgo"
recomendacion = "neutral"

noticias = obtener_noticias(activo, recency_days=14)
out = generar_resumen(noticias, recomendacion, activo)

print(out["summary_block"])
print("Flags:", out["flags"])
print("Sources:", out["sources"][:3])

1) Talgo is set to increase its share capital by €3.187 billion through the issuance of 10.58 million new ordinary shares, following approval from shareholders at an Extraordinary General Meeting. The new shares will be issued without pre-emptive subscription rights.

2) 
- Talgo will increase its share capital by €3.187 billion nominal value.
- The company will issue 10.58 million new ordinary shares.
- The issuance of new shares was approved at an Extraordinary General Meeting.

Sources: 
- https://thecorner.eu/companies/talgo-to-increase-share-capital-by-e3-187-billion-nominal-value-issuing-10-58-million-new-ordinary-shares-without-pre-emptive-subscription-rights/123399/
Flags: []
Sources: ['https://thecorner.eu/companies/talgo-to-increase-share-capital-by-e3-187-billion-nominal-value-issuing-10-58-million-new-ordinary-shares-without-pre-emptive-subscription-rights/123399/', 'https://www.msn.com/en-us/news/other/opinion-young-americans-demand-solutions-to-the-affordability-crisis/ar

In [9]:
# Example usage
activo = "Viscofan"
recomendacion = "infraponderar"

noticias = obtener_noticias(activo, recency_days=14)
out = generar_resumen(noticias, recomendacion, activo)

print(out["summary_block"])
print("Flags:", out["flags"])
print("Sources:", out["sources"][:3])

1) Viscofan faces potential risks as market conditions remain uncertain, impacting its financial outlook. Investors should be cautious given the fluctuating economic environment and competitive pressures. 

2) 
- Viscofan is a company involved in the production of casings for the meat industry.
- The company has been experiencing challenges related to market demand and pricing pressures.
- Recent financial reports indicate a need for strategic adjustments to maintain profitability.

3) Sources: 
- [MarketWatch](https://www.marketwatch.com/press-release/broadcom-inc-announces-fourth-quarter-and-fiscal-year-2025-financial-results-and-quarterly-dividend-b5cb32e6)
- [The Motley Fool](https://www.fool.com/investing/2025/12/10/ge-vernova-stocks-surges-on-guidance-raise-doublin/)
Flags: ['LOW_COVERAGE']
Sources: ['https://www.marketwatch.com/press-release/broadcom-inc-announces-fourth-quarter-and-fiscal-year-2025-financial-results-and-quarterly-dividend-b5cb32e6', 'https://www.fool.com/invest

In [6]:

# Debug helper (optional): confirm timelimit bucket used
for d in [1, 7, 14, 30]:
    if d <= 1:
        tl = "d"
    elif d <= 7:
        tl = "w"
    else:
        tl = "m"
    print(d, "->", tl)


1 -> d
7 -> w
14 -> m
30 -> m
