In [1]:
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool
from langchain_community.tools import DuckDuckGoSearchResults, WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import List, Optional, Dict
from textwrap import dedent
import networkx as nx
import requests, json, time, os

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
LMSTUDIO_URL = "http://localhost:1234/v1"
LLM_NAME = "gemma-3-27b-it"
ENTITY_EXTRACTOR = "qwen/qwen3-next-80b"
EXTRACTION_TEMPERATURE = 0
FETCH_DELAY_SECONDS = 1.5
SQLITE_DB_PATH = "kg_store.db"
ALPHA_VANTAGE_API_KEY = os.getenv('ALPHA_VANTAGE_KEY')

In [3]:
yahoo_news = YahooFinanceNewsTool()
wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
ddgo = DuckDuckGoSearchResults()

In [4]:
class Entity(BaseModel):
    text: str = Field(description="The extracted entity text")
    label: str = Field(description="Entity type (PERSON, ORGANIZATION, STOCK_SYMBOL, etc.)")
    confidence: Optional[float] = Field(description="Confidence score", default=None)


In [5]:
class NERResponse(BaseModel):
    entities: List[Entity] = Field(description="List of extracted named entities")


In [6]:
extraction_model = ChatOpenAI(
    base_url=LMSTUDIO_URL,
    api_key="dummy",
    model_name=ENTITY_EXTRACTOR,
    temperature=0
).with_structured_output(NERResponse)

In [7]:
FINANCIAL_ENTITY_LABELS = {
    "ORGANIZATION", "PERSON", "PRODUCT", "EVENT", "STOCK_SYMBOL", "POLICY", "GOVERNMENT", "COMPANY"
}

In [8]:
def is_relevant_entity(ent: Entity) -> bool:
    """Filter out meaningless or numeric entities."""
    if not ent.text or len(ent.text.strip()) < 2:
        return False
    if ent.text.replace('.', '', 1).isdigit():  # numeric price like 523.2800
        return False
    if ent.label.upper() not in FINANCIAL_ENTITY_LABELS:
        return False
    return True

In [9]:
def fetch_financial_data_alpha_vantage(ticker: str) -> str:
    """Fetch stock data from Alpha Vantage."""
    try:
        url = "https://www.alphavantage.co/query"
        params = {"function": "GLOBAL_QUOTE", "symbol": ticker, "apikey": ALPHA_VANTAGE_API_KEY}
        resp = requests.get(url, params=params, timeout=15)
        data = resp.json()
        quote = data.get("Global Quote") or {}
        if not quote:
            return ""
        return f"AlphaVantage Quote for {ticker}:\n{json.dumps(quote, indent=2)}"
    except Exception as e:
        print(f"[Error] AlphaVantage: {e}")
        return ""

In [10]:
def fetch_yahoo_news(ticker: str) -> str:
    """Fetch recent news correctly using LangChain YahooFinanceNewsTool."""
    try:
        res = yahoo_news.invoke(ticker)
        if isinstance(res, str) and len(res.strip()) > 50 and "No news found" not in res:
            return f"Yahoo Finance News for {ticker}:\n{res}"
    except Exception as e:
        print(f"[Error] YahooNewsTool: {e}")
    return ""

In [11]:
def fetch_related_info(entity_name: str) -> str:
    for src, tool in [("Wikipedia", wiki), ("DuckDuckGo", ddgo)]:
        try:
            res = tool.run(entity_name)
            if isinstance(res, str) and len(res.strip()) > 50:
                return res[:4000]
        except Exception as e:
            print(f"[Error] {src}: {e}")
    return ""

In [12]:
def fetch_entity_info(entity_name: str, entity_label: str) -> str:
    """
    Category-aware fetcher:
    - STOCK_SYMBOL / ORGANIZATION → financial + news
    - PERSON → background
    - POLICY / GOVERNMENT → DuckDuckGo for recent events
    """
    entity_label = entity_label.upper()
    text = ""

    if entity_label in {"STOCK_SYMBOL", "ORGANIZATION", "COMPANY"}:
        text = fetch_financial_data_alpha_vantage(entity_name)
        text += "\n\n" + fetch_yahoo_news(entity_name)

    elif entity_label in {"PERSON"}:
        text = fetch_related_info(entity_name)

    elif entity_label in {"POLICY", "GOVERNMENT", "EVENT"}:
        try:
            print(f"[Policy Search] Searching policy/news context for '{entity_name}'")
            res = ddgo.run(f"{entity_name} government budget OR regulation OR policy 2025 site:reuters.com OR site:bloomberg.com")
            if res:
                text = res[:4000]
        except Exception as e:
            print(f"Policy search failed: {e}")

    if not text:
        text = fetch_related_info(entity_name)

    return text.strip() or f"No relevant data for {entity_name}"

In [13]:
def add_entities_to_graph(G, seed_entity, seed_label, discovered_entities, source_text_snippet=None):
    if not G.has_node(seed_entity):
        G.add_node(seed_entity, label=seed_label)
    for ent in discovered_entities:
        if not G.has_node(ent.text):
            G.add_node(ent.text, label=ent.label)
        if not G.has_edge(seed_entity, ent.text):
            G.add_edge(seed_entity, ent.text, relation="mentioned_with", snippet=(source_text_snippet or "")[:400])

In [14]:
def expand_graph(seed_ticker: str, depth: int = 3, throttle: float = FETCH_DELAY_SECONDS):
    G = nx.Graph()
    visited = {}
    current_entities = [Entity(text=seed_ticker, label="STOCK_SYMBOL")]

    for layer in range(1, depth + 1):
        print(f"\n=== LAYER {layer} ===")
        next_entities = []

        for ent in current_entities:
            if ent.text in visited:
                continue
            visited[ent.text] = True

            print(f"[L{layer}] Fetching info for '{ent.text}' ({ent.label})")
            info = fetch_entity_info(ent.text, ent.label)
            if not info.strip() or info.startswith("No relevant data"):
                continue

            print(f"[L{layer}] Running NER on '{ent.text}' info...")
            try:
                ner_result = extraction_model.invoke(dedent(f"Extract named entities:\n{info[:8000]}"))
                discovered = [e for e in ner_result.entities if is_relevant_entity(e)]
            except Exception as e:
                print(f"[Error] NER on {ent.text}: {e}")
                discovered = []

            add_entities_to_graph(G, ent.text, ent.label, discovered, info)
            next_entities.extend(d for d in discovered if d.text not in visited)
            time.sleep(throttle)

        current_entities = next_entities

    print(f"\nGraph complete: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges.")
    return G

In [15]:
def generate_graph_report(G):
    lines = [f"Graph Report: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges"]
    for n, d in G.nodes(data=True):
        lines.append(f"- {n} ({d.get('label','')})")
    return "\n".join(lines)

In [16]:
graph = expand_graph("MSFT", depth=2)


=== LAYER 1 ===
[L1] Fetching info for 'MSFT' (STOCK_SYMBOL)
[L1] Running NER on 'MSFT' info...

=== LAYER 2 ===
[L2] Fetching info for 'Microsoft' (COMPANY)
[L2] Fetching info for 'Microsoft Corp' (COMPANY)


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: MICROSOFT CORP"}}}


[L2] Fetching info for 'NASDAQ:MSFT' (STOCK_SYMBOL)


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: NASDAQ:MSFT"}}}


[L2] Fetching info for 'Jim Cramer' (PERSON)
[L2] Running NER on 'Jim Cramer' info...
[L2] Fetching info for 'Melius Research' (ORGANIZATION)


HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: MELIUS RESEARCH"}}}


[L2] Fetching info for 'Ben Reitzes' (PERSON)
[L2] Running NER on 'Ben Reitzes' info...

Graph complete: 30 nodes, 31 edges.


In [17]:
print(generate_graph_report(graph))

Graph Report: 30 nodes, 31 edges
- MSFT (STOCK_SYMBOL)
- Microsoft (COMPANY)
- Microsoft Corp (COMPANY)
- NASDAQ:MSFT (STOCK_SYMBOL)
- Jim Cramer (PERSON)
- Melius Research (ORGANIZATION)
- Ben Reitzes (PERSON)
- James Joseph Cramer (PERSON)
- CNBC (ORGANIZATION)
- Harvard College (ORGANIZATION)
- Harvard Law School (ORGANIZATION)
- Goldman Sachs (ORGANIZATION)
- Cramer Berkowitz (ORGANIZATION)
- TheStreet (ORGANIZATION)
- Jon Stewart (PERSON)
- Rick Santelli (PERSON)
- Chicago Board of Trade (ORGANIZATION)
- New York Stock Exchange Building (ORGANIZATION)
- 2008 financial crisis (EVENT)
- 2009 Inauguration Day (EVENT)
- Saul Alinsky (Person)
- University of Delaware (UD) (Organization)
- National Collegiate Athletic Association (NCAA) (Organization)
- Ryan Carty (Person)
- Rich Gannon (Person)
- Joe Flacco (Person)
- Jeff Komlo (Person)
- Pat Devlin (Person)
- Andy Hall (Person)
- Scott Brunner (Person)
