In [1]:
import os

# Opt out of telemetry before the cell that imports chromadb runs. The variable
# names suggest Chroma's anonymized usage reporting — TODO confirm which of the
# two names the installed version actually reads.
os.environ["ANONYMIZED_TELEMETRY"] = "false"
os.environ["CHROMA_TELEMETRY"] = "false"

In [2]:
import chromadb
import pandas as pd
import shutil
import requests
import yfinance as yf
import re, json
from tavily import TavilyClient
from pathlib import Path
# from typing import Optional, List
# from pydantic import BaseModel
# from typing import List, Optional
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from typing import Any, Dict, List, Optional, Literal
from pydantic import BaseModel, Field
from langgraph.graph import StateGraph, END

from dotenv import load_dotenv


In [None]:
def find_repo_root(start: Path | None = None) -> Path:
    start = start or Path.cwd()
    for p in [start] + list(start.parents):
        if (p / ".git").exists() or (p / "requirements.txt").exists():
            return p
    raise RuntimeError("Repo root not found")

REPO_ROOT = find_repo_root()

def resolve_from_repo(path_str: str) -> Path:
    """Turn a configured path string into an absolute path.

    Absolute inputs are returned untouched; relative inputs are joined onto
    ``REPO_ROOT`` and resolved.
    """
    candidate = Path(path_str)
    return candidate if candidate.is_absolute() else (REPO_ROOT / candidate).resolve()

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
FINNHUB_API_KEY = os.getenv("FINNHUB_API_KEY")

# Data locations can be overridden through the environment; relative values
# are interpreted against REPO_ROOT by resolve_from_repo.
FUNDAMENTALS_CSV = resolve_from_repo(os.getenv("FUNDAMENTALS_CSV", "data/raw/fundamentals_2024.csv"))
CHROMA_PERSIST_DIR = resolve_from_repo(os.getenv("CHROMA_PERSIST_DIR", "data/indices/chroma"))
SYMBOL_CACHE_PATH = resolve_from_repo(os.getenv("SYMBOL_CACHE_PATH", "data/cache/yahoo_symbol_map.json"))

# Ensure dirs exist
CHROMA_PERSIST_DIR.mkdir(parents=True, exist_ok=True)
SYMBOL_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)

OPENAI_EMBED_MODEL = "text-embedding-3-large"
OPENAI_CHAT_MODEL = "gpt-4o-mini"
SNAPSHOT_DATE = "2024"

# Fail fast, and say which setting is the problem.
assert OPENAI_API_KEY, "OPENAI_API_KEY is not set (check your .env file)"
assert TAVILY_API_KEY, "TAVILY_API_KEY is not set (check your .env file)"
assert FINNHUB_API_KEY, "FINNHUB_API_KEY is not set (check your .env file)"
# Path objects are always truthy, so check the filesystem rather than the
# object itself.
assert FUNDAMENTALS_CSV.is_file(), f"Fundamentals CSV not found: {FUNDAMENTALS_CSV}"
assert CHROMA_PERSIST_DIR.is_dir(), f"Chroma directory missing: {CHROMA_PERSIST_DIR}"

print("REPO_ROOT:", REPO_ROOT)
print("FUNDAMENTALS_CSV:", FUNDAMENTALS_CSV)
print("CHROMA_PERSIST_DIR:", CHROMA_PERSIST_DIR)
print("SYMBOL_CACHE_PATH:", SYMBOL_CACHE_PATH)

REPO_ROOT: /Users/pranavr/Desktop/Pranav/Miscellaneous/RAG
FUNDAMENTALS_CSV: /Users/pranavr/Desktop/Pranav/Miscellaneous/RAG/data/raw/fundamentals_2024.csv
CHROMA_PERSIST_DIR: /Users/pranavr/Desktop/Pranav/Miscellaneous/RAG/data/indices/chroma
SYMBOL_CACHE_PATH: /Users/pranavr/Desktop/Pranav/Miscellaneous/RAG/data/cache/yahoo_symbol_map.json


In [None]:
# # from dotenv import load_dotenv

# load_dotenv()

# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
# FINNHUB_API_KEY = os.getenv("FINNHUB_API_KEY")

# FUNDAMENTALS_CSV = os.getenv("FUNDAMENTALS_CSV")
# CHROMA_PERSIST_DIR = os.getenv("CHROMA_PERSIST_DIR")

# OPENAI_EMBED_MODEL = "text-embedding-3-large"
# OPENAI_CHAT_MODEL = "gpt-4o-mini"
# SNAPSHOT_DATE = "2024"

# assert OPENAI_API_KEY
# assert TAVILY_API_KEY
# assert FINNHUB_API_KEY
# assert FUNDAMENTALS_CSV
# assert CHROMA_PERSIST_DIR

# print("CSV:", FUNDAMENTALS_CSV)
# print("Chroma:", CHROMA_PERSIST_DIR)


In [5]:
# import pandas as pd

# Load the fundamentals snapshot. The preview below shows the monetary columns
# arriving as display strings such as "$252.9 B"; they are not parsed here.
df = pd.read_csv(FUNDAMENTALS_CSV)
df.head()


Unnamed: 0.1,Unnamed: 0,Name,Country,Sales,Profit,Assets,Market Value
0,0,JPMorganChase,United States,$252.9 B,$50 B,"$4,090.7 B",$588.1 B
1,1,Berkshire Hathaway,United States,$369 B,$73.4 B,"$1,070 B",$899.1 B
2,2,Saudi Arabian Oil Company (Saudi Aramco),Saudi Arabia,$489.1 B,$116.9 B,$661.5 B,"$1,919.3 B"
3,3,ICBC,China,$223.8 B,$50.4 B,"$6,586 B",$215.2 B
4,4,Bank of America,United States,$183.3 B,$25 B,"$3,273.8 B",$307.3 B


In [7]:
def row_to_company_card(row):
    """Render one fundamentals row as an embeddable text card plus metadata.

    Args:
        row: Mapping-like record (for example a pandas Series) exposing the
            keys ``Name``, ``Country``, ``Sales``, ``Profit``, ``Assets`` and
            ``Market Value``.

    Returns:
        ``(text, metadata)``: ``text`` is the multi-line card that gets
        embedded, and ``metadata`` carries the company, country, snapshot
        date and data type used for filtering at query time.
    """
    text = "\n".join([
        f"Company: {row['Name']}",
        f"Country: {row['Country']}",
        "",
        # Use the configured snapshot so the heading cannot drift away from
        # the snapshot_date stored in the metadata below.
        f"Financial snapshot ({SNAPSHOT_DATE}):",
        f"Sales: {row['Sales']}",
        f"Profit: {row['Profit']}",
        f"Assets: {row['Assets']}",
        f"Market Value: {row['Market Value']}",
    ])

    metadata = {
        "company": row["Name"],
        "country": row["Country"],
        "snapshot_date": SNAPSHOT_DATE,
        "data_type": "fundamentals",
    }

    return text, metadata


In [8]:

def build_chroma_from_company_cards(
    texts,
    metadatas,
    chroma_dir,
    collection_name="company_cards",
    rebuild=False,
    embed_model="text-embedding-3-large",
    openai_api_key=None,
):
    """Open (or rebuild) a persistent Chroma collection and add company cards.

    Args:
        texts: Card texts to embed. May be empty to just open the collection.
        metadatas: One metadata dict per text, in the same order. The
            ``company`` entry is used to derive each document id.
        chroma_dir: Directory in which Chroma persists the collection.
        collection_name: Name of the Chroma collection.
        rebuild: When true, the whole ``chroma_dir`` is deleted first.
        embed_model: OpenAI embedding model name.
        openai_api_key: API key handed to the embedding client.

    Returns:
        The Chroma vector store bound to ``chroma_dir``.

    Raises:
        ValueError: If the API key is missing, or ``texts`` and ``metadatas``
            differ in length.
    """
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY is missing")

    texts = list(texts) if texts is not None else []
    metadatas = list(metadatas) if metadatas is not None else []
    # Validate before touching the filesystem: a mismatch would otherwise pair
    # texts with the wrong ids/metadata or fail deep inside the vector store.
    if len(texts) != len(metadatas):
        raise ValueError(
            "texts and metadatas must have the same length "
            f"(got {len(texts)} and {len(metadatas)})"
        )

    p = Path(chroma_dir)

    if rebuild and p.exists():
        shutil.rmtree(p)

    p.mkdir(parents=True, exist_ok=True)

    emb = OpenAIEmbeddings(api_key=openai_api_key, model=embed_model)

    vs = Chroma(
        collection_name=collection_name,
        persist_directory=str(p),
        embedding_function=emb,
    )

    if texts:
        # str(...) keeps a missing or non-string company from crashing .strip().
        ids = [f"company::{str(m.get('company') or '').strip()}" for m in metadatas]
        vs.add_texts(texts=texts, metadatas=metadatas, ids=ids)

    # persist() is deprecated (the captured warnings in the indexing cell point
    # at it), so only call it when the installed wrapper still offers it.
    persist = getattr(vs, "persist", None)
    if callable(persist):
        persist()

    return vs


In [9]:
# Build one (text, metadata) pair per CSV row, then split the pairs into the
# two parallel lists that build_chroma_from_company_cards expects.
docs = [row_to_company_card(r) for _, r in df.iterrows()]
texts = [t for t, _ in docs]
metadatas = [m for _, m in docs]

# Quick sanity check of the card count and the metadata shape.
print("Cards:", len(texts))
print("Sample metadata:", metadatas[0])

Cards: 2001
Sample metadata: {'company': 'JPMorganChase', 'country': 'United States', 'snapshot_date': '2024', 'data_type': 'fundamentals'}


In [11]:
# NOTE: rebuild=True deletes CHROMA_PERSIST_DIR and re-embeds every card each
# time this cell runs, so a full re-run calls the embedding API for all cards.
vs_index = build_chroma_from_company_cards(
    texts=texts,
    metadatas=metadatas,
    chroma_dir=CHROMA_PERSIST_DIR,
    collection_name="company_cards",
    rebuild=True,
    embed_model=OPENAI_EMBED_MODEL,
    openai_api_key=OPENAI_API_KEY,
)

# _collection is a private attribute of the vector store wrapper; it is used
# here only to confirm that the number of stored documents matches the cards.
print("Count after indexing:", vs_index._collection.count())

  vs = Chroma(


Count after indexing: 2001


  vs.persist()


In [12]:
_VECTORSTORE = None

def get_vectorstore():
    """Return the shared Chroma handle, opening it on first use.

    The store is opened against ``CHROMA_PERSIST_DIR`` without adding any
    documents and without rebuilding, then memoised in ``_VECTORSTORE`` so
    later calls reuse the same object.

    Returns:
        The vector store returned by ``build_chroma_from_company_cards``.
    """
    global _VECTORSTORE

    if _VECTORSTORE is None:
        _VECTORSTORE = build_chroma_from_company_cards(
            texts=[],
            metadatas=[],
            chroma_dir=CHROMA_PERSIST_DIR,
            collection_name="company_cards",
            rebuild=False,
            # Queries must be embedded with the same model the index was built
            # with, so read the shared setting instead of repeating the name.
            embed_model=OPENAI_EMBED_MODEL,
            openai_api_key=OPENAI_API_KEY,
        )

    return _VECTORSTORE


In [13]:
vs = get_vectorstore()

In [None]:


def retrieve_companies(query, k=6):
    """Return the ``k`` company cards most similar to ``query``.

    The search is restricted to documents whose metadata matches the current
    ``SNAPSHOT_DATE`` and the ``fundamentals`` data type.

    Returns:
        A list of dicts with the keys ``company``, ``country`` and ``card``
        (the stored card text), in the order the vector store returned them.
    """
    store = get_vectorstore()
    snapshot_filter = {
        "$and": [
            {"snapshot_date": SNAPSHOT_DATE},
            {"data_type": "fundamentals"},
        ]
    }
    hits = store.similarity_search(query, k=k, filter=snapshot_filter)

    companies = []
    for hit in hits:
        meta = hit.metadata
        companies.append(
            {
                "company": meta.get("company"),
                "country": meta.get("country"),
                "card": hit.page_content,
            }
        )
    return companies


In [15]:
retrieve_companies("most valuable profitable companies")


[{'company': 'ExxonMobil',
  'country': 'United States',
  'card': 'Company: ExxonMobil\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $331.9 B\nProfit: $32.8 B\nAssets: $377.9 B\nMarket Value: $536.7 B'},
 {'company': 'Meta Platforms',
  'country': 'United States',
  'card': 'Company: Meta Platforms\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $142.7 B\nProfit: $45.8 B\nAssets: $222.8 B\nMarket Value: $1,197 B'},
 {'company': 'Berkshire Hathaway',
  'country': 'United States',
  'card': 'Company: Berkshire Hathaway\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $369 B\nProfit: $73.4 B\nAssets: $1,070 B\nMarket Value: $899.1 B'},
 {'company': 'Apple',
  'country': 'United States',
  'card': 'Company: Apple\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $381.6 B\nProfit: $100.4 B\nAssets: $337.4 B\nMarket Value: $2,911.5 B'},
 {'company': 'Microsoft',
  'country': 'United States',
  'card': 'Company: Microsoft\nCountry: Unite

In [16]:
retrieve_companies("most valuable profitable companies",k=10)


[{'company': 'ExxonMobil',
  'country': 'United States',
  'card': 'Company: ExxonMobil\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $331.9 B\nProfit: $32.8 B\nAssets: $377.9 B\nMarket Value: $536.7 B'},
 {'company': 'Meta Platforms',
  'country': 'United States',
  'card': 'Company: Meta Platforms\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $142.7 B\nProfit: $45.8 B\nAssets: $222.8 B\nMarket Value: $1,197 B'},
 {'company': 'Berkshire Hathaway',
  'country': 'United States',
  'card': 'Company: Berkshire Hathaway\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $369 B\nProfit: $73.4 B\nAssets: $1,070 B\nMarket Value: $899.1 B'},
 {'company': 'Apple',
  'country': 'United States',
  'card': 'Company: Apple\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $381.6 B\nProfit: $100.4 B\nAssets: $337.4 B\nMarket Value: $2,911.5 B'},
 {'company': 'Microsoft',
  'country': 'United States',
  'card': 'Company: Microsoft\nCountry: Unite

In [17]:
retrieve_companies("least valuable companies",k=10)

[{'company': 'Top Frontier Investment Holdings',
  'country': 'Philippines',
  'card': 'Company: Top Frontier Investment Holdings\nCountry: Philippines\n\nFinancial snapshot (2024):\nSales: $26 B\nProfit: $-47.9 M\nAssets: $47.1 B\nMarket Value: $586 M'},
 {'company': 'Cencora',
  'country': 'United States',
  'card': 'Company: Cencora\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $276.5 B\nProfit: $1.9 B\nAssets: $63.9 B\nMarket Value: $44.3 B'},
 {'company': 'Veralto',
  'country': 'United States',
  'card': 'Company: Veralto\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $5 B\nProfit: $798 M\nAssets: $5.7 B\nMarket Value: $24.3 B'},
 {'company': 'Valero Energy',
  'country': 'United States',
  'card': 'Company: Valero Energy\nCountry: United States\n\nFinancial snapshot (2024):\nSales: $140.1 B\nProfit: $7 B\nAssets: $62.6 B\nMarket Value: $54.3 B'},
 {'company': 'China Fortune Land Development',
  'country': 'China',
  'card': 'Company: China Fortune 

In [None]:

# Load the persisted symbol -> Yahoo symbol map. Start empty when the file is
# absent, unreadable, or does not contain a JSON object: later code calls
# .get() on this value and assigns keys to it, so it must be a dict.
_YAHOO_SYMBOL_CACHE = {}
if SYMBOL_CACHE_PATH.exists():
    try:
        _loaded_symbol_cache = json.loads(SYMBOL_CACHE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        _loaded_symbol_cache = None
    if isinstance(_loaded_symbol_cache, dict):
        _YAHOO_SYMBOL_CACHE = _loaded_symbol_cache



def _save_symbol_cache():
    """Persist the in-memory symbol cache to ``SYMBOL_CACHE_PATH`` as JSON.

    The payload is first written to a sibling temporary file and then moved
    over the target, so an interrupted write cannot leave a truncated cache
    file behind (the loader would discard such a file and start empty).
    """
    payload = json.dumps(_YAHOO_SYMBOL_CACHE, indent=2)
    tmp_path = SYMBOL_CACHE_PATH.with_name(SYMBOL_CACHE_PATH.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(SYMBOL_CACHE_PATH)

def is_probably_valid_symbol(sym: Optional[str]) -> bool:
    """Cheap sanity filter applied to a ticker before any market lookup.

    Args:
        sym: Candidate ticker symbol, or ``None``.

    Returns:
        ``False`` for non-strings, empty or whitespace-only values, symbols
        containing whitespace, and the literal ``"B.ST"``; ``True`` otherwise.
        One-letter symbols are accepted because real tickers such as ``V``,
        ``T``, ``C`` and ``F`` exist.
    """
    if not isinstance(sym, str):
        return False
    s = sym.strip()
    if not s:
        return False
    # Whitespace inside a symbol (e.g. "INVE B.ST") is not a usable ticker.
    if any(ch.isspace() for ch in s):
        return False
    # Presumably the tail of a split symbol like "INVE B.ST" — TODO confirm
    # why this exact value is special-cased.
    if s == "B.ST":
        return False
    return True

def normalize_for_yahoo(symbol: str) -> str:
    """Rewrite dotted share-class tickers into the dashed form.

    A symbol made of one to six capital letters, a dot and a single capital
    letter (``BRK.A``, ``BF.B``) has its dot replaced by a dash. Anything
    else is returned stripped but otherwise unchanged.
    """
    trimmed = symbol.strip()
    is_class_share = re.match(r"^[A-Z]{1,6}\.[A-Z]$", trimmed) is not None
    return trimmed.replace(".", "-") if is_class_share else trimmed

def yahoo_candidates(symbol: str) -> List[str]:
    """List the symbol spellings worth trying, in priority order.

    The stripped input comes first, followed by its ``normalize_for_yahoo``
    form; empty strings and duplicates are dropped.
    """
    raw = symbol.strip()
    # dict.fromkeys de-duplicates while keeping first-seen order.
    ordered = dict.fromkeys(c for c in (raw, normalize_for_yahoo(raw)) if c)
    return list(ordered)

def _yfinance_last_close(yahoo_symbol: str, period: str = "5d") -> Optional[float]:
    """Fetch the most recent non-null ``Close`` value for a symbol.

    Args:
        yahoo_symbol: Symbol handed to ``yf.Ticker``.
        period: History window passed through to ``Ticker.history``.

    Returns:
        The last available close as a float, or ``None`` when there is no
        history, no ``Close`` column, only null closes, or any exception.
    """
    try:
        history = yf.Ticker(yahoo_symbol).history(period=period, auto_adjust=False)
        if history is None or history.empty or "Close" not in history:
            return None
        closes = history["Close"].dropna()
        return None if closes.empty else float(closes.iloc[-1])
    except Exception:
        # Best effort by design: every failure is reported as "no price".
        return None

def resolve_yahoo_symbol(symbol: str) -> str:
    """Pick the symbol spelling to use for yfinance calls, with caching.

    A cached answer is returned immediately. Otherwise each candidate from
    ``yahoo_candidates`` is probed in order and the first one that yields a
    close price wins; if none does, the stripped input itself is used. The
    choice is stored in ``_YAHOO_SYMBOL_CACHE`` and written to disk.
    """
    key = symbol.strip()

    known = _YAHOO_SYMBOL_CACHE.get(key)
    if known:
        return known

    # next() stops probing at the first candidate that returns a price and
    # falls back to the input itself when every probe comes back empty.
    # NOTE(review): that fallback is cached too, and _yfinance_last_close
    # reports every failure as None, so a transient outage looks the same as
    # "no such symbol" — confirm this negative caching is intended.
    chosen = next(
        (cand for cand in yahoo_candidates(key) if _yfinance_last_close(cand) is not None),
        key,
    )
    _YAHOO_SYMBOL_CACHE[key] = chosen
    _save_symbol_cache()
    return chosen

def get_recent_close_safe(symbol: str, period: str = "5d") -> Optional[float]:
    """Return the latest close for ``symbol``, or ``None`` if unavailable.

    Symbols rejected by ``is_probably_valid_symbol`` are not looked up at all.
    """
    if is_probably_valid_symbol(symbol):
        return _yfinance_last_close(resolve_yahoo_symbol(symbol), period=period)
    return None



In [19]:
# from tavily import TavilyClient

tavily = TavilyClient(api_key=TAVILY_API_KEY)

def search_news(query, max_results=5):
    """Search the web via Tavily and return trimmed result records.

    Returns:
        One dict per hit with the keys ``title``, ``url`` and ``content``;
        an empty list when the response has no ``results``.
    """
    response = tavily.search(query=query, max_results=max_results, include_raw_content=False)
    hits = response.get("results", []) or []
    return [
        {
            "title": hit.get("title"),
            "url": hit.get("url"),
            "content": hit.get("content"),
        }
        for hit in hits
    ]


In [None]:


# Chat model shared by the answer-generation step; model name and key come
# from the configuration cell.
llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model=OPENAI_CHAT_MODEL,
    temperature=0.2,
)

# System prompt sent with every answer request. It asks for JSON only; the
# model is additionally wrapped with with_structured_output in a later cell.
SYSTEM = """
You are a cautious investor assistant.
You have a 2024 fundamentals snapshot for top companies.
You may also have live quotes and recent news.
Always be explicit about data freshness.
Return JSON only.
""".strip()



In [None]:
print("Hello World")

In [21]:
# from pydantic import BaseModel
# from typing import List, Optional

# One company mentioned in the answer (element of AnswerOut.companies).
class CompanyOut(BaseModel):
    company: str
    # Optional: defaults to None when no ticker is given.
    ticker: Optional[str] = None
    why_relevant: str

# Headline figures for one company (element of AnswerOut.key_numbers).
# All figures are optional strings that default to None.
class KeyNumbersOut(BaseModel):
    company: str
    sales: Optional[str] = None
    profit: Optional[str] = None
    assets: Optional[str] = None
    market_value: Optional[str] = None

# Top-level schema of the structured answer; handed to
# llm.with_structured_output right below. Every field is required.
class AnswerOut(BaseModel):
    summary: str
    companies: List[CompanyOut]
    bull_points: List[str]
    bear_points: List[str]
    key_numbers: List[KeyNumbersOut]
    data_notes: List[str]


llm_struct = llm.with_structured_output(AnswerOut)


def generate_answer(question, retrieved, resolved, quotes, profiles, news, market_errors):
    """Ask the structured-output LLM for an investor-style answer.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        retrieved: Dicts that each carry a ``card`` text; the cards are
            joined with blank lines.
        resolved: Ticker resolution records, JSON-encoded into the prompt.
        quotes: Quote records, JSON-encoded under the "Live quotes (Finnhub)"
            heading. NOTE(review): the records built by node_market also
            carry yfinance fields, so that heading is narrower than the data.
        profiles: Company profile records, JSON-encoded into the prompt.
        news: News items, JSON-encoded into the prompt.
        market_errors: Market-data failure records; ``None`` or an empty
            value is treated as an empty list.

    Returns:
        The ``model_dump()`` of the structured result returned by
        ``llm_struct.invoke``.
    """

    cards_text = "\n\n".join([x["card"] for x in retrieved])
    market_errors = market_errors or []

    # Only the outer whitespace is stripped; the per-line indentation inside
    # the literal is sent to the model as written.
    prompt = f"""
            User question:
            {question}

            Retrieved company cards (2024 snapshot):
            {cards_text}

            Resolved tickers:
            {json.dumps(resolved, ensure_ascii=False)}

            Live quotes (Finnhub):
            {json.dumps(quotes, ensure_ascii=False)}

            Company profiles (Finnhub):
            {json.dumps(profiles, ensure_ascii=False)}

            Recent news (Tavily):
            {json.dumps(news, ensure_ascii=False)}

            Market data issues (what failed and why):
            {json.dumps(market_errors, ensure_ascii=False)}

            Return JSON with fields:
            summary (string)
            companies (list of objects: company, ticker, why_relevant)
            bull_points (list of strings)
            bear_points (list of strings)
            key_numbers (list of objects: company, sales, profit, assets, market_value)
            data_notes (list of strings)
            """.strip()


    return llm_struct.invoke(
        [{"role": "system", "content": SYSTEM},
         {"role": "user", "content": prompt}]
    ).model_dump()


In [22]:
# import os, json, requests
# from typing import Any, Dict, List, Optional, Literal
# from pydantic import BaseModel, Field
# from langgraph.graph import StateGraph, END

# Assumes these are already defined in earlier cells:
# retrieve_companies, search_news, get_recent_close_safe
# FINNHUB_API_KEY (FINNHUB_BASE is defined just below)
# generate_answer (structured-output version), called as:
# generate_answer(question, retrieved, resolved, quotes, profiles, news, market_errors)

FINNHUB_BASE = "https://finnhub.io/api/v1"

# ---------- Safe Finnhub helpers ----------

def finnhub_get_safe(path: str, params: dict) -> dict:
    """GET a Finnhub endpoint and report the outcome without raising.

    The API key travels in the ``X-Finnhub-Token`` header instead of the
    query string. ``requests`` includes the request URL in its error text,
    and callers forward that text into ``market_errors`` and from there into
    the LLM prompt, so a key in the URL would leak. As a second line of
    defence the key is also masked in any error string returned here.

    Args:
        path: Endpoint path relative to ``FINNHUB_BASE`` (e.g. ``"quote"``).
        params: Query parameters; the mapping is copied, not mutated.

    Returns:
        A dict with the keys ``ok`` (bool), ``status`` (HTTP status or
        ``None``), ``data`` (decoded JSON, ``{}`` on failure) and ``error``
        (message string, ``None`` on success).
    """
    def _mask(message: str) -> str:
        # Never let the key escape through an error message.
        if FINNHUB_API_KEY:
            return message.replace(FINNHUB_API_KEY, "***")
        return message

    headers = {"X-Finnhub-Token": FINNHUB_API_KEY}
    try:
        r = requests.get(
            f"{FINNHUB_BASE}/{path}",
            params=dict(params),
            headers=headers,
            timeout=20,
        )
        r.raise_for_status()
        return {"ok": True, "status": r.status_code, "data": r.json(), "error": None}
    except requests.HTTPError as e:
        status = getattr(e.response, "status_code", None)
        return {"ok": False, "status": status, "data": {}, "error": _mask(str(e))}
    except Exception as e:
        # Covers connection errors, timeouts and invalid JSON bodies alike.
        return {"ok": False, "status": None, "data": {}, "error": _mask(str(e))}


def resolve_ticker_safe(company: str) -> Optional[str]:
    """Look up a ticker symbol for a company name via Finnhub search.

    Args:
        company: Company name to search for.

    Returns:
        The symbol of the first search result that has one, or ``None`` when
        the name is empty or not a string, the request fails, or no result
        carries a symbol.
    """
    query = company.strip() if isinstance(company, str) else ""
    if not query:
        # Nothing to search for; skip the network round trip.
        return None

    out = finnhub_get_safe("search", {"q": query})
    if not out["ok"]:
        return None

    data = out["data"]
    results = data.get("result") if isinstance(data, dict) else None
    for item in results or []:
        sym = item.get("symbol") if isinstance(item, dict) else None
        if sym:
            return sym
    return None

def get_quote_safe(symbol: str) -> dict:
    """Fetch the Finnhub quote for ``symbol`` without raising.

    A falsy symbol short-circuits to a failure record with the error
    ``"missing_symbol"``; otherwise the result of ``finnhub_get_safe`` for
    the ``quote`` endpoint is returned unchanged.
    """
    if symbol:
        return finnhub_get_safe("quote", {"symbol": symbol})
    return {"ok": False, "status": None, "data": {}, "error": "missing_symbol"}


def looks_problem_symbol(symbol: str) -> bool:
    """Flag symbols that are skipped for the Finnhub quote lookup.

    A symbol is a problem when it is empty/``None`` or contains a space.
    """
    return not symbol or " " in symbol



In [23]:
# ----------------------------
# One unified "market fetch" per ticker
# ----------------------------

def fetch_market_for_ticker(ticker: str) -> Dict[str, Any]:
    """
    Collect market data for one ticker from yfinance and Finnhub.

    yfinance is queried first; when it yields a close price it is recorded as
    the source. Finnhub is queried as well (unless the symbol looks
    problematic) and becomes the source only when yfinance had nothing.

    Returns:
      {
        "ticker": <original>,
        "yahoo_symbol_used": <symbol used for yfinance>,
        "yfinance_last_close": <float|None>,
        "finnhub_quote": <dict|None>,
        "source_used": "finnhub"|"yfinance"|None,
        "errors": [ ... ]
      }
    """
    out: Dict[str, Any] = {
        "ticker": ticker,
        "yahoo_symbol_used": None,
        "yfinance_last_close": None,
        "finnhub_quote": None,
        "source_used": None,
        "errors": [],
    }

    if not is_probably_valid_symbol(ticker):
        out["errors"].append({"stage": "validate", "reason": "invalid_symbol"})
        return out

    # yfinance (with normalization + cache)
    yahoo_sym = resolve_yahoo_symbol(ticker)
    out["yahoo_symbol_used"] = yahoo_sym
    out["yfinance_last_close"] = _yfinance_last_close(yahoo_sym)

    if out["yfinance_last_close"] is not None:
        out["source_used"] = "yfinance"

    # finnhub quote (best effort)
    if not looks_problem_symbol(ticker):
        q = get_quote_safe(ticker)
        if q["ok"]:
            quote_data = q.get("data")
            # A successful response without a current price ("c") carries no
            # usable quote. NOTE(review): Finnhub reportedly answers unknown
            # symbols with HTTP 200 and zeroed fields — confirm against the
            # API reference.
            if isinstance(quote_data, dict) and quote_data.get("c"):
                out["finnhub_quote"] = quote_data
                out["source_used"] = out["source_used"] or "finnhub"
            else:
                out["errors"].append({
                    "stage": "finnhub_quote",
                    "status": q.get("status"),
                    "reason": "empty_quote",
                })
        else:
            out["errors"].append({
                "stage": "finnhub_quote",
                "status": q.get("status"),
                "reason": q.get("error"),
            })
    else:
        out["errors"].append({"stage": "finnhub_quote", "reason": "skipped_problem_symbol"})

    if out["source_used"] is None:
        out["errors"].append({"stage": "market", "reason": "no_market_data_available"})

    return out

In [None]:
# ---------- Agent state ----------

NextStep = Literal["retrieve", "market", "news", "final"]

# Shared state passed between the graph nodes.
class AgentState(BaseModel):
    question: str
    # Step chosen by the planner for the router to follow.
    next_step: Optional[NextStep] = None

    retrieved: List[Dict[str, Any]] = Field(default_factory=list)
    resolved: List[Dict[str, Any]] = Field(default_factory=list)

    quotes: List[Dict[str, Any]] = Field(default_factory=list)
    market_errors: List[Dict[str, Any]] = Field(default_factory=list)

    news: List[Dict[str, Any]] = Field(default_factory=list)

    answer: Optional[Dict[str, Any]] = None

    # Tool steps the planner has already dispatched. An empty result list
    # cannot tell "not run yet" from "ran and found nothing", so the planner
    # records its own decisions here.
    attempted: List[str] = Field(default_factory=list)

# ---------- Planner node (basic agentic) ----------

def node_plan(state: AgentState) -> AgentState:
    """Choose the next step from the question keywords and what is missing.

    Order of preference: retrieve (when nothing is grounded yet), news (when
    the question asks for recent context), market (when price or recent
    context is wanted), otherwise final. Each tool step is dispatched at most
    once per run; a step that came back empty is not retried, which keeps the
    plan -> tool -> plan loop from cycling until the graph's recursion limit.

    Returns:
        A copy of ``state`` with ``next_step`` set and, for tool steps, the
        step appended to ``attempted``.
    """
    q = state.question.lower()

    # NOTE(review): these are substring checks, so "now" also matches "know".
    wants_recent = any(w in q for w in ["recent", "today", "now", "happening", "latest", "news", "headline"])
    wants_price = any(w in q for w in ["price", "stock", "quote", "market", "trading"])

    tried = set(state.attempted)

    def dispatch(step: str) -> AgentState:
        # Record the decision so the same step is not scheduled twice.
        return state.model_copy(update={
            "next_step": step,
            "attempted": [*state.attempted, step],
        })

    # Always retrieve first if we have nothing to ground on
    if not state.retrieved and "retrieve" not in tried:
        return dispatch("retrieve")

    # If the user asks for recent context, prefer news early
    if wants_recent and not state.news and "news" not in tried:
        return dispatch("news")

    # Market step is optional. Do it if user asks for price type info, or if question suggests it
    # Also skip market if we already tried and got many failures
    if (
        (wants_price or wants_recent)
        and not state.quotes
        and len(state.market_errors) < 3
        and "market" not in tried
    ):
        return dispatch("market")

    return state.model_copy(update={"next_step": "final"})

# ---------- Tool nodes ----------

def node_retrieve(state: AgentState) -> AgentState:
    """Fill ``retrieved`` with the six cards closest to the question."""
    hits = retrieve_companies(state.question, k=6)
    return state.model_copy(update={"retrieved": hits})

def node_news(state: AgentState) -> AgentState:
    """Search news for the question, enriched with up to three tickers.

    NOTE(review): ``resolved`` is only filled by node_market; when the planner
    schedules news before market the query is just the question.
    """
    symbols = [entry.get("ticker") for entry in state.resolved if entry.get("ticker")]
    query = state.question
    if symbols:
        query = query + " " + " ".join(symbols[:3])
    return state.model_copy(update={"news": search_news(query)})




def node_market(state: AgentState) -> AgentState:
    """Resolve a ticker for every retrieved company and gather market data.

    For each retrieved card a resolution record is stored. Companies without
    a ticker get a ``resolve`` error and are skipped. For the rest,
    ``fetch_market_for_ticker`` supplies a compact quote record plus any
    errors, which are tagged with the company and ticker.

    Returns:
        A copy of ``state`` whose ``resolved``, ``quotes`` and
        ``market_errors`` are replaced by the lists built here.
    """
    resolved_rows: List[Dict[str, Any]] = []
    quote_rows: List[Dict[str, Any]] = []
    error_rows: List[Dict[str, Any]] = []

    for card in state.retrieved:
        name = card.get("company")
        symbol = resolve_ticker_safe(name)

        resolved_rows.append({
            "company": name,
            "country": card.get("country"),
            "ticker": symbol,
        })

        if not symbol:
            # No ticker means no market lookup is possible for this company.
            error_rows.append({
                "company": name,
                "ticker": None,
                "stage": "resolve",
                "reason": "no_ticker",
            })
            continue

        market = fetch_market_for_ticker(symbol)

        # Compact quote record handed to the LLM.
        quote_rows.append({
            "symbol": symbol,
            "yfinance_symbol_used": market.get("yahoo_symbol_used"),
            "yfinance_last_close": market.get("yfinance_last_close"),
            "finnhub_quote": market.get("finnhub_quote"),
            "source_used": market.get("source_used"),
        })

        # Surface lookup problems so the final answer can explain limitations.
        error_rows.extend(
            {"company": name, "ticker": symbol, **issue}
            for issue in market.get("errors", [])
        )

    return state.model_copy(update={
        "resolved": resolved_rows,
        "quotes": quote_rows,
        "market_errors": error_rows,
    })



def node_final(state: AgentState) -> AgentState:
    """Generate the final answer from everything gathered in the state.

    Company profiles are not collected by this graph, so an empty list is
    passed for them.
    """
    inputs = {
        "question": state.question,
        "retrieved": state.retrieved,
        "resolved": state.resolved,
        "quotes": state.quotes,
        "profiles": [],
        "news": state.news,
        "market_errors": state.market_errors,
    }
    return state.model_copy(update={"answer": generate_answer(**inputs)})



In [25]:
# ---------- Router and graph ----------

def route(state: AgentState) -> str:
    """Return the planner's chosen step, defaulting to ``"final"``."""
    step = state.next_step
    return step if step else "final"

# Build the graph: a planner node plus one node per tool and a final node.
g = StateGraph(AgentState)
g.add_node("plan", node_plan)
g.add_node("retrieve", node_retrieve)
g.add_node("market", node_market)
g.add_node("news", node_news)
g.add_node("final", node_final)

g.set_entry_point("plan")

# The planner's next_step (via route) selects which node runs next.
g.add_conditional_edges("plan", route, {
    "retrieve": "retrieve",
    "market": "market",
    "news": "news",
    "final": "final",
})

# Loop back to plan after each step
g.add_edge("retrieve", "plan")
g.add_edge("market", "plan")
g.add_edge("news", "plan")

# The final node ends the run.
g.add_edge("final", END)

agent_graph = g.compile()

# def run_agent_langgraph(question: str) -> Dict[str, Any]:
    # state = AgentState(question=question)
    # final = agent_graph.invoke(state)
    # return final.answer


In [26]:
# def run_agent_langgraph(question: str):
#     state = AgentState(question=question)
#     final = agent_graph.invoke(state)
#     return final.answer

def run_agent_langgraph(question: str):
    """Run the compiled agent graph for one question and return its answer.

    The graph result is indexed like a mapping; only its ``"answer"`` entry
    is returned.
    """
    initial_state = AgentState(question=question)
    result = agent_graph.invoke(initial_state)
    return result["answer"]


run_agent_langgraph(
    "Give me an investor style view of the most valuable profitable companies and what is happening with them recently"
)

$BRK.A: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")


{'summary': 'This report provides an overview of some of the most valuable and profitable companies as of 2024, along with their recent market performance and relevant news. The data reflects the financial health and market dynamics of these firms, which are critical for investors looking to make informed decisions.',
 'companies': [{'company': 'Investor AB',
   'ticker': 'INVE B.ST',
   'why_relevant': 'A leading investment company in Sweden with significant assets and profitability.'},
  {'company': 'NASDAQ',
   'ticker': 'NDAQ',
   'why_relevant': 'The operator of the NASDAQ stock exchange, crucial for tech and growth stocks.'},
  {'company': 'East Money Information',
   'ticker': None,
   'why_relevant': 'A prominent financial services company in China, significant for its market presence.'},
  {'company': 'Berkshire Hathaway',
   'ticker': 'BRK.A',
   'why_relevant': 'One of the largest and most successful conglomerates globally, known for its diversified investments.'},
  {'compa

In [27]:
run_agent_langgraph("Is Nvidia overvalued")


{'summary': "Nvidia's valuation appears high compared to its financial metrics, particularly its market value relative to sales and profit. This raises questions about whether it is overvalued, especially in comparison to its peers.",
 'companies': [{'company': 'NVIDIA',
   'ticker': None,
   'why_relevant': 'Leading player in the semiconductor industry with significant market value.'},
  {'company': 'Advanced Micro Devices',
   'ticker': None,
   'why_relevant': 'Competitor in the semiconductor space, providing a benchmark for valuation.'},
  {'company': 'RTX',
   'ticker': None,
   'why_relevant': 'Another major player in the tech sector for comparative analysis.'}],
 'bull_points': ['Strong sales growth of $60.9 billion in 2024.',
  'High profit margin with $29.8 billion in profit.',
  'Leading position in AI and graphics processing markets.'],
 'bear_points': ['Market value of $2,312 billion suggests a high valuation relative to sales and profit.',
  "Comparison with AMD and RTX sh