In [10]:
import os
import json
import re
import requests
from typing import Dict, Any, List, Optional
from typing import TypedDict, Literal
from bs4 import BeautifulSoup

# LangChain imports
from langchain.tools import tool
from langchain.agents import create_agent  # agent builder used in ref
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Tavily search (community tool)
from langchain_community.tools.tavily_search import TavilySearchResults

# LangGraph imports (StateGraph-based flow)
from langgraph.graph import StateGraph, END

# Chat interface used in your reference
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage
from langgraph.graph import StateGraph, START, END

In [12]:

# Credentials. No visible cell defines `openai_api_key` / `tavily_api_key`, so
# assigning them unconditionally raises NameError on a fresh kernel
# (Restart & Run All). Export them only when an earlier cell or session has
# defined them; otherwise rely on what is already in the process environment.
for _env_name, _notebook_var in (
    ("OPENAI_API_KEY", "openai_api_key"),
    ("TAVILY_API_KEY", "tavily_api_key"),
):
    _notebook_value = globals().get(_notebook_var)
    if _notebook_value:
        os.environ[_env_name] = _notebook_value

# Module-level copies read by the helper functions below; None when unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# HTTP headers sent with outgoing page fetches.
HEADERS = {"User-Agent": "FinancialAssistant/1.0"}

In [14]:
# Router keywords, stored as word-boundary (\b...\b) regex patterns.
WEB_KEYWORDS = [
    rf"\b{term}\b"
    for term in ("latest", "today", "current", "recent", "breaking")
]
RAG_KEYWORDS = [
    rf"\b{term}\b"
    for term in ("policy", "internal", "product", "manual", "knowledge base")
]

# LLM factory (ChatOpenAI wrapper used in your snippet)
def get_chat_llm(model: str = "gpt-4o-mini", temperature: float = 0.0) -> ChatOpenAI:
    """Build a ChatOpenAI client bound to the module-level OPENAI_API_KEY.

    Args:
        model: Model name forwarded to ChatOpenAI.
        temperature: Sampling temperature forwarded to ChatOpenAI.

    Returns:
        A ChatOpenAI instance configured with the given model and temperature.

    Raises:
        RuntimeError: If OPENAI_API_KEY is empty or unset.
    """
    if OPENAI_API_KEY:
        return ChatOpenAI(model=model, temperature=temperature, api_key=OPENAI_API_KEY)
    raise RuntimeError("OPENAI_API_KEY must be set in environment")

# Lightweight HTML text extractor
def extract_text(html: str) -> str:
    """Return the text of an HTML document as newline-separated, non-empty lines.

    ``script``, ``style`` and ``noscript`` elements are removed before the text
    is extracted. Every remaining line is stripped of surrounding whitespace,
    and lines that end up empty are dropped.
    """
    document = BeautifulSoup(html, "html.parser")

    # Drop elements whose content is not page text.
    for unwanted in document(["script", "style", "noscript"]):
        unwanted.decompose()

    raw_text = document.get_text(separator="\n")
    stripped_rows = (row.strip() for row in raw_text.splitlines())
    return "\n".join(filter(None, stripped_rows))

In [16]:
@tool
def tavily_search(city: str) -> str:
    """
    Use TavilySearchResults to find tourist guide/blog pages for `city`,
    fetch pages, and summarize them with the ChatOpenAI model.
    Returns a plain text aggregation of summaries.
    """
    # NOTE: with @tool the docstring above doubles as the tool description
    # handed to the model, so it is deliberately left unchanged.
    if not TAVILY_API_KEY:
        return "TAVILY_API_KEY not set; cannot run Tavily search."

    # Build search query
    query = f"Top tourist attractions in {city} travel guide blog"

    # `max_results` is the documented result-count field; at most five pages
    # are fetched below. TAVILY_API_KEY is read from os.environ, so the search
    # wrapper picks the key up from the environment.
    search = TavilySearchResults(max_results=5)
    results = search.run(query)

    # The tool reports failures as a plain string rather than raising; surface
    # that message instead of silently returning an empty answer.
    if not isinstance(results, list):
        return f"Tavily search failed: {results}"

    urls = [
        hit["url"]
        for hit in results
        if isinstance(hit, dict) and hit.get("url")
    ][:5]
    if not urls:
        return f"No Tavily results found for {city}."

    llm = get_chat_llm(model="gpt-4o-mini", temperature=0)

    summaries: List[str] = []
    for url in urls:
        # Best effort per page: one failing URL must not abort the others, so
        # the error is recorded in the output in place of a summary.
        try:
            resp = requests.get(url, headers=HEADERS, timeout=10)
            resp.raise_for_status()
            # trim to reasonable length for LLM
            text = extract_text(resp.text)[:12000]

            prompt = [
                {"role": "system", "content": "You are a travel content summarizer. Produce a crisp summary."},
                {"role": "user", "content": f"Summarize the following webpage content for travel insights:\n\n{text}"}
            ]
            response = llm.invoke(prompt)
            summary = getattr(response, "content", str(response))
            summaries.append(f"URL: {url}\nSummary: {summary}\n")
        except Exception as e:
            summaries.append(f"URL: {url}\nError summarizing page: {e}\n")

    return "\n".join(summaries)

In [18]:
@tool
def rag_search(query: str) -> str:
    """
    RAG retrieval tool. Expects a FAISS vectorstore saved at ./faiss_store or built at runtime.
    Returns top-k documents joined as a text blob.
    """
    # The docstring above is left untouched: @tool exposes it as the tool description.
    if not OPENAI_API_KEY:
        return "OPENAI_API_KEY not set; cannot run RAG retrieval."

    # Demo corpus. No persisted index is loaded here; an in-memory FAISS index
    # is built from these texts on every call.
    sample_docs = [
        "Internal refund policy: customers can return items within 30 days given proof of purchase.",
        "Product X spec: battery lasts up to 14 days under normal usage and weighs 1.2kg.",
        "Company architecture: we use microservices and event-driven transport for async tasks."
    ]

    try:
        embedder = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
        store = FAISS.from_texts(sample_docs, embedder)
        hits = store.similarity_search_with_score(query, k=5)
        # One "DocN (score=...)" entry per hit, numbered from 1.
        return "\n\n".join(
            f"Doc{rank} (score={score}): {doc.page_content}"
            for rank, (doc, score) in enumerate(hits, start=1)
        )
    except Exception as e:
        # Failures are reported as text so the tool always returns a string.
        return f"Error in rag_search: {e}"

In [20]:
def router_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Router node: performs a quick heuristic check, then falls back to LLM.
    Output: {"route": "web_search" | "rag" | "llm"}

    Steps:
      1. An empty or missing ``query`` routes to "llm" without any model call.
      2. Heuristic: a WEB_KEYWORDS pattern match routes to "web_search", a
         RAG_KEYWORDS pattern match routes to "rag" (web patterns win on a tie).
      3. Otherwise the chat model classifies the query; any failure or
         unrecognised reply routes to "llm".

    Raises:
        RuntimeError: propagated from get_chat_llm when the OpenAI key is
            missing and the heuristic did not already decide the route.
    """
    # The query lives in the graph state; it is not a global.
    query = state.get("query") or ""
    if not isinstance(query, str):
        query = str(query)
    if not query.strip():
        return {"route": "llm"}

    # Quick heuristic check against the module-level keyword patterns.
    lowered = query.lower()
    if any(re.search(pattern, lowered) for pattern in WEB_KEYWORDS):
        return {"route": "web_search"}
    if any(re.search(pattern, lowered) for pattern in RAG_KEYWORDS):
        return {"route": "rag"}

    llm = get_chat_llm(model="gpt-4o-mini", temperature=0)

    prompt = [
        {
            "role": "system",
            "content": (
                "Classify the user query into one routing category. "
                "Reply with exactly ONE of: web_search, rag, llm. "
                "No explanation."
            )
        },
        {"role": "user", "content": query},
    ]

    try:
        resp = llm.invoke(prompt)
        output = getattr(resp, "content", "").strip().lower()
    except Exception:
        # Deliberate best effort: a failed classification falls through to
        # the default "llm" route rather than failing the whole graph.
        output = ""

    # sanitize LLM output (first matching label wins, in this order)
    for label in ("web_search", "rag", "llm"):
        if label in output:
            return {"route": label}

    return {"route": "llm"}

In [22]:
def summarizer_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Write the final travel report from whatever evidence the state carries.

    State keys read (all optional, with defaults):
      - city: defaults to "the city"
      - route: defaults to "llm"
      - evidence: a string, or a list of strings joined with blank lines
      - weather: defaults to ""

    Returns {"final_report": "<text>"}
    """
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    city = state.get("city", "the city")
    route = state.get("route", "llm")
    weather = state.get("weather", "")

    # Collapse the evidence into one text blob.
    raw_evidence = state.get("evidence", "")
    ev_text = "\n\n".join(raw_evidence) if isinstance(raw_evidence, list) else str(raw_evidence)

    system_message = {
        "role": "system",
        "content": "You are an expert travel report writer. Produce executive-level summaries.",
    }
    user_request = f"""
Write an executive blog-style travel report for **{city}** using the evidence and current weather below.

--- CURRENT WEATHER ---
{weather}

--- EVIDENCE (from {route}) ---
{ev_text}

Make sure the final report explicitly includes:
- Current weather condition (sunny, cloudy, rainy, etc.)
- Temperature and how it feels (if available)
- Humidity levels (if available)
- Wind conditions (if available)
- How the weather affects travel experience
- Whether the weather is ideal for visiting right now

Output format:
**Executive Summary**
**Current Weather Conditions**
**Key Insights**
**Best Time to Visit**
**Top Attractions**
**Final Recommendation**
"""

    reply = llm.invoke([system_message, {"role": "user", "content": user_request}])
    # Fall back to the string form when the reply has no `content` attribute.
    report = getattr(reply, "content", str(reply))
    return {"final_report": report}

In [24]:
# ------------------------------------------------------------
# 1. FLOW STATE
# ------------------------------------------------------------
class FlowState(TypedDict, total=False):
    """Shared state passed between graph nodes; every key is optional (total=False)."""

    # Caller-supplied inputs.
    city: str
    query: str

    # Routing decision written by the router node.
    route: Literal["web_search", "rag", "llm"]

    # Intermediate results written by the agent and retrieval/reasoning nodes.
    weather: str
    tourist: str
    evidence: str

    # Output written by the summarizer node.
    final_report: str



# ------------------------------------------------------------
# 2. LLM FACTORY
# ------------------------------------------------------------
def get_llm(model="gpt-4o-mini", temperature=0):
    """Return a ChatOpenAI client for the given model name and temperature."""
    llm_settings = {"model": model, "temperature": temperature}
    return ChatOpenAI(**llm_settings)



# ------------------------------------------------------------
# 3. ACTUAL AGENT NODE
# ------------------------------------------------------------
def _parse_agent_json(raw: str) -> Dict[str, Any]:
    """Parse the model reply into a dict, tolerating text wrapped around the JSON."""
    candidates = [raw]
    # Fallback candidate: the outermost {...} span (e.g. inside a code fence).
    start, end = raw.find("{"), raw.rfind("}")
    if 0 <= start < end:
        candidates.append(raw[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        # Valid JSON that is not an object (list, string, ...) has no keys to read.
        if isinstance(parsed, dict):
            return parsed

    return {"weather": raw, "tourist": "No tourist info extracted."}


def _state_text(value: Any) -> str:
    """Coerce a parsed JSON value into the plain string FlowState declares."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(f"- {item}" for item in value)
    return str(value)


def agent_node(state: FlowState) -> FlowState:
    """
    Ask the chat model for weather and attraction information about a city.

    Only a single chat-model call is made here; no search or weather tool is
    invoked. The model is instructed to answer with JSON holding the keys
    "weather" and "tourist".

    Args:
        state: flow state; must contain a non-empty "city".

    Returns:
        A partial state with string values for "weather" and "tourist". When
        the reply cannot be parsed as a JSON object, the raw reply is stored
        under "weather" and "tourist" is a placeholder message.

    Raises:
        ValueError: if "city" is missing or empty.
    """

    city = state.get("city")
    if not city:
        raise ValueError("agent_node requires 'city' in FlowState")

    instruction = f"""
    You are a research agent.
    Return EXACT JSON with keys:
    - "weather": a short description of weather for {city}.
    - "tourist": a bullet list of attractions.

    Example:
    {{
        "weather": "...",
        "tourist": "..."
    }}
    """

    llm = get_llm()

    resp = llm.invoke([
        {"role": "system", "content": instruction},
        {"role": "user", "content": f"Research city {city}. Return ONLY JSON."}
    ])

    # Message content is not guaranteed to be a plain string.
    raw = resp.content if isinstance(resp.content, str) else str(resp.content)

    data = _parse_agent_json(raw)

    # The model may return "tourist" as a JSON array; flatten it to text so the
    # state matches its declared `str` fields.
    return {
        "weather": _state_text(data.get("weather", "")),
        "tourist": _state_text(data.get("tourist", ""))
    }

In [26]:
# ------------------------------------------------------------
# 4. ROUTER NODE
# ------------------------------------------------------------
def router_node(state: FlowState) -> FlowState:
    """
    Pick the route for the current query.

    An empty or missing query goes straight to "llm". Otherwise the chat model
    is asked for one label; a reply that is not exactly "web_search", "rag" or
    "llm" (after trimming and lower-casing) also becomes "llm".

    Returns a partial FlowState holding only 'route'.

    NOTE: this redefines the router_node from an earlier cell, so this is the
    version create_flow picks up.
    """
    user_query = state.get("query", "")
    if not user_query:
        return {"route": "llm"}

    classification_messages = [
        {
            "role": "system",
            "content": (
                "Classify the query into EXACTLY one of: "
                "web_search, rag, llm. "
                "Return ONLY one token."
            )
        },
        {"role": "user", "content": user_query}
    ]
    reply = get_llm().invoke(classification_messages)

    label = reply.content.strip().lower()
    # Anything outside the known labels falls back to plain LLM reasoning.
    return {"route": label if label in ("web_search", "rag", "llm") else "llm"}



# ------------------------------------------------------------
# 5. WEB SEARCH NODE (Tavily)
# ------------------------------------------------------------
def web_search_node(state: FlowState) -> FlowState:
    """Return placeholder web-search evidence for the query (or the city if no query).

    No search is performed here; the evidence is a mock string.
    """
    # First non-empty value among "query" and "city", else an empty string.
    q = ""
    for key in ("query", "city"):
        value = state.get(key)
        if value:
            q = value
            break

    # Replace with TavilySearchResults run
    return {"evidence": f"(Mock Tavily search results for: {q})"}



# ------------------------------------------------------------
# 6. RAG NODE (FAISS / docstore)
# ------------------------------------------------------------
def rag_node(state: FlowState) -> FlowState:
    """Return placeholder retrieval evidence for the query.

    No retrieval is performed here; the evidence is a mock string.
    """
    question = state.get("query", "")
    # Replace with real RAG chain
    return {"evidence": f"(Mock RAG retrieval for: {question})"}



# ------------------------------------------------------------
# 7. LLM REASONING NODE
# ------------------------------------------------------------
def llm_node(state: FlowState) -> FlowState:
    """Answer the query directly with the chat model and store the reply as evidence."""
    messages = [
        {"role": "system", "content": "Answer concisely."},
        {"role": "user", "content": state.get("query", "")}
    ]
    reply = get_llm().invoke(messages)
    return {"evidence": reply.content}



# ------------------------------------------------------------
# 8. FINAL SUMMARIZER NODE
# ------------------------------------------------------------
def summarizer_node(state: FlowState) -> FlowState:
    """
    Combine weather, attractions and supporting evidence into the final report.

    Reads 'weather', 'tourist' and 'evidence' from the state (each defaults to
    an empty string) and returns a partial FlowState holding 'final_report'.

    NOTE: this redefines the summarizer_node from an earlier cell, so this is
    the version create_flow picks up.
    """
    weather, tourist, evidence = (
        state.get(key, "") for key in ("weather", "tourist", "evidence")
    )

    prompt = f"""
    Produce a final summary:

    WEATHER:
    {weather}

    ATTRACTIONS:
    {tourist}

    SUPPORTING EVIDENCE:
    {evidence}
    """

    messages = [{"role": "user", "content": prompt}]
    reply = get_llm().invoke(messages)
    return {"final_report": reply.content}




In [28]:
def create_flow():
    """Assemble and compile the graph.

    Topology: agent -> router -> (web_search | rag | llm_reason) -> summarizer -> END.
    The branch taken after the router is chosen by the 'route' value in the state.
    """
    graph = StateGraph(FlowState)

    # Register the nodes (name -> handler).
    node_handlers = (
        ("agent", agent_node),
        ("router", router_node),
        ("web_search", web_search_node),
        ("rag", rag_node),
        ("llm_reason", llm_node),
        ("summarizer", summarizer_node),
    )
    for node_name, handler in node_handlers:
        graph.add_node(node_name, handler)

    graph.set_entry_point("agent")
    graph.add_edge("agent", "router")

    # Map each route label to its node; the "llm" label targets "llm_reason".
    route_targets = {
        "web_search": "web_search",
        "rag": "rag",
        "llm": "llm_reason"
    }
    graph.add_conditional_edges("router", lambda s: s["route"], route_targets)

    # Every branch feeds the summarizer, which ends the run.
    for branch_name in ("web_search", "rag", "llm_reason"):
        graph.add_edge(branch_name, "summarizer")
    graph.add_edge("summarizer", END)

    return graph.compile()


In [52]:
if __name__ == "__main__":
    # Build the compiled graph and run it once for a sample city and question.
    flow = create_flow()

    input_state = dict(
        city="Chennai",
        query="What is the best time to visit Chennai and the top attractions?",
    )
    result = flow.invoke(input_state)

    # Show the report when one was produced; otherwise show the whole state.
    report_or_state = result.get("final_report") or result
    print("\n===== FINAL REPORT =====\n")
    print(report_or_state)

NameError: name 'create_financial_flow' is not defined