In [None]:
import os
os.environ["GOOGLE_API_KEY"] = "API_KEY_PLACEHOLDER"

In [286]:
from langchain_google_genai import ChatGoogleGenerativeAI
model=ChatGoogleGenerativeAI(model='gemini-1.5-flash')
output=model.invoke("hi")
print(output.content)

Hi there! How can I help you today?


In [287]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
len(embeddings.embed_query("hi"))

384

In [288]:
import operator
from typing import List
from pydantic import BaseModel , Field
from langchain.prompts import PromptTemplate
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph,END

In [289]:
class TopicSelectionParser(BaseModel):
    Topic:str=Field(description="selected topic")
    Reasoning:str=Field(description='Reasoning behind topic selection')

In [290]:
from langchain.output_parsers import PydanticOutputParser

In [291]:
parser=PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [292]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

In [293]:
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [294]:
state={"messages":["hi"]}

In [295]:
state="hi"

In [296]:
def function_1(state:AgentState):
    
    question=state["messages"][-1]
    
    print("Question",question)
    
    template="""
    Your task is to classify the given user query into one of the following categories: [USA,Not Related]. 
    Only respond with the category name and nothing else.

    User query: {question}
    {format_instructions}
    """
    
    prompt= PromptTemplate(
        template=template,
        input_variable=["question"],
        partial_variables={"format_instructions": parser.get_format_instructions()}
    )
    
    
    chain= prompt | model | parser
    
    response = chain.invoke({"question":question})
    
    print("Parsed response:", response)
    
    return {"messages": [response.Topic]}

In [297]:
state={"messages":["what is a today weather?"]}

In [298]:
state={"messages":["what is a GDP of usa??"]}

In [299]:
function_1(state)

Question what is a GDP of usa??
Parsed response: Topic='USA' Reasoning='The query explicitly asks for the GDP of the USA.'


{'messages': ['USA']}

In [300]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [301]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt")
docs = loader.load()


In [302]:
docs=loader.load()

In [303]:
docs

[Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content="üá∫üá∏ Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP ‚Äì Size, Composition, and Global Share\nAs of 2024, the United States‚Äô nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in

In [304]:
docs[0].page_content

"üá∫üá∏ Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP ‚Äì Size, Composition, and Global Share\nAs of 2024, the United States‚Äô nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China (which ranks 2nd). The U.S. GDP per capita is also among the high

In [305]:
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50
)

In [306]:
new_docs=text_splitter.split_documents(documents=docs)

In [307]:
new_docs

[Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content='üá∫üá∏ Overview of the U.S. Economy'),
 Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content='The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,'),
 Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content='It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a'),
 Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content='a population of over 335 million people and a high level of technological advancement, the U.S. economy thriv

In [308]:
doc_string=[doc.page_content for doc in new_docs]

In [309]:
doc_string

['üá∫üá∏ Overview of the U.S. Economy',
 'The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,',
 'It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a',
 'a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services.',
 'innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.',
 'U.S. GDP ‚Äì Size, Composition, and Global Share',
 'As of 2024, the United States‚Äô nominal GDP is estimated to be around $28 trillion USD, accounting for appr

In [310]:
db=Chroma.from_documents(new_docs,embeddings)

In [311]:
retriever=db.as_retriever(search_kwargs={"k": 3})

In [312]:
retriever.invoke("industrial growth of usa?")

[Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content='Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden administration‚Äôs Inflation'),
 Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content='Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden administration‚Äôs Inflation'),
 Document(metadata={'source': '/Users/lakshmin/Langgraph-only/Langgraph/LANGGRAPH/data/usa.txt'}, page_content='üá∫üá∏ Overview of the U.S. Economy')]

In [313]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [314]:
class TopicSelectionParser(BaseModel):
    Topic:str=Field(description="selected topic")
    Reasoning:str=Field(description='Reasoning behind topic selection')

In [315]:
def function_2(state:AgentState):
    print("-> RAG Call ->")
    
    question = state["messages"][0]
    
    prompt=PromptTemplate(
        template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:""",
        
        input_variables=['context', 'question']
    )
    
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
    result = rag_chain.invoke(question)
    return  {"messages": [result]}

In [316]:
# LLM Function
def function_3(state:AgentState):
    print("-> LLM Call ->")
    question = state["messages"][0]
    
    # Normal LLM call
    complete_query = "Anwer the follow question with you knowledge of the real world. Following is the user question: " + question
    response = model.invoke(complete_query)
    return {"messages": [response.content]}

In [317]:
pip install ddgs --upgrade

  pid, fd = os.forkpty()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [318]:
from langgraph.graph import StateGraph,END

In [319]:
workflow=StateGraph(AgentState)

In [320]:
workflow.add_node("Supervisor",function_1)

<langgraph.graph.state.StateGraph at 0x31815c410>

In [321]:
workflow.add_node("RAG",function_2)

<langgraph.graph.state.StateGraph at 0x31815c410>

In [322]:
workflow.add_node("LLM",function_3)

<langgraph.graph.state.StateGraph at 0x31815c410>

In [323]:
workflow.add_node("Webcrawler",function_web_crawler)

<langgraph.graph.state.StateGraph at 0x31815c410>

In [324]:
workflow.set_entry_point("Supervisor")

<langgraph.graph.state.StateGraph at 0x31815c410>

In [325]:
workflow.add_conditional_edges(
    "Supervisor",
    router,
    {
        "RAG Call": "RAG",
        "LLM Call": "LLM",
        "Webcrawler Call": "Webcrawler",
    }
)

<langgraph.graph.state.StateGraph at 0x31815c410>

In [326]:
workflow.add_edge("RAG",END)
workflow.add_edge("LLM",END)
workflow.add_edge("Webcrawler",END)

<langgraph.graph.state.StateGraph at 0x31815c410>

In [327]:
app=workflow.compile()

In [328]:
state={"messages":["hi"]}

In [329]:
app.invoke(state)

Question hi
Parsed response: Topic='Not Related' Reasoning='The query "hi" is a generic greeting and does not relate to the USA.'
Original message: 'Not Related'
Normalized message: 'not related'
Checking for 'news' in message: False
Checking for 'latest' in message: False
Checking for 'real-time' in message: False
Checking for 'update' in message: False
Checking for 'current' in message: False
Router returns: LLM Call
-> LLM Call ->


{'messages': ['hi', 'Not Related', 'Hi there!']}

In [330]:
state={"messages":["what is a latest gdp of usa?"]}

In [331]:
app.invoke(state)

Question what is a latest gdp of usa?
Parsed response: Topic='USA' Reasoning='The query explicitly asks for the latest GDP of the USA.'
Original message: 'USA'
Normalized message: 'usa'
Checking for 'news' in message: False
Checking for 'latest' in message: False
Checking for 'real-time' in message: False
Checking for 'update' in message: False
Checking for 'current' in message: False
Router returns: RAG Call
-> RAG Call ->


{'messages': ['what is a latest gdp of usa?',
  'USA',
  'I do not know the latest GDP of the USA.  The provided text only gives the title of a section about the US economy, not the actual GDP figure.']}

In [332]:
state = {
    "messages": ["Can you tell me the 'real-time' industrial growth of USA?"]
}


In [333]:
state={"messages":["Question Can you tell me the 'real-time' industrial growth of USA?"]}

In [334]:
result=app.invoke(state)

Question Question Can you tell me the 'real-time' industrial growth of USA?
Parsed response: Topic='USA' Reasoning='The query explicitly asks about the industrial growth of the USA.'
Original message: 'USA'
Normalized message: 'usa'
Checking for 'news' in message: False
Checking for 'latest' in message: False
Checking for 'real-time' in message: False
Checking for 'update' in message: False
Checking for 'current' in message: False
Router returns: RAG Call
-> RAG Call ->


In [335]:
result["messages"][-1]

'The provided text forecasts moderate growth in the U.S. economy, driven by innovation in several sectors.  It does not offer real-time industrial growth data.  Therefore, I cannot answer your question.'

In [336]:
import re

# Normalization function
def normalize(text):
    text = text.lower()
    # Replace all dash-like characters with normal hyphen
    text = re.sub(r"[‚Äê‚Äë‚Äí‚Äì‚Äî‚àí]", "-", text)
    # Replace curly quotes with straight ones
    text = re.sub(r"[‚Äò‚Äô`¬¥]", "'", text)
    text = re.sub(r"[‚Äú‚Äù]", '"', text)
    return text

# Simulated state object
state = {
    "messages": ["Question Can you tell me the 'real‚Äëtime' industrial growth of USA?"]
}
print("Full state:", repr(state))
# Router
def router(state):
    last_message = state["messages"][-1]
    print("Original message:", repr(last_message))

    message_normalized = normalize(last_message)
    print("Normalized message:", repr(message_normalized))

    # Keyword check
    keywords = ["news", "latest", "real-time", "update", "current"]
    for word in keywords:
        print(f"Checking for '{word}' in message: {word in message_normalized}")

    if any(word in message_normalized for word in keywords):
        print("Router returns: WebCrawler Call")
        return "WebCrawler Call"
    elif "usa" in message_normalized:
        print("Router returns: RAG Call")
        return "RAG Call"
    else:
        print("Router returns: LLM Call")
        return "LLM Call"

# Test it
router(state)


Full state: {'messages': ["Question Can you tell me the 'real‚Äëtime' industrial growth of USA?"]}
Original message: "Question Can you tell me the 'real‚Äëtime' industrial growth of USA?"
Normalized message: "question can you tell me the 'real-time' industrial growth of usa?"
Checking for 'news' in message: False
Checking for 'latest' in message: False
Checking for 'real-time' in message: True
Checking for 'update' in message: False
Checking for 'current' in message: False
Router returns: WebCrawler Call


'WebCrawler Call'

In [337]:
user_input = "Can you tell me the 'real-time' industrial growth of USA?"


In [338]:
parsed_topic = "usa"  # Imagine this comes from your NLP parser


In [339]:
state["messages"] = [user_input]  # ‚Üê good, preserves real query
state["topic"] = parsed_topic     # ‚Üê optionally add parsed info


In [340]:
def invoke(state):
    print("Inside invoke, state is:", state)
    result = router(state)
    print("Router result inside invoke:", result)
    return result


In [341]:
result = invoke(state)


Inside invoke, state is: {'messages': ["Can you tell me the 'real-time' industrial growth of USA?"], 'topic': 'usa'}
Original message: "Can you tell me the 'real-time' industrial growth of USA?"
Normalized message: "can you tell me the 'real-time' industrial growth of usa?"
Checking for 'news' in message: False
Checking for 'latest' in message: False
Checking for 'real-time' in message: True
Checking for 'update' in message: False
Checking for 'current' in message: False
Router returns: WebCrawler Call
Router result inside invoke: WebCrawler Call


In [342]:
def web_crawler_handler(state):
    # Simulate web crawler response
    return "üì° [Web] Real-time industrial growth of USA is currently increasing due to a rebound in manufacturing output."

def rag_handler(state):
    # Simulate RAG response
    return "üìò [RAG] Based on retrieved documents, the USA's industrial growth shows a steady upward trend."

def llm_handler(state):
    # Simulate default LLM generation
    return "üí¨ [LLM] Here's a general overview of industrial growth in the USA."


In [343]:
def invoke(state):
    print("Inside invoke, state is:", state)
    route = router(state)
    print("Router result inside invoke:", route)

    if route == "WebCrawler Call":
        return web_crawler_handler(state)
    elif route == "RAG Call":
        return rag_handler(state)
    else:
        return llm_handler(state)


In [344]:
user_input = "Question Can you tell me the 'real-time' industrial growth of USA?"
state = {"messages": [user_input]}

result = invoke(state)
print("Final result:\n", result)

Inside invoke, state is: {'messages': ["Question Can you tell me the 'real-time' industrial growth of USA?"]}
Original message: "Question Can you tell me the 'real-time' industrial growth of USA?"
Normalized message: "question can you tell me the 'real-time' industrial growth of usa?"
Checking for 'news' in message: False
Checking for 'latest' in message: False
Checking for 'real-time' in message: True
Checking for 'update' in message: False
Checking for 'current' in message: False
Router returns: WebCrawler Call
Router result inside invoke: WebCrawler Call
Final result:
 üì° [Web] Real-time industrial growth of USA is currently increasing due to a rebound in manufacturing output.
