In [2]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

import operator
from typing import List
from pydantic import BaseModel , Field
from langchain.prompts import PromptTemplate
from typing import TypedDict, Annotated, Sequence ,Optional
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph,END

### Config the model from Google

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI

load_dotenv()

model=ChatGoogleGenerativeAI(model='gemini-1.5-flash')
output=model.invoke("hi")
print(output.content)

Hi there! How can I help you today?


### Config the embedding model

In [4]:
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
len(embeddings.embed_query("hi"))

  from .autonotebook import tqdm as notebook_tqdm


384

## lets take a data embedd it and store in VDB

In [5]:
# Example: Load all .txt files from the "data" directory
from langchain_community.document_loaders import DirectoryLoader
loader = DirectoryLoader("../data2", glob="**/*.txt")


In [6]:
docs = loader.load()

libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


In [7]:
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50
)

In [8]:
new_docs=text_splitter.split_documents(documents=docs)

In [9]:
doc_string=[doc.page_content for doc in new_docs]

In [10]:
doc_string

['🇺🇸 Overview of the U.S. Economy The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under',
 'economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over',
 'and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade,',
 'of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and',
 'healthcare, finance, real estate, defense, and agriculture.',
 'U.S. GDP – Size, Composition, and Global Share As of 2024, the United States’ nominal GDP is estimated to be around $28 trillion

In [11]:
len(doc_string)

49

In [12]:
db=Chroma.from_documents(new_docs,embeddings)

In [13]:
retriever=db.as_retriever(search_kwargs={"k": 3})

In [14]:
retriever.invoke("industrial growth of usa?")

[Document(metadata={'source': '..\\data2\\usa.txt'}, page_content='Future Outlook (2025–2030) Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden'),
 Document(metadata={'source': '..\\data2\\usa.txt'}, page_content='The U.S. maintains its GDP growth through strong innovation, entrepreneurship, and investment in R&D. With companies like Apple, Google, Amazon, Microsoft, and Tesla leading global markets, the U.S.'),
 Document(metadata={'source': '..\\data2\\usa.txt'}, page_content='Final Summary The U.S. economy remains the engine of global growth, backed by unmatched innovation, financial dominance, and a strong institutional framework. Its $28 trillion GDP and influence over')]

## creation of pydantic class

In [15]:
class TopicSelectionParser(BaseModel):
    Topic:str=Field(description="selected topic")
    Reasoning:str=Field(description='Reasoning behind topic selection')

In [16]:
from langchain.output_parsers import PydanticOutputParser

In [17]:
parser=PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [18]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

In [19]:
'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

### this below agentstate is just for the explnation like how state works

In [20]:
Agentstate={}

In [22]:
Agentstate["messages"]=[]

In [23]:
Agentstate

{'messages': []}

In [24]:
Agentstate["messages"].append("hi how are you?")

In [25]:
Agentstate

{'messages': ['hi how are you?']}

In [26]:
Agentstate["messages"].append("what are you doing?")

In [27]:
Agentstate

{'messages': ['hi how are you?', 'what are you doing?']}

In [28]:
Agentstate["messages"].append("i hope everything fine")

In [29]:
Agentstate


{'messages': ['hi how are you?',
  'what are you doing?',
  'i hope everything fine']}

In [30]:
Agentstate["messages"][-1]

'i hope everything fine'

In [31]:
Agentstate["messages"][0]

'hi how are you?'

### this agentstate class you need to inside the stategraph

In [32]:
#class AgentState(TypedDict):
 #   messages: Annotated[Sequence[BaseMessage], operator.add]

class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]
    answer: Optional[str]                        # <- for agents to store answers
    verification_passed: Optional[bool]          # <- for verifier to set pass/fail

In [33]:
state={"messages":["hi"]}

In [37]:
state="hi"

In [44]:
# ── Shared state (extends the repo’s original AgentState) ──────────
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]  # chat history
    answer: Optional[str] = None              # producer agents set this
    verification_passed: Optional[bool] = None  # validator sets pass/fail
    last_route: Optional[str] = None          # track last attempted route
    supervisor_choice: Optional[str] = None   # optional override from supervisor

# ── Router node ────────────────────────────────────────────────────
def router(state: AgentState) -> dict[str, str]:
    """
    Decide which specialised node (llm / rag / web) should handle the request.

    • If `supervisor_choice` is set (after a failed validation) route there.
    • Otherwise use simple keyword heuristics on the latest user message.
    """
    # If the supervisor already told us where to go next, honour that
    if state.get("supervisor_choice"):
        route = state.pop("supervisor_choice")   # consume the override
        state["last_route"] = route
        return {"route": route}

    # Fresh routing based on user’s text
    latest_msg = state["messages"][-1]
    
    print("last_message:", latest_msg)
    text = latest_msg.content.lower() if hasattr(latest_msg, "content") else str(latest_msg).lower()

    if any(k in text for k in ("latest", "current", "breaking", "web", "search")):
        route = "web"
    elif any(k in text for k in ("doc", "pdf", "knowledge", "kb","usa", "rag")):
        route = "rag"
    else:
        route = "llm"

    state["last_route"] = route
    return {"route": route}

In [50]:
state={"messages":["what is a today weather?"]}

In [63]:
state={"messages":["what is a GDP of usa??"]}

In [60]:
state={"messages":["What is the current rate of US dollar ?"]}

In [64]:
router(state)

last_message: what is a GDP of usa??


{'route': 'rag'}

In [1]:
def supervisor(state: AgentState) -> AgentState:
    """
    Entry and control node for the LangGraph.

    - On first pass: simply returns state to be routed normally.
    - On validation failure: decides what node to call next (sets 'supervisor_choice').

    The 'supervisor_choice' is consumed by the router to direct control flow.
    """
    if state.get("verification_passed") is False:
        print("Supervisor: Previous output failed validation.")

        # Track what was last tried
        last = state.get("last_route")

        # Decide next route (rotate between llm, rag, web)
        fallback_order = ["llm", "rag", "web"]
        try:
            next_index = (fallback_order.index(last) + 1) % len(fallback_order)
        except (ValueError, TypeError):
            next_index = 0  # default to first if last_route is missing

        next_route = fallback_order[next_index]

        print(f"Supervisor: Re-routing to {next_route}")
        state["supervisor_choice"] = next_route

    else:
        # First-time call or passed validation
        print("Supervisor: Starting or validation passed, proceeding to router.")
        state["supervisor_choice"] = None  # clear any previous value

    return state


NameError: name 'AgentState' is not defined

In [65]:
class TopicSelectionParser(BaseModel):
    Topic:str=Field(description="selected topic")
    Reasoning:str=Field(description='Reasoning behind topic selection')

In [56]:
state={"messages":["search for todays weather details??"]}

In [58]:
router(state)

last_message: search for todays weather details??


{'route': 'web'}

In [74]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [75]:
# RAG Function
def rag_agent(state:AgentState):
    print("-> RAG Call ->")
    
    question = state["messages"][0]
    
    prompt=PromptTemplate(
        template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, 
        just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:""",
        
        input_variables=['context', 'question']
    )
    
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
    result = rag_chain.invoke(question)
    return  {"messages": [result]}

In [76]:
# LLM Function
def llm_agent(state:AgentState):
    print("-> LLM Call ->")
    question = state["messages"][0]
    
    # Normal LLM call
    complete_query = "Answer the follow question with you knowledge of the real world. Following is the user question: " + question
    response = model.invoke(complete_query)
    return {"messages": [response.content]}

In [77]:
# To install: pip install tavily-python
from tavily import TavilyClient
client = TavilyClient("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")
response = client.search(
    query="What is the price of Iphone16 in ",
    search_depth="basic",  # Options: 'basic', 'advanced' (more results, slower)
    include_answer=True,      # Tavily will generate a summarized answer
    include_raw_content=False # If you just want summaries, not full HTML/text
)
print(response)

{'query': 'What is the price of Iphone16 in ', 'follow_up_questions': None, 'answer': 'The iPhone 16 Pro Max starts at $1,199. Trade-in offers can reduce this price significantly. The exact final price depends on trade-in and plan.', 'images': [], 'results': [{'title': 'Which iPhone 16 Model Should You Buy? - WIRED', 'url': 'https://www.wired.com/story/which-apple-iphone-16-models-to-buy/', 'content': 'iPhone 16 Pro Max ($1,199): Like the Plus, the Max (8/10, WIRED Recommends) is identical to the smaller iPhone 16 Pro, except it has a larger 6.9-inch screen and Apple claims it has the best battery life of the bunch. Verizon customers on the Unlimited Ultimate plan can get up to $1,000 off the entire iPhone 16 lineup (iPhone 16e, iPhone 16, iPhone 16 Plus, iPhone 16 Pro, iPhone 16 Pro Max) when trading in any phone in any condition (it has to be a device from Apple, Google, or Samsung).', 'score': 0.72184175, 'raw_content': None}, {'title': 'iPhone 16, iPhone 16 Plus, iPhone 16 Pro, and

In [78]:
# WEb CrawlerFunction

from tavily import TavilyClient

def web_agent(state:AgentState):
    print("-> WEB CRAWLER Call ->")
    question = state["messages"][0]
    #question = "When was mahatma Gandhi born and where?"
    
    os.environ['TAVILY_TOKEN']=os.getenv("TAVILY_API_KEY")
    client = TavilyClient(os.environ['TAVILY_TOKEN'])
    # Normal LLM call
    complete_query = """Search online and return a concise, up-to-date answer to the following question:"""+ question + """
    Make sure to include:
    - A short summary or direct answer
    - At least 2-3 supporting sources (URLs)
    - Mention if there is no recent or relevant information
    "Following is the user question: """
    response = client.search(
    query=complete_query,
    search_depth="basic",  # Options: 'basic', 'advanced' (more results, slower)
    include_answer=True,      # Tavily will generate a summarized answer
    include_raw_content=False # If you just want summaries, not full HTML/text
    )
    #print(response)
    #return {"messages": [response]}
    return {"messages": [response["answer"]]}

In [79]:
state={"messages":["what is a GDP of usa??"]}

In [80]:
state["messages"][0]

'what is a GDP of usa??'

In [90]:
def verify_answer(state: AgentState) -> AgentState:
    """
    Verifies the 'answer' in the state.

    - Sets 'verification_passed' to True if acceptable
    - Sets to False if answer is too short, missing, or fails any custom logic
    """
    answer = state.get("answer", "")

    # 🔍 Define your validation logic here
    if answer and len(answer.strip()) > 20:
        print("✅ Answer passed validation.")
        state["verification_passed"] = True
    else:
        print("❌ Answer failed validation.")
        state["verification_passed"] = False

    return state


In [85]:
from langgraph.graph import StateGraph


In [91]:
workflow = StateGraph(AgentState)

workflow.add_node("supervisor", supervisor)
workflow.add_node("router", router)
workflow.add_node("llm", llm_agent)
workflow.add_node("rag", rag_agent)
workflow.add_node("web", web_agent)
workflow.add_node("validator", verify_answer)

workflow.set_entry_point("supervisor")

workflow.add_edge("supervisor", "router")

workflow.add_conditional_edges(
    "router",
    lambda s: s["route"],
    {"llm": "llm", "rag": "rag", "web": "web"}
)

# after each operational agent call the validator
for node in ("llm", "rag", "web"):
    workflow.add_edge(node, "validator")

workflow.add_conditional_edges(
    "validator",
    lambda s: "END" if s["verification_passed"] else "supervisor",
    {"END": END, "supervisor": "supervisor"}
)


<langgraph.graph.state.StateGraph at 0x2efe54a2990>

In [92]:
app=workflow.compile()

In [93]:
state={"messages":["What is the weather of Delhi today?"]}

In [None]:
state={"messages":["What is the capital of USA?"]}

In [94]:
print(state)

{'messages': ['What is the weather of Delhi today?']}


In [95]:
app.invoke(state)

Supervisor: Starting or validation passed, proceeding to router.
last_message: What is the weather of Delhi today?
-> LLM Call ->
❌ Answer failed validation.
Supervisor: Previous output failed validation.
Supervisor: Re-routing to llm
-> LLM Call ->
❌ Answer failed validation.
Supervisor: Previous output failed validation.
Supervisor: Re-routing to llm
-> LLM Call ->
❌ Answer failed validation.
Supervisor: Previous output failed validation.
Supervisor: Re-routing to llm
-> LLM Call ->
❌ Answer failed validation.
Supervisor: Previous output failed validation.
Supervisor: Re-routing to llm
-> LLM Call ->
❌ Answer failed validation.
Supervisor: Previous output failed validation.
Supervisor: Re-routing to llm
-> LLM Call ->
❌ Answer failed validation.
Supervisor: Previous output failed validation.
Supervisor: Re-routing to llm


GraphRecursionError: Recursion limit of 25 reached without hitting a stop condition. You can increase the limit by setting the `recursion_limit` config key.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/GRAPH_RECURSION_LIMIT

In [None]:
state={"messages":["what is a gdp of usa?"]}

In [None]:
app.invoke(state)

In [None]:
state={"messages":["can you tell me the industrial growth of world's most powerful economy?"]}

In [None]:
state={"messages":["can you tell me the industrial growth of world's poor economy?"]}

In [None]:
result=app.invoke(state)

In [None]:
result["messages"][-1]

In [None]:
Assignment:4
1. you have to create one supervisor node.
2. create one router function
3. create three more node
3.1 llm call (llm node)
3.2 RAG (rag node)
3.3 web crawler(fetch the info in realtime from internet)
4. created one more node after this for validation for generated output --> explore the validation part how to do that
5. if validation going to be failed in that case again go to supervioser node and then supervisor node will again decide what needs to be call next
6. once the validation will pass then only generate the final output

submission deadline till 9pm friday

submission instruction:
create your github repo and keep all the assisgnments over there(in that github repo)
i will share one googleform in group after completing your assignment you can share the github link through that google form. 

in next class will discuss about the 
1. react agent with multiple tool call
2.  agentic rag