In [354]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [355]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

## Config the model

In [356]:
from langchain_groq import ChatGroq
model = ChatGroq(model="deepseek-r1-distill-llama-70b")

## Config the embedding model

In [357]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
len(embeddings.embed_query("hi"))

384

## lets take a data embedd it and store in VDB

In [358]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [359]:
# To load all txt files from directory using glob function 
#loader=DirectoryLoader("../Data",glob="./*.txt",loader_cls=TextLoader)

In [360]:
#To load single txt file from a directory
loader = TextLoader("../Data/usa.txt")

In [361]:
docs=loader.load()

In [362]:
docs

[Document(metadata={'source': '../Data/usa.txt'}, page_content="🇺🇸 Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP – Size, Composition, and Global Share\nAs of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China (which ranks

In [363]:
docs[0].page_content

"🇺🇸 Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP – Size, Composition, and Global Share\nAs of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China (which ranks 2nd). The U.S. GDP per capita is also among the highest, hover

In [364]:
text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50
)

In [365]:
new_docs=text_splitter.split_documents(documents=docs)

In [366]:
new_docs

[Document(metadata={'source': '../Data/usa.txt'}, page_content='🇺🇸 Overview of the U.S. Economy'),
 Document(metadata={'source': '../Data/usa.txt'}, page_content='The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,'),
 Document(metadata={'source': '../Data/usa.txt'}, page_content='It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a'),
 Document(metadata={'source': '../Data/usa.txt'}, page_content='a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services.'),
 Document(metadata={'source': '../Data/usa.txt'}, page_content='innovation, global trade, and financial 

In [367]:
doc_string=[doc.page_content for doc in new_docs]

In [368]:
doc_string

['🇺🇸 Overview of the U.S. Economy',
 'The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,',
 'It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a',
 'a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services.',
 'innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.',
 'U.S. GDP – Size, Composition, and Global Share',
 'As of 2024, the United States’ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 

In [369]:
len(doc_string)

55

In [370]:
db=Chroma.from_documents(new_docs,embeddings)

In [371]:
retriever=db.as_retriever(search_kwargs={"k": 3})

In [372]:
retriever.invoke("industrial growth of usa?")

[Document(metadata={'source': '../Data/usa.txt'}, page_content='Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden administration’s Inflation'),
 Document(metadata={'source': '../Data/usa.txt'}, page_content='Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden administration’s Inflation'),
 Document(metadata={'source': '../Data/usa.txt'}, page_content='Looking forward, the U.S. economy is expected to grow at a moderate pace, powered by innovation in AI, green energy, robotics, biotech, and quantum computing. The Biden administration’s Inflation')]

## creation of pydantic class


In [373]:
import operator
from typing import List
from pydantic import BaseModel , Field
from langchain.prompts import PromptTemplate
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import HumanMessage, AIMessage
from langgraph.graph import StateGraph,END

In [374]:
class TopicSelectionParser(BaseModel):
    Topic:str=Field(description="selected topic")
    Reasoning:str=Field(description="Reasoning behind topic selection")

In [375]:
from langchain.output_parsers import PydanticOutputParser

In [376]:
parser=PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [377]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

## this below agentstate is just for the explnation like how state works

In [378]:
Agentstate={}

In [379]:
Agentstate["messages"]=[]

In [380]:
Agentstate

{'messages': []}

In [381]:
Agentstate["messages"].append("hi how are you?")

In [382]:
Agentstate

{'messages': ['hi how are you?']}

In [383]:
Agentstate["messages"].append("how are you doing?")

In [384]:
Agentstate

{'messages': ['hi how are you?', 'how are you doing?']}

In [385]:
Agentstate["messages"].append("I hope you are doing well")

In [386]:
Agentstate

{'messages': ['hi how are you?',
  'how are you doing?',
  'I hope you are doing well']}

In [387]:
Agentstate["messages"][-1]

'I hope you are doing well'

## this agentstate class you need to inside the stategraph


In [388]:
# It creates a Dict of messages in sequence format
# '''{'messages': ['hi how are you?',
#   how are you doing?',
#   I hope you are doing well']}'''

class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [389]:
def function_1(state:AgentState):
    
    question=state["messages"][-1]
    
    print("Question: ",question)
    
    template="""
    Your task is to classify the given user query into one of the following categories: [USA,Not Related]. 
    Respond ONLY in valid JSON with the following keys:
    - Topic: the category name
    - Reasoning: the reasoning behind the topic selection

    User query: {question}
    {format_instructions}
    """
    
    prompt= PromptTemplate(
        template=template,
        input_variable=["question"],
        partial_variables={"format_instructions": parser.get_format_instructions()}
    )
    
    
    chain= prompt | model | parser
    
    response = chain.invoke({"question":question})
    
    print("Parsed response: ", response)
    
    return {"messages: ": [response.Topic]}

In [390]:
state={"messages":["what is a today weather?"]}

In [391]:
state={"messages":["what is a GDP of usa??"]}

In [392]:
function_1(state)

Question:  what is a GDP of usa??
Parsed response:  Topic='USA' Reasoning='The user is asking about the GDP of the USA, which is a specific economic indicator related to the United States.'


{'messages: ': ['USA']}

In [393]:
def router(state:AgentState):
    print("-> ROUTER ->")
    
    last_message=state["messages"][-1]
    print("last_message:", last_message)
    
    if "usa" in last_message.lower():
        return "RAG Call"
    else:
        return "LLM Call"

In [394]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [395]:
# RAG Function
def function_2(state:AgentState):
    print("-> RAG Call ->")
    
    question = state["messages"][0]
    
    prompt=PromptTemplate(
        template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:""",
        
        input_variables=['context', 'question']
    )
    
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
    result = rag_chain.invoke(question)
    return  {"messages": [result]}

In [396]:
# LLM Function
def function_3(state:AgentState):
    print("-> LLM Call ->")
    question = state["messages"][0]
    
    # Normal LLM call
    complete_query = "Anwer the follow question with you knowledge of the real world. Following is the user question: " + question
    response = model.invoke(complete_query)
    return {"messages": [response.content]}

In [397]:
from langgraph.graph import StateGraph,END

In [398]:
workflow=StateGraph(AgentState)

In [399]:
workflow.add_node("Supervisor",function_1)

<langgraph.graph.state.StateGraph at 0x1a9e61995b0>

In [400]:
workflow.add_node("RAG",function_2)

<langgraph.graph.state.StateGraph at 0x1a9e61995b0>

In [401]:
workflow.add_node("LLM",function_3)

<langgraph.graph.state.StateGraph at 0x1a9e61995b0>

In [402]:
workflow.set_entry_point("Supervisor")

<langgraph.graph.state.StateGraph at 0x1a9e61995b0>

In [None]:
workflow.add_conditional_edges(
    "Supervisor",
    router,
    {
        "RAG Call": "RAG",
        "LLM Call": "LLM",
    }
)

In [None]:
workflow.add_edge("RAG",END)
workflow.add_edge("LLM",END)

In [421]:
app=workflow.compile()

In [422]:
state={"messages":["what is a gdp of usa?"]}

In [429]:
state={"messages":["can you tell me the industrial growth of world's most powerful economy USA?"]}

In [428]:
state={"messages":["can you tell me the industrial growth of world's poor economy?"]}

In [430]:
result=app.invoke(state)

Question:  can you tell me the industrial growth of world's most powerful economy USA?
Parsed response:  Topic='USA' Reasoning='The user query specifically mentions the USA and its economy.'
-> ROUTER ->
last_message: can you tell me the industrial growth of world's most powerful economy USA?
-> RAG Call ->


In [431]:
result["messages"][-1]

"<think>\nOkay, the user is asking about the industrial growth of the USA, the world's most powerful economy. I need to use the provided context to answer. \n\nLooking at the context, it repeats the same information three times: the US has the largest economy in nominal GDP and operates a capitalist mixed economy. There's no specific data on industrial growth rates or trends over time. \n\nSince the context doesn't provide details on industrial growth, I can't give a precise answer. I should inform the user that I don't have the specific information they're asking for.\n</think>\n\nI don't know the specific details about the industrial growth of the USA."