In [2]:
from dotenv import load_dotenv
load_dotenv()

import os
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
os.environ['HUGGINGFACE_API_KEY'] = os.getenv('HUGGINGFACE_API_KEY')

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
model = ChatGroq(model_name="llama-3.3-70b-versatile")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
len(embeddings.embed_query("Hello World!"))

384

In [6]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [8]:
loader = DirectoryLoader("../data2",glob="./*.txt",loader_cls=TextLoader)

In [9]:
docs = loader.load()

In [10]:
docs

[Document(metadata={'source': '..\\data2\\usa.txt'}, page_content="ðŸ‡ºðŸ‡¸ Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP â€“ Size, Composition, and Global Share\nAs of 2024, the United Statesâ€™ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China

In [11]:
docs[0].page_content

"ðŸ‡ºðŸ‡¸ Overview of the U.S. Economy\nThe United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.\n\nU.S. GDP â€“ Size, Composition, and Global Share\nAs of 2024, the United Statesâ€™ nominal GDP is estimated to be around $28 trillion USD, accounting for approximately 25% of the global economy. It ranks #1 in the world by nominal GDP, far ahead of China (which ranks 2nd). The U.S. GDP per capita is also among the high

In [12]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)

In [14]:
new_docs = text_splitter.split_documents(docs)

In [18]:
doc_string = [doc.page_content for doc in new_docs]

In [19]:
doc_string

['ðŸ‡ºðŸ‡¸ Overview of the U.S. Economy',
 'The United States of America possesses the largest economy in the world in terms of nominal GDP, making it the most powerful economic force globally. It operates under a capitalist mixed economy,',
 'It operates under a capitalist mixed economy, where the private sector dominates, but the government plays a significant regulatory and fiscal role. With a population of over 335 million people and a',
 'a population of over 335 million people and a high level of technological advancement, the U.S. economy thrives on a foundation of consumer spending, innovation, global trade, and financial services.',
 'innovation, global trade, and financial services. It has a highly diversified structure with strong sectors in technology, healthcare, finance, real estate, defense, and agriculture.',
 'U.S. GDP â€“ Size, Composition, and Global Share',
 'As of 2024, the United Statesâ€™ nominal GDP is estimated to be around $28 trillion USD, accounting for appr

In [20]:
len(doc_string)

56

In [21]:
db = Chroma.from_documents(new_docs, embeddings)

In [22]:
retriever = db.as_retriever(search_kwargs={"k":3})

In [23]:
retriever.invoke("industrial growth of usa?")

[Document(metadata={'source': '..\\data2\\usa.txt'}, page_content='The U.S. economy remains the engine of global growth, backed by unmatched innovation, financial dominance, and a strong institutional framework. Its $28 trillion GDP and influence over global'),
 Document(metadata={'source': '..\\data2\\usa.txt'}, page_content='GDP Growth Trends and Dynamics'),
 Document(metadata={'source': '..\\data2\\usa.txt'}, page_content='The U.S. maintains its GDP growth through strong innovation, entrepreneurship, and investment in R&D. With companies like Apple, Google, Amazon, Microsoft, and Tesla leading global markets, the U.S.')]

# creation of pydantic class

In [112]:
from pydantic import BaseModel, Field

class TopicSelectionParser(BaseModel):
    topic: str = Field(description="The topic of the conversation")
    reasoning: str = Field(description="The reasoning behind the topic selection")

In [113]:
from langchain_core.output_parsers import PydanticOutputParser


In [114]:
parser = PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [115]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"topic": {"description": "The topic of the conversation", "title": "Topic", "type": "string"}, "reasoning": {"description": "The reasoning behind the topic selection", "title": "Reasoning", "type": "string"}}, "required": ["topic", "reasoning"]}\n```'

In [116]:
import operator
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]

# this agent dict is for the exmaple purpose

In [40]:
AgentState = {}

In [41]:
AgentState['messages'] = []

In [42]:
AgentState

{'messages': []}

In [43]:
AgentState['messages'].append("Hi, how are you?")

In [45]:
AgentState

{'messages': ['Hi, how are you?']}

In [46]:
AgentState['messages'].append("I am fine, thanks!")


In [47]:
AgentState

{'messages': ['Hi, how are you?', 'I am fine, thanks!']}

In [48]:
AgentState['messages'].append("You are a helpful assistant.")

In [49]:
AgentState

{'messages': ['Hi, how are you?',
  'I am fine, thanks!',
  'You are a helpful assistant.']}

In [None]:
AgentState["messages"][0 ]

'Hi, how are you?'

# From here the actual agent state starts

In [141]:
class AgentState(TypedDict):
    """
    This class represents the state of the agent.
    """
    messages: Annotated[Sequence[BaseMessage], operator.add]

# SUPERVISOR

In [142]:
from langchain_core.prompts import PromptTemplate

def function1(state: AgentState):
    """
    This function is the supervisor function.
    It will decide which agent to call next.
    """
    question = state["messages"][-1]
    print("Question: ", question)

    template = """
    Your task is it identif the given user query into one of the following categories: [USA, Not Related].
    Only respond with the category name and nothing else.

    User Query: {question}
    {format_instructions}
    """

    prompt = PromptTemplate(
        template= template,
        input_variables=["question"],
        partial_variables={"format_instructions": parser.get_format_instructions()}
    )

    chain = prompt | model | parser
    response = chain.invoke({"question": question})
    print("Parsed Response: ", response)
    return {"messages": [response.topic]}


In [143]:
state = {"messages":["what is today's weather"]}

In [144]:
function1(state)

Question:  what is today's weather
Parsed Response:  topic='Not Related' reasoning='The user query is about the weather, which does not specifically relate to the USA or any other geographic location.'


{'messages': ['Not Related']}

In [145]:
state = {"messages": ["what's the GDP of USA?"]}

In [146]:
function1(state)


Question:  what's the GDP of USA?
Parsed Response:  topic='USA' reasoning="The user query mentions 'USA' which is a direct reference to the country."


{'messages': ['USA']}

# ROUTER 

In [147]:
def router(state: AgentState):
    """
    This function routes the message to either RAG Call or LLM Call
    """
    print("-----------Router------------")

    last_message = state["messages"][-1]
    print("Last Message: ", last_message)

    if "USA" in last_message:
        return "RAG Call"
    else:
        return "LLM Call"

# RAG CALL

In [161]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [175]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def function2(state: AgentState):
    """
    This function calls the RAG Call to generate a response
    """
    print("-----------RAG Call------------")
    question = state["messages"][0]

    prompt = PromptTemplate(
        template="""
        You are an assistant for question-answering tasks.
        Use the following pieces of retrieved context to answer the question.
        If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
        Context: {context}
        Question: {question}
        """,
        input_variables=["context", "question"],
    )

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    result = rag_chain.invoke(question)
    return {"messages": [result]}

# LLM CALL

In [176]:
def function3(state: AgentState):
    """
    This function calls the LLM Call to generate a response
    """
    print("-----------LLM Call------------")
    question = state["messages"][0]

    complete_query = "Answer the follow question wiht your knowledge: " + question
    response =  model.invoke(complete_query)
    return {"messages": [response]}
    

In [177]:
from langgraph.graph import StateGraph, END

In [178]:
workflow = StateGraph(AgentState)

In [179]:
workflow.add_node("supervisor", function1)

<langgraph.graph.state.StateGraph at 0x1bc0523cad0>

In [180]:
workflow.add_node("RAG", function2)

<langgraph.graph.state.StateGraph at 0x1bc0523cad0>

In [181]:
workflow.add_node("LLM", function3)

<langgraph.graph.state.StateGraph at 0x1bc0523cad0>

In [182]:
workflow.set_entry_point("supervisor")

<langgraph.graph.state.StateGraph at 0x1bc0523cad0>

In [183]:
workflow.add_conditional_edges(
    "supervisor",
    router,
    {
        "RAG Call": "RAG",
        "LLM Call": "LLM",
    }
    )  

<langgraph.graph.state.StateGraph at 0x1bc0523cad0>

In [184]:
workflow.add_edge("RAG", END)
workflow.add_edge("LLM", END)

<langgraph.graph.state.StateGraph at 0x1bc0523cad0>

In [185]:
app = workflow.compile()

In [186]:
app.invoke({"messages": ["What is the capital of France?"]})

Question:  What is the capital of France?
Parsed Response:  topic='Not Related' reasoning='The user query is about the capital of France, which is not related to the USA.'
-----------Router------------
Last Message:  Not Related
-----------LLM Call------------


{'messages': ['What is the capital of France?',
  'Not Related',
  AIMessage(content='The capital of France is Paris.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 51, 'total_tokens': 59, 'completion_time': 0.011068619, 'completion_tokens_details': None, 'prompt_time': 0.001681068, 'prompt_tokens_details': None, 'queue_time': 0.053835662, 'total_time': 0.012749687}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_c06d5113ec', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c154d-db5f-7933-a649-b175ff6d8d9d-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 51, 'output_tokens': 8, 'total_tokens': 59})]}

In [187]:
 app.invoke({"messages": ["What is the GDP of usa?"]})

Question:  What is the GDP of usa?
Parsed Response:  topic='USA' reasoning="The user query mentions 'usa' which is a clear reference to the United States of America."
-----------Router------------
Last Message:  USA
-----------RAG Call------------


{'messages': ['What is the GDP of usa?',
  'USA',
  'The nominal GDP of the United States is estimated to be around $28 trillion USD as of 2024. This accounts for approximately 25% of the global economy. The US ranks #1 in the world by nominal GDP, surpassing China which ranks 2nd.']}