### Configure the Model

In [212]:
from langchain_groq import ChatGroq
model = ChatGroq(model="llama3-8b-8192")
output = model.invoke("Hi, how are you")
print(output)

content="I'm just an AI, I don't have feelings or emotions like humans do. I'm just a computer program designed to understand and respond to natural language, so I don't have good or bad days. I'm here to help you with any questions or tasks you have, though! How can I assist you today?" additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 66, 'prompt_tokens': 15, 'total_tokens': 81, 'completion_time': 0.25083486, 'prompt_time': 0.025447662, 'queue_time': 4.677933832, 'total_time': 0.276282522}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_8b7c3a83f7', 'finish_reason': 'stop', 'logprobs': None} id='run--933dc1d6-e6d3-4169-bf15-4363b2db5261-0' usage_metadata={'input_tokens': 15, 'output_tokens': 66, 'total_tokens': 81}


### Configure the Embeddings

In [213]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
len(embeddings.embed_query("HI"))

384

### Lets take a data embedded and store it in the Vector DB

In [214]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate

In [215]:
## Data Loader
loader=DirectoryLoader("../data",glob="./*.txt", loader_cls=TextLoader)

In [216]:
docs = loader.load()
docs

[Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content="The economy of India is a developing mixed economy with a notable public sector in strategic sectors.[52] It is the world's fourth-largest economy by nominal GDP and the third-largest by purchasing power parity (PPP); on a per capita income basis, India ranked 136th by GDP (nominal) and 119th by GDP (PPP).[53] From independence in 1947 until 1991, successive governments followed the Soviet model and promoted protectionist economic policies, with extensive Sovietization, state intervention, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This is characterised as dirigism, in the form of the Licence Raj.[54][55] The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning.[56][57] India has about 1,900 public sector companies,[58] with the Indian state having complet

In [217]:
docs[0].page_content

"The economy of India is a developing mixed economy with a notable public sector in strategic sectors.[52] It is the world's fourth-largest economy by nominal GDP and the third-largest by purchasing power parity (PPP); on a per capita income basis, India ranked 136th by GDP (nominal) and 119th by GDP (PPP).[53] From independence in 1947 until 1991, successive governments followed the Soviet model and promoted protectionist economic policies, with extensive Sovietization, state intervention, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This is characterised as dirigism, in the form of the Licence Raj.[54][55] The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic liberalisation in India and indicative planning.[56][57] India has about 1,900 public sector companies,[58] with the Indian state having complete control and ownership of railways and highways. The Indian government has 

In [218]:
## Chunking data 
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50
)


In [219]:
new_docs =text_splitter.split_documents(documents=docs)

In [220]:
new_docs

[Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content="The economy of India is a developing mixed economy with a notable public sector in strategic sectors.[52] It is the world's fourth-largest economy by nominal GDP and the third-largest by purchasing"),
 Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content='nominal GDP and the third-largest by purchasing power parity (PPP); on a per capita income basis, India ranked 136th by GDP (nominal) and 119th by GDP (PPP).[53] From independence in 1947 until 1991,'),
 Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content='(PPP).[53] From independence in 1947 until 1991, successive governments followed the Soviet model and promoted protectionist economic policies, with extensive Sovietization, state intervention,'),
 Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content='with extensive Sovietization, state intervention, demand-side economics, natural resources,

In [221]:
doc_string = [doc.page_content for doc in new_docs]
doc_string

["The economy of India is a developing mixed economy with a notable public sector in strategic sectors.[52] It is the world's fourth-largest economy by nominal GDP and the third-largest by purchasing",
 'nominal GDP and the third-largest by purchasing power parity (PPP); on a per capita income basis, India ranked 136th by GDP (nominal) and 119th by GDP (PPP).[53] From independence in 1947 until 1991,',
 '(PPP).[53] From independence in 1947 until 1991, successive governments followed the Soviet model and promoted protectionist economic policies, with extensive Sovietization, state intervention,',
 'with extensive Sovietization, state intervention, demand-side economics, natural resources, bureaucrat-driven enterprises and economic regulation. This is characterised as dirigism, in the form of',
 'This is characterised as dirigism, in the form of the Licence Raj.[54][55] The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic',
 'in

In [222]:
len(doc_string)

27

In [223]:
db=Chroma.from_documents(new_docs, embeddings)

In [224]:
retriever = db.as_retriever(search_kwargs={"k":3})

In [225]:
retriever.invoke("what is industrial growth of India")

[Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content='in the world.[82][83][84][85][86] Economists say that due to structural economic problems, India is experiencing jobless economic growth.[87]'),
 Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content='in the world.[82][83][84][85][86] Economists say that due to structural economic problems, India is experiencing jobless economic growth.[87]'),
 Document(metadata={'source': '..\\data\\indian_economy.txt'}, page_content='in the world.[82][83][84][85][86] Economists say that due to structural economic problems, India is experiencing jobless economic growth.[87]')]

### LangGraph workflow

### 1. Creation of pydantic class

In [226]:
from pydantic import BaseModel, Field
from typing import TypedDict, Annotated, Sequence
from langchain_core.messages import BaseMessage
import operator

class TopicSelectionParser(BaseModel):
    Topic: str = Field(description="selected topic")
    Reasoning : str = Field(description="Reasoning behind the topic selection")

In [227]:
from langchain.output_parsers import PydanticOutputParser

In [228]:
parser = PydanticOutputParser(pydantic_object=TopicSelectionParser)

In [229]:
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind the topic selection", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

In [230]:
class AgentState(TypedDict):
    messages:  Annotated[Sequence[BaseMessage], operator.add]

In [231]:

def function_1(state:AgentState):

    question=state["messages"][-1]

    print("Question", question)
    
    template="""
    Your task is to classify the given user into one of the following categories: [India, NOt Related].
    Only respond with the category name and nothing else.
    User query:{question}
    {format_instructions}
    """

    prompt=PromptTemplate(
        template=template,
        input_variables=["question"],
        partial_variables={"format_instructions":parser.get_format_instructions()}
    )

    # model = ChatGroq(model="llama3-8b-8192")

    chain = prompt | model | parser

    response = chain.invoke({"question":question})
    print("Parsed Response", response)

    return {"messages": [response.Topic]}


In [232]:
state={"messages":["what is today's weather"]}

In [233]:
function_1(state)

Question what is today's weather
Parsed Response Topic='Not Related' Reasoning="User query is about general information (weather) and doesn't specifically relate to India."


{'messages': ['Not Related']}

In [234]:
state={"messages":["what is GDP of INDIA"]}
function_1(state)

Question what is GDP of INDIA
Parsed Response Topic='India' Reasoning="User query related to India's economy"


{'messages': ['India']}

In [252]:
def router(state: AgentState):
    print("--> Router -->")

    messages = state.get("messages", [])
    if not messages:
        print("[Router Warning] No messages found! Defaulting to LLM Call.")
        return "LLM Call"

    last_message = messages[-1]
    print("last message:", last_message)

    if "india" in last_message.lower():
        return "RAG Call"
    else:
        return "LLM Call"


In [253]:
## RAG Function

def function_2(state:AgentState):
    print("--> RAG Call -->")

    question = state["message"][0]

    prompt = PromptTemplate(
        template="""
        You are an assistant for the question-answering tasks. Use the following pieces of retrived context to answer. If you dont know 
        the answer, just say that you dont know. Use three sentences maximum and keep the answer concise. \n Question: {question} \nContentxt:
        {context} \nAnswer:""",
        input_variables=['context', 'question']
    )

    rag_chain = (
        {"context": retriver | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
    result = rag_chain.invoke(question)
    return {
        {"messages"}: ['result']
    }

In [254]:
## LLM Function
def function_3(state:AgentState):
    print("--> LLM Call -->")
    question = state["messages"][0]

# Normal LLM Call

    complete_query = "Answer the following question with your knowledge of the real world. Following is the user question: " + question
    response = model.invoke(complete_query)
    return {"messages": [response.content]}

In [255]:
def router(state:AgentState):
    pass

In [256]:
from langgraph.graph import StateGraph, END

In [257]:
workflow = StateGraph(AgentState)

In [258]:
workflow.add_node("Supervisor", function_1)

<langgraph.graph.state.StateGraph at 0x22707aa7100>

In [259]:
workflow.add_node("RAG", function_2)

<langgraph.graph.state.StateGraph at 0x22707aa7100>

In [260]:
workflow.add_node("LLM", function_3)

<langgraph.graph.state.StateGraph at 0x22707aa7100>

In [261]:
workflow.set_entry_point("Supervisor")

<langgraph.graph.state.StateGraph at 0x22707aa7100>

In [262]:
workflow.add_conditional_edges(
    "Supervisor",
    router,
    {
        "RAG Call": "RAG",
        "LLM Call": "LLM"
    }
)

<langgraph.graph.state.StateGraph at 0x22707aa7100>

In [263]:
workflow.add_edge("RAG",END)
workflow.add_edge("LLM", END)

<langgraph.graph.state.StateGraph at 0x22707aa7100>

In [264]:
app= workflow.compile()

In [265]:
state={"messages":["whats gdp of state"]}

In [None]:
app.invoke(state)

Question whats gdp of state
Parsed Response Topic='Not Related' Reasoning='The user query is about GDP of a state, which is not a topic related to India'


KeyError: None