In [2]:
#llm
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model='gemini-1.5-flash')

In [3]:
#Embedding
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

In [4]:
#Take data and embed it and store in vector db
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = DirectoryLoader("../data2",glob="./*.txt",loader_cls=TextLoader)

docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)

new_docs = text_splitter.split_documents(docs)

#doc_string = [doc.page_content for doc in docs]

db = Chroma.from_documents(new_docs, embeddings)

retriever = db.as_retriever(search_kwargs={"k": 3})

In [5]:
#Creation of Pydantic class
from pydantic import BaseModel, Field
class TopicSelection(BaseModel):
    Topic:str=Field(description="selected topic either usa or not usa")
    Reasoning:str=Field(description="Reasoning behind selected topic")

from langchain.output_parsers import PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=TopicSelection)
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Topic": {"description": "selected topic either usa or not usa", "title": "Topic", "type": "string"}, "Reasoning": {"description": "Reasoning behind selected topic", "title": "Reasoning", "type": "string"}}, "required": ["Topic", "Reasoning"]}\n```'

In [6]:
#Creation of class for work flows
from typing import Annotated, Sequence, TypedDict
from langchain_core.messages import BaseMessage, HumanMessage
import operator
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage],operator.add]

In [7]:
# Create an instance
state: AgentState = {"messages": []}

# Append a proper message object
state["messages"].append(HumanMessage(content="What are you doing?"))
state["messages"].append(HumanMessage(content="WHii"))
state["messages"].append(HumanMessage(content="What is GDP of USA?"))

state

{'messages': [HumanMessage(content='What are you doing?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='WHii', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What is GDP of USA?', additional_kwargs={}, response_metadata={})]}

In [13]:
#Craeting functions/nodes for workflows
#Supervisor functin
from langchain_core.prompts import PromptTemplate
def function_1(state:AgentState):
    question = state["messages"][-1]
    template="""
    Your task is to classify the given user query into one of the following categories: [USA,Not Related]. 
    Only respond with the category name and nothing else.

    User query: {question}
    {format_instructions}
    """

    prompt = PromptTemplate(
        template=template,
        input_variables=["question"],
        partial_variables={"format_instructions":parser.get_format_instructions()}
    )
    chain = prompt | model | parser

    response = chain.invoke({"question": question})

    print("Parsed response", response)

    return {"messages" : [response.Topic]}


In [14]:
state ={"messages":["What is today's weather?"]}
function_1(state)

Parsed response Topic='Not Related' Reasoning='The query is a general weather question, not specific to the USA.'


{'messages': ['Not Related']}

In [16]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [17]:
#RAG function
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
def function_2(state:AgentState):
    print("->RAG CALL->")
    question = state["messages"][0]
    prompt = PromptTemplate(
     template = 
     """
     You are an assistant for question-answering tasks. Use the following pieces of retrieved
     context to answer the question. If you don't know the answer, just say that you don't know.
     Use three sentences maximum and keep the answer concise.\n
     Question: {question} \n Context: {context} \n Answer:
     """,
     input_variables=["context","question"]
    )

    rag_chain = (
    {"context":retriever | format_docs, "question":RunnablePassThrough()}
    | prompt
    | model
    | StrOutputParser()
    )
    result = rag_chain.invoke(question)
    return {"messages":[result]}



In [19]:
#LLM function
def function_3(state:AgentState):
    print("->LLM Call->")
    question = state["messages"][0]
    #Normal LLM Call
    complete_query = "Answer the follow question with your knowledge of the real world. Following is the user question:"+question
    response = model.invoke(complete_query)
    return {"messages":[response.content]}

In [20]:
def router(state:AgentState):
    print("->ROUTER->")
    last_message = state["messages"][-1]
    print("last_message:",last_message)
    if "usa" in last_message:
        return "RAG Call"
    else:
        return "LLM Call"


In [23]:
from langgraph.graph import StateGraph, END
workflow = StateGraph(AgentState)
workflow.add_node("Supervisor",function_1)
workflow.add_node("RAG",function_2)
workflow.add_node("LLM",function_3)
workflow.set_entry_point("Supervisor")
workflow.add_conditional_edges("Supervisor",router,{"RAG Call":"RAG","LLM Call":"LLM"})
workflow.add_edge("RAG",END)
workflow.add_edge("LLM",END)

app = workflow.compile()
state = {"messages":["Hi"]}
app.invoke(state)


Parsed response Topic='Not Related' Reasoning="The query 'Hi' is a greeting and does not relate to the USA."
->ROUTER->
last_message: Not Related
->LLM Call->


{'messages': ['Hi', 'Not Related', 'Hi!  How can I help you today?']}

In [24]:
state = {"messages":["Can you tell me industrial growth of world's most powerful economy?"]}
result= app.invoke(state)
result["messages"][-1]

Parsed response Topic='USA' Reasoning="The query asks about the industrial growth of the world's most powerful economy, which is generally considered to be the USA."
->ROUTER->
last_message: USA
->LLM Call->


'The world\'s most powerful economy is the United States.  Its industrial growth is complex and doesn\'t have a single, easily quantifiable number.  Instead, it\'s characterized by several trends:\n\n* **Shifting sectors:**  The US has experienced a significant decline in traditional manufacturing (like steel and autos) relative to its overall economy.  This is a long-term trend, driven by automation, globalization (offshoring of manufacturing), and a shift towards a service-based economy.\n\n* **Growth in specific advanced industries:** While overall manufacturing has shrunk as a percentage of GDP, there\'s been growth in *high-tech* manufacturing sectors. This includes things like pharmaceuticals, aerospace, advanced materials, and computer manufacturing. These sectors often feature higher value-added production and employ skilled labor.\n\n* **Automation and productivity:**  Industrial productivity in the US has generally increased over time, due to technological advancements and au

In [25]:
state = {"messages":["Can you tell me industrial growth of world's most poor economy?"]}
result= app.invoke(state)
result["messages"][-1]

Parsed response Topic='Not Related' Reasoning="The query asks about the industrial growth of the world's poorest economy, which is a global topic not limited to the USA."
->ROUTER->
last_message: Not Related
->LLM Call->


'There\'s no single definitive answer to "the world\'s most poor economy" because different metrics (GDP per capita, poverty rates, Human Development Index, etc.) yield different results, and data collection in the poorest countries is often unreliable.  Furthermore, "industrial growth" is complex and can be measured in various ways (manufacturing output, industrial employment, etc.).\n\nHowever, we can make some general observations:\n\n* **Limited Industrial Growth:**  The economies considered the poorest typically have extremely limited industrial growth.  Their economies are largely agrarian, relying heavily on subsistence farming and often lacking the infrastructure, capital, skilled labor, and stable political environments necessary for significant industrial development.\n\n* **Challenges to Industrialization:**  These countries face significant hurdles, including:\n    * **Lack of Infrastructure:**  Poor roads, unreliable electricity, and limited access to transportation hinder