## RAG-based Chat Agent

### Installing Dependencies

In [None]:
! pip install -q "langchain[google-genai]" langchain-google-genai langchain-core langgraph langchain-community gradio ipywidgets

### Imports

In [None]:
import getpass
import os
import gradio as gr
import requests
from langchain.chat_models import init_chat_model
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage, HumanMessage
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.graph import MessagesState, StateGraph, END
from langgraph.checkpoint.memory import MemorySaver

### Initialising models and vector store with API Key 
Get Gemini API key [here](https://aistudio.google.com/apikey).

In [None]:
# Prompt for the Gemini API key only when the environment does not already
# provide a non-empty one; getpass keeps the typed key off the screen.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini:")

# Chat model used by the agent nodes defined later in the notebook.
llm = init_chat_model("gemini-2.5-flash", model_provider="google_genai")

# Embedding model used by the vector store below.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# In-memory vector store built on the embedding model above.
vector_store = InMemoryVectorStore(embeddings)

### Extracting content from webpage as markdown
Here are some example links you can use: 
- https://lilianweng.github.io/posts/2023-06-23-agent/
- https://www.coursera.org/in/articles/what-is-generative-ai

In [None]:
# Ask which page to use as the knowledge base. Surrounding whitespace is
# stripped so the URL embedded in the request below stays well-formed.
webpath = input("Enter the url of the webpage to be used as knowledge base:").strip()
if not webpath:
    raise ValueError("No URL was entered; cannot build a knowledge base.")

# The page is requested through the r.jina.ai prefix rather than directly.
# The timeout stops the cell from hanging forever on an unresponsive server.
web_response = requests.get(f"https://r.jina.ai/{webpath}", timeout=60)

# Fail loudly on an HTTP error instead of silently indexing an error page.
web_response.raise_for_status()

# Decode the raw response bytes (UTF-8 by default) into text for splitting.
webcontent = web_response.content.decode()

### Splitting the web content into chunks and indexing them in a vector store

In [None]:
# Split the fetched page into overlapping chunks (chunk size 1000,
# overlap 200) so each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
content_chunks = text_splitter.split_text(webcontent)

# Add the chunks to the vector store; the return value is not needed.
_ = vector_store.add_texts(texts=content_chunks)

### Defining the retrieval tool

In [None]:
@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query"""
    # NOTE(review): the docstring above is deliberately left unchanged; the
    # @tool decorator presumably exposes it to the model as the tool
    # description, so editing it would change runtime behaviour.

    # Fetch the three stored chunks most similar to the query.
    retrieved_docs = vector_store.similarity_search(query, k=3)

    # Hand the model only the chunk text. Formatting the Document object
    # itself would wrap the text in the object's string representation.
    serialized = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # Echo the retrieved context so it is visible in the notebook output.
    print(serialized)

    # Two values are returned to match response_format="content_and_artifact":
    # the text content first, then the raw documents as the artifact.
    return serialized, retrieved_docs

### Binding the tool to the LLM and creating the tool node

In [None]:
def query_or_respond(state: MessagesState):
    """Let the model either request a retrieval or answer directly.

    The chat model, with ``retrieve`` bound as a tool, is invoked on the
    messages held in ``state``. Its reply is returned as the update to the
    ``"messages"`` key of the graph state.
    """
    model_reply = llm.bind_tools([retrieve]).invoke(state["messages"])
    return {"messages": [model_reply]}

# Graph node wrapping the retrieve tool; it is added to the graph and wired
# up under the name "tools" further down.
tools = ToolNode([retrieve])

### Defining the function that generates the final response by prompting the LLM with a system message and the retrieved context

In [None]:
def generate(state: MessagesState):
    """Answer the user's question using the most recently retrieved context.

    Args:
        state: Graph state whose ``"messages"`` list holds the conversation
            so far, ending with the tool message(s) of the latest retrieval.

    Returns:
        A state update ``{"messages": [response]}`` containing the model's
        final answer.
    """
    # Walk backwards and collect only the trailing run of tool messages, so
    # that context from earlier turns is not mixed into this answer.
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type != "tool":
            break
        recent_tool_messages.append(message)
    # Restore chronological order.
    tool_messages = recent_tool_messages[::-1]

    # Format into prompt. Each sentence ends with a space because adjacent
    # string literals are concatenated without any separator.
    tool_messages_content = "\n\n".join(message.content for message in tool_messages)
    system_message_content = (
        "You are a helpful assistant who has retrieved the following pieces "
        "of information to form a response to the user's query. "
        "Do not hallucinate. "
        "If you don't know the answer, say that you don't know."
        "\n\n"
        f"{tool_messages_content}"
    )

    # Keep the human/system turns and the AI replies that are real answers;
    # drop AI messages that only carry tool calls, and the tool messages
    # themselves (their content is already embedded in the system prompt).
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]

    # The instructions and retrieved context go in as a system message, not
    # as an extra user turn placed in front of the real conversation.
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    final_response = llm.invoke(prompt)
    return {"messages": [final_response]}

### Building the State Graph of the Agent

In [None]:
# Build the agent graph on top of the shared message-list state.
graph_builder = StateGraph(MessagesState)

# Register the three nodes: decide-or-answer, tool execution, final answer.
graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

# Every run starts at query_or_respond. tools_condition then picks the next
# step, and its result is mapped either to END or to the "tools" node.
graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)

# After the tools node, always generate the final answer and then finish.
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)


### Compiling the graph with `MemorySaver` to persist chat history

In [None]:
# Compile the graph with a MemorySaver checkpointer attached.
memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)

# Specifying ID for thread: every invocation that passes this config uses
# the same fixed thread ID.
config = {"configurable": {"thread_id": "abc123"}}

### Defining the function to invoke the Agent with the user message

In [None]:
def get_response(message, history):
    """Run the agent on one user message and return the text of its reply.

    Args:
        message: The user's message text.
        history: Not used by this function. It is presumably part of the
            callback signature the chat UI expects (confirm against the
            Gradio ChatInterface documentation).

    Returns:
        The ``content`` of the last message in the graph's resulting state.
    """
    user_turn = HumanMessage(content=message)
    # The module-level config is passed on every call, so all calls share
    # the same fixed thread ID.
    result_state = graph.invoke(
        {"messages": [user_turn]},
        config=config,
        stream_mode="values",
    )
    return result_state["messages"][-1].content


### Starting the Chat UI

In [None]:
# Chat interface that uses get_response as its callback.
chat_ui = gr.ChatInterface(get_response, type="messages")

# Start the UI.
chat_ui.launch()