# Test querying the chatbot

## Setup

In [None]:
import logging
import os
from dotenv import load_dotenv
from typing_extensions import List, TypedDict, Literal
from typing import Optional, Annotated

from langchain_core.documents import Document
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_community.vectorstores import AzureSearch
from langchain.prompts import PromptTemplate
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.prebuilt import ToolNode
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage

In [None]:
# Load variables from a .env file (if any) into the process environment, then
# read the name of the Azure OpenAI chat deployment. The value is None when
# AZURE_DEPLOYMENT_MODEL is not set.
load_dotenv()
oai_model = os.environ.get("AZURE_DEPLOYMENT_MODEL")

In [None]:
# Chat model, addressed by the deployment name read from the environment.
llm = AzureChatOpenAI(
    azure_deployment=oai_model,
    api_version="2024-02-01",
    max_retries=2,
    max_tokens=None,
    temperature=0,
    timeout=None,
)

# Embedding model; its endpoint and key come from the *_EMBED_* variables.
embeddings = AzureOpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=os.environ.get("AZURE_OPENAI_EMBED_API_KEY"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_EMBED_ENDPOINT"),
)

# Azure AI Search index used for retrieval. The API key is read with
# os.environ[...] so a missing key fails here with KeyError.
vector_store = AzureSearch(
    index_name="dataroots-guidelines-vector-index",
    azure_search_endpoint=os.environ.get("AZURE_SEARCH_SERVICE"),
    azure_search_key=os.environ["AZURE_SEARCH_API_KEY"],
    embedding_function=embeddings.embed_query,
    additional_search_client_options={"retry_total": 4},
)

## Run bot

In [None]:
# Prompt for the single-shot RAG chain. The {context} and {question}
# placeholders are filled in when the prompt is invoked.
template = (
    "Use the following pieces of context to answer the question at the end.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    'Use five sentences maximum. Keep the answer as concise as possible. Always say "Would you like more information?" at the end of the answer.\n'
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Helpful Answer:"
)
custom_rag_prompt = PromptTemplate.from_template(template)

In [None]:
# Structured-output schema for query analysis: `analyze_query` asks the LLM to
# fill in these fields from the user's question.
# NOTE(review): the class docstring and the description strings inside
# Annotated[...] are presumably sent to the model as part of the schema, so
# treat them as prompt text rather than documentation; confirm before rewording.
class Search(TypedDict):
    """Search query."""

    # Text to search for.
    query: Annotated[str, ..., "Search query to run."]
    # Project name to narrow results by; typed Optional, so it may be None.
    section: Annotated[
        Optional[str],
        ...,
        "Project name from PDF source to filter by."
    ]

In [None]:
class State(TypedDict):
    """State shared by the nodes of the single-shot RAG graph."""

    # User question, supplied by the caller of `graph.invoke`.
    question: str
    # Structured query written by `analyze_query`.
    query: Search
    # Documents written by `retrieve`.
    context: List[Document]
    # Answer text written by `generate`.
    answer: str

In [None]:
def analyze_query(state: State):
    """Turn the user's question into a structured ``Search`` query.

    Returns a partial state update whose ``query`` key holds whatever the
    structured-output call produces for ``state["question"]``.
    """
    search_query = llm.with_structured_output(Search).invoke(state["question"])
    return {"query": search_query}

In [None]:
def retrieve(state: State):
    """Fetch context documents for the analyzed query.

    Runs a hybrid search (k=10) and, when a project/section name is known,
    narrows the hits to documents whose source name contains it.

    Args:
        state: Graph state. Reads ``query`` (the structured ``Search`` output
            of ``analyze_query``) and ``question``. A top-level ``project``
            key is still honoured as a fallback for direct callers.

    Returns:
        Partial state update ``{"context": [Document, ...]}``.
    """
    structured = state.get("query") or {}
    # Prefer the search text produced by query analysis; fall back to the raw
    # question when the node is called without a prior analysis step.
    search_text = structured.get("query") or state["question"]
    # The filter value lives in the structured query's "section" field. The
    # earlier lookup of state["project"] never matched because no visible
    # node writes that key, so the filter was silently skipped.
    section = structured.get("section") or state.get("project")

    retrieved_docs = vector_store.similarity_search(
        search_text,
        k=10,
        search_type="hybrid",
    )

    if section:
        # Case-insensitive substring match applied to the returned documents.
        # A Mongo-style {"$regex": ...} dict is not a filter format that
        # AzureSearch accepts.
        # NOTE(review): pushing this down as an OData `filters=` expression
        # needs a filterable source field in the index; confirm the schema.
        needle = str(section).casefold()
        matching = [
            doc
            for doc in retrieved_docs
            if needle
            in str(
                doc.metadata.get("source") or doc.metadata.get("file_name") or ""
            ).casefold()
        ]
        # Keep the unfiltered hits when nothing matches, so an imprecise
        # section guess cannot leave the answer step without any context.
        if matching:
            retrieved_docs = matching

    return {"context": retrieved_docs}

In [None]:
def generate(state: State):
    """Answer the question from the retrieved context.

    Joins the page content of every document in ``state["context"]`` with
    blank lines, fills the RAG prompt with it and the question, and returns
    the model's reply text under the ``answer`` key.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_value = custom_rag_prompt.invoke(
        {"context": context_text, "question": state["question"]}
    )
    reply = llm.invoke(prompt_value)
    return {"answer": reply.content}

In [None]:
# Linear pipeline: analyze_query -> retrieve -> generate, entered from START.
graph_builder = StateGraph(State)
graph_builder.add_sequence([analyze_query, retrieve, generate])
graph_builder.add_edge(START, "analyze_query")

graph = graph_builder.compile()

In [None]:
# Smoke test for the single-shot RAG graph: ask one question, then print the
# retrieved documents followed by the generated answer.
query = "Is there a project about chatbots? If so, what's his title? What are the section titles?"
result = graph.invoke({"question": query})

print(f'Context: {result["context"]}\n')
print(f'Answer: {result["answer"]}')

## Convert to a chatbot with conversation memory

In [None]:
# In-memory checkpointer instance.
# NOTE(review): presumably passed to `workflow.compile(checkpointer=...)`
# further down; that call is not visible in this section.
memory = MemorySaver()

In [None]:
@tool
def rag_search(query: str) -> str:
    """Search through the document database to find relevant information."""
    # NOTE: the docstring above is left as-is on purpose; `@tool` uses it as
    # the tool description shown to the model.
    #
    # Returns one "Source ... / Content ..." entry per hit, separated by
    # "---" rules, or a short notice when the search finds nothing.
    retrieved_docs = vector_store.similarity_search(
        query,
        k=10,
        search_type="hybrid",
    )

    if not retrieved_docs:
        # An empty string gives the model nothing to act on; say so instead.
        return "No relevant documents were found for this query."

    # Build each entry from explicit pieces so the indentation of this source
    # file does not leak into the text handed to the model.
    context_pieces = [
        (
            f"Source: {doc.metadata.get('file_name', 'Unknown source')} "
            f"(Page {doc.metadata.get('page', 'Unknown page')})\n"
            f"Content: {doc.page_content}"
        )
        for doc in retrieved_docs
    ]
    return "\n\n---\n\n".join(context_pieces)

In [None]:
# Tools the agent may call, and the graph node that wraps them.
tools = [rag_search]
tool_node = ToolNode(tools)

In [None]:
# Chat model with the tool list bound to it; `call_model` invokes this.
bound_model = llm.bind_tools(tools)

In [None]:
def should_continue(state: MessagesState) -> Literal["action", "__end__"]:
    """Route after the agent node: run the tools or finish.

    Args:
        state: Message state; only the last message is inspected.

    Returns:
        ``"action"`` when the last message carries tool calls, otherwise
        ``END``.
    """
    last_message = state["messages"][-1]
    # A message without a `tool_calls` attribute counts as "no tool calls".
    if getattr(last_message, "tool_calls", None):
        return "action"
    # END is LangGraph's "__end__" sentinel, which is why the return
    # annotation names "__end__" rather than "end".
    return END

In [None]:
def call_model(state: MessagesState):
    """Call the tool-enabled model with a system prompt leading the input.

    Args:
        state: Message state holding the conversation so far.

    Returns:
        Partial state update ``{"messages": [response]}`` containing the
        model's reply.
    """
    # Create a system prompt that includes RAG instructions
    system_prompt = """You are a helpful assistant with access to a document database. 
    When answering questions, use the rag_search tool to find relevant information.
    Always provide thorough, clear explanations and cite your sources.
    Break down complex topics into understandable parts and use examples when helpful.
    End your responses by asking if the user would like more information."""

    messages = list(state["messages"])
    # LangChain message objects expose their kind as `.type` ("system",
    # "human", ...). They have no `.role`, so the earlier `.role` check raised
    # AttributeError as soon as the first message was a user message.
    if not messages or getattr(messages[0], "type", None) != "system":
        # Prepend on a copy rather than inserting into state["messages"]:
        # the state should only change through the returned update.
        messages = [SystemMessage(content=system_prompt), *messages]

    response = bound_model.invoke(messages)
    return {"messages": [response]}

In [None]:
# Build the graph
workflow = StateGraph(MessagesState)
workflow.add_node("agent", call_model)
workflow.add_node("action", tool_node)
workflow.add_edge(START, "agent")
workflow.add_conditional_edges(
    "agent",
    should_continue,
    ["action", END],
)
workflow.add_edge("action", "agent")