### Run this if you are using Google Colab

In [None]:
# !pip install langchain langchain-openai langchain-qdrant

In [None]:
# import os
# from google.colab import userdata

# os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
# os.environ["LANGSMITH_API_KEY"] = userdata.get("LANGSMITH_API_KEY")
# os.environ["LANGSMITH_PROJECT"] = "ncit-workshop"
# os.environ["LANGSMITH_TRACING"] = "true"
# os.environ["LANGSMITH_ENDPOINT"] = "https://api.smith.langchain.com"
# os.environ["QDRANT_API_KEY"] = userdata.get("QDRANT_API_KEY")
# os.environ["QDRANT_URL"] = "qdrant-host"

### Run this if you are running in VS Code

In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path so the
# `core` module imported below can be resolved (presumably it lives there).
parent_dir = Path().resolve().parent
sys.path.append(str(parent_dir))

from core import load_vault_env

# NOTE(review): presumably loads the API keys / endpoints used below into the
# environment -- confirm against `core.load_vault_env`.
load_vault_env()

### Imports

In [None]:
import os
import operator
from typing import Annotated, Literal, TypedDict, Any

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage
from langchain_qdrant import QdrantVectorStore
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain_community.utilities import SQLDatabase
from langchain.agents import create_agent
from langchain.tools import tool

from langgraph.types import Command
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import InMemorySaver

from qdrant_client.models import models

### Initialization

#### Credentials

In [None]:
# Qdrant connection settings read from the environment; each value is None
# when the corresponding variable is not set.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")

#### Initialize clients

In [None]:
# Embedding model used to vectorize queries sent to the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Attach to an existing Qdrant collection (nothing is created here),
# connecting with the URL and API key read from the environment.
vector_store = QdrantVectorStore.from_existing_collection(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
    prefer_grpc=True,
    collection_name="ncit-workshop-simple-rag",
    embedding=embeddings,
)

### Retrieval Tool

In [None]:
# Defining tool using LangChain's tool decorator
@tool
def retrieve_relevant_docs(
    question: str,
    filter: Literal[
        "finance",
        "it_policy",
        "hr_policy",
        "legal_policy",
        "operations_policy",
        "engineering_policy",
    ]
    | None = None,
    k: int = 3,
) -> str:
    """
    Retrieve relevant policy documents from the vector store.

    Args:
        question: Natural-language query to search for.
        filter: Optional policy category. When given, only documents whose
            ``metadata.category`` equals this value are searched; when
            omitted, all categories are searched.
        k: Maximum number of documents to fetch before score filtering.

    Returns:
        The matching documents formatted as "Policy ID / Topic / Rule"
        entries separated by blank lines, or "NO RELEVANT DOCUMENT FOUND."
        when no result has a similarity score of at least 0.5.
    """

    # Both the question and the category are wrapped in matching quotes.
    print(f"\n[CHAIN LOG] Searching for: '{question}' in '{filter or 'ALL'}'")

    # Restrict the search to a single category only when one was requested.
    q_filter = None
    if filter:
        q_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="metadata.category", match=models.MatchValue(value=filter)
                )
            ]
        )

    # Perform search with scores
    results = vector_store.similarity_search_with_score(
        query=question, k=k, filter=q_filter
    )

    # Keep only results at or above the score threshold and format them.
    # `.get` is used so that a document with incomplete metadata yields a
    # placeholder instead of aborting the whole tool call with a KeyError.
    valid_context = [
        f"Policy ID: {doc.metadata.get('policy_id', 'UNKNOWN')}\n"
        f"Topic: {doc.metadata.get('topic', 'UNKNOWN')}\n"
        f"Rule: {doc.page_content}"
        for doc, score in results
        if score >= 0.5
    ]

    if not valid_context:
        return "NO RELEVANT DOCUMENT FOUND."

    return "\n\n".join(valid_context)

### RAG Agent

In [None]:
# Agent that answers policy questions, with `retrieve_relevant_docs` as its
# only tool.
rag_agent = create_agent(
    model="gpt-4.1-mini",
    tools=[retrieve_relevant_docs],
    system_prompt=SystemMessage(
        content=[
            {
                "type": "text",
                "text": (
                    # Adjacent string literals are concatenated verbatim, so
                    # each sentence carries its own trailing space.
                    "You are a strictly factual HR Policy Bot. "
                    "Answer the question based ONLY on the context provided below. "
                    "Cite the Policy ID and topic for every fact you state."
                ),
            }
        ]
    ),
)

In [None]:
# Also provide the filter parameter to narrow down the search
question = "What is the hotel spending limit for major metro areas like NYC? (filter: 'finance')"
request = {"messages": [{"role": "user", "content": question}]}

# Print the most recent message of every streamed step.
for step in rag_agent.stream(request, stream_mode="values"):
    latest_message = step["messages"][-1]
    latest_message.pretty_print()

## SQL Agent

In [None]:
# Open the SQLite file `data.db` and print a short overview of it.
db = SQLDatabase.from_uri("sqlite:///data.db")

dialect = db.dialect
print(f"Dialect: {dialect}")

table_names = db.get_usable_table_names()
print(f"Available Tables: {table_names}")

# Runs a query against the `trips` table and prints whatever `db.run` returns.
sample_rows = db.run("SELECT * from trips")
print(f"Sample output: {sample_rows}")

#### Initialize LLM model

In [None]:
# `model` selects the OpenAI chat model. (`name` would only label the
# runnable and leave the model at the library default.)
llm = ChatOpenAI(model="gpt-4.1-mini", temperature=0)

#### SQLDatabaseToolkit available tools

In [None]:
# Build the SQL toolkit from the database connection and the chat model,
# then collect the tools it exposes.
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
tools = toolkit.get_tools()

# Show the name and description of every tool, one blank line apart.
for toolkit_tool in tools:
    summary = f"{toolkit_tool.name}: {toolkit_tool.description}\n"
    print(summary)

### Create Agent

#### Prompt

In [None]:
# System prompt for the SQL agent. `{dialect}` is filled with the connected
# database's dialect and `{top_k}` with the default row limit for queries.
system_prompt = """
You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct {dialect} query to run,
then look at the results of the query and return the answer. Unless the user
specifies a specific number of examples they wish to obtain, always limit your
query to at most {top_k} results.

You can order the results by a relevant column to return the most interesting
examples in the database. Never query for all the columns from a specific table,
only ask for the columns that are relevant to the question.

You MUST double check your query before executing it. If you get an error while
executing a query, try to fix the query and execute it again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the
database.

To start you should ALWAYS look at the tables in the database to see what you
can query. Do NOT skip this step.

Then you should query the schema for the most relevant tables.
""".format(
    dialect=db.dialect,
    top_k=5,
)

#### LangChain Agent

In [None]:
from langchain.agents import create_agent

# SQL agent: the chat model, the toolkit's tools, and the SQL system prompt.
agent = create_agent(llm, tools=tools, system_prompt=system_prompt)

#### Run the Agent

In [None]:
question = "Which trip had the most expense?"
request = {"messages": [{"role": "user", "content": question}]}

# Print the most recent message of every streamed step.
for step in agent.stream(request, stream_mode="values"):
    latest_message = step["messages"][-1]
    latest_message.pretty_print()