# Agentic Enterprise Knowledge Agent

In [56]:
import os
from typing import TypedDict, List, Optional

from langchain.document_loaders import DirectoryLoader
from langchain.embeddings import FastEmbedEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph, START, END

In [47]:
class RAGState(TypedDict):
    """State dictionary passed between the nodes of the RAG graph.

    Every text field except ``question`` is ``Optional`` because the pipeline
    is started with those fields set to ``None``; they are filled in as the
    graph runs.
    """

    question: str  # the user's question, supplied by the caller
    query_type: Optional[str]  # label assigned during query analysis
    retrieved_docs: List[Document]  # documents found by the retrieval step
    search_query: Optional[str]  # text used for the similarity search
    answer: Optional[str]  # generated answer text

In [48]:
def build_faiss_index(
    docs_path: str,
    *,
    chunk_size: int = 800,
    chunk_overlap: int = 150,
) -> FAISS:
    """Load the files under *docs_path*, split them, and index them in FAISS.

    Args:
        docs_path: Directory searched recursively (``**/*``) for documents.
        chunk_size: Maximum chunk size passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            text splitter.

    Returns:
        A FAISS vector store built from the chunks, embedded with
        ``FastEmbedEmbeddings``.

    Raises:
        ValueError: If no documents were loaded, or splitting produced no
            chunks. Building an index from nothing would otherwise fail
            with an unhelpful low-level error.
    """
    loader = DirectoryLoader(docs_path, glob="**/*")
    documents = loader.load()
    if not documents:
        raise ValueError(f"No documents could be loaded from {docs_path!r}")

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(documents)
    if not chunks:
        raise ValueError(
            f"Documents under {docs_path!r} produced no text chunks to index"
        )

    embeddings = FastEmbedEmbeddings()
    return FAISS.from_documents(chunks, embeddings)

In [59]:
# Chat model reached through OpenRouter's OpenAI-compatible endpoint.
# The API key is read from the OPENROUTER_API_KEY environment variable so
# that no credential is stored in the notebook; a missing variable raises
# KeyError here rather than failing later on the first request.
# NOTE: any key that was ever pasted into this notebook should be treated
# as compromised and rotated.
llm = ChatOpenAI(
    model="mistralai/mistral-7b-instruct:free",  # Specify any model available on OpenRouter
    temperature=0.7,
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
)


# Chat prompt pairing a fixed classification instruction (system message)
# with the user's question, which fills the {question} placeholder.
query_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You classify enterprise questions for document retrieval."),
        ("human", "{question}"),
    ]
)

def query_analyzer(state: RAGState) -> RAGState:
    """Prepare the state for the retrieval step.

    Labels the request as a ``'retrieval'`` query and uses the question
    verbatim as the search query.

    No LLM request is made here: the label is fixed, so a classification
    call whose reply is never read would only add latency, cost, and an
    extra point of failure. Add one (e.g. via ``query_prompt``) once its
    reply is actually used to choose ``query_type``.

    Args:
        state: Current graph state; ``state["question"]`` must be set.

    Returns:
        The same state object with ``query_type`` and ``search_query`` set.
    """
    state["query_type"] = "retrieval"
    state["search_query"] = state["question"]
    return state

In [50]:
def retriever_agent(vectorstore: FAISS, k: int = 5):
    """Create a graph node that fetches documents for the search query.

    The returned callable runs a similarity search on *vectorstore* for
    ``state["search_query"]`` requesting *k* results, stores the hits under
    ``state["retrieved_docs"]``, and returns the same state object.
    """

    def _retrieve(state: RAGState) -> RAGState:
        query = state["search_query"]
        state["retrieved_docs"] = vectorstore.similarity_search(query, k=k)
        return state

    return _retrieve

In [51]:
# Prompt for answer generation: the system message restricts the model to
# the supplied context, and the human message carries the question and the
# context via the {question} and {context} placeholders.
answer_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer ONLY using the provided context. "
            "If the answer is not in the context, say so explicitly.",
        ),
        ("human", "Question:\n{question}\n\nContext:\n{context}"),
    ]
)

def answer_agent(state: RAGState) -> RAGState:
    """Generate the answer from the retrieved documents.

    Joins the ``page_content`` of every document in
    ``state["retrieved_docs"]`` with blank lines, sends that context and the
    question through ``answer_prompt`` to ``llm``, and stores the reply text
    in ``state["answer"]``. The same state object is returned.
    """
    passages = [doc.page_content for doc in state["retrieved_docs"]]
    context = "\n\n".join(passages)

    messages = answer_prompt.format_messages(
        question=state["question"],
        context=context,
    )
    reply = llm.invoke(messages)

    state["answer"] = reply.content
    return state

In [60]:
def build_rag_graph(vectorstore: FAISS):
    """Assemble and compile the linear RAG graph.

    The graph runs START -> analyze_query -> retrieve -> answer -> END,
    with the retrieval node bound to *vectorstore*. Returns the compiled
    graph.
    """
    workflow = StateGraph(RAGState)

    # Node name -> callable, registered in pipeline order.
    nodes = {
        "analyze_query": query_analyzer,
        "retrieve": retriever_agent(vectorstore),
        "answer": answer_agent,
    }
    for node_name, node_fn in nodes.items():
        workflow.add_node(node_name, node_fn)

    # Connect each stage to the next one along the single path.
    path = [START, "analyze_query", "retrieve", "answer", END]
    for source, target in zip(path, path[1:]):
        workflow.add_edge(source, target)

    return workflow.compile()

In [63]:
# Build the vector index from the local document folder and compile the graph.
vectorstore = build_faiss_index("./enterprise_docs")
rag_app = build_rag_graph(vectorstore)

# Run a single question through the graph. Every state key is supplied up
# front; all keys except "question" start out empty ([] / None).
result = rag_app.invoke({
    "question": "What is our company's AI use policy?",
    "retrieved_docs": [],
    "answer": None,
    "query_type": None,
    "search_query": None
})

# Show the generated answer.
print(result["answer"])


libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.


 Our company's AI use policy is outlined in Section 3 of the provided context. The policy includes approved uses (software development, data analysis, customer support), prohibited uses (hiring/termination decisions, processing highly sensitive personal data, generating legally binding documents), and restrictions on model training (enterprise data cannot be used to train external AI models without approval).
