In [21]:
import os

from dotenv import load_dotenv

# Load variables from a local .env file; override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Sanity check shown as the cell output: True when OPENAI_API_KEY is set and
# non-empty, False otherwise (no KeyError when the variable is missing).
bool(os.getenv("OPENAI_API_KEY"))

True

# Agentic RAG

In [38]:
import os

from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool

from typing_extensions import TypedDict
from pydantic import BaseModel, Field
from langgraph.graph.message import add_messages
from typing import Annotated
from dotenv import load_dotenv
import os
from langchain_openai import ChatOpenAI 
from langchain.prompts import ChatPromptTemplate
from langchain.schema import HumanMessage, SystemMessage


In [39]:
# Folder that holds the policy PDFs. It can be overridden with the
# POLICY_PDF_DIR environment variable; otherwise the original location is used.
pdf_folder = os.environ.get(
    "POLICY_PDF_DIR",
    r"C:\AgenticAI\Projects_AgentAI\AgenticAI_04_companypolicy\pdf files",
)

# Collect all PDF files. The extension match is case-insensitive and the list is
# sorted so the load order does not depend on the order os.listdir returns.
pdf_files = sorted(
    os.path.join(pdf_folder, name)
    for name in os.listdir(pdf_folder)
    if name.lower().endswith(".pdf")
)
print(f"len of pdf files {len(pdf_files)}.")
if not pdf_files:
    raise FileNotFoundError(f"No PDF files found in {pdf_folder!r}")


# Load every PDF; each loader.load() call contributes a list of documents.
docs = []
for file in pdf_files:
    loader = PyPDFLoader(file)
    docs.extend(loader.load())
print(f"total pages loaded {len(docs)}")


# Split the loaded documents into overlapping chunks before embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs_splitter = text_splitter.split_documents(docs)
print(f"total documents chunked into docs split {len(docs_splitter)}")
if not docs_splitter:
    # Fail with a clear message instead of an obscure error inside the vector store.
    raise ValueError("No text chunks were produced from the PDFs; cannot build the vector store.")


embeddings = OpenAIEmbeddings()

# Embed the chunks and store them in an in-memory FAISS vector store.
vectorstore = FAISS.from_documents(
    documents=docs_splitter,
    embedding=embeddings,
)

# Expose the vector store as a retriever; "k": 3 is passed through as a search kwarg.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})



len of pdf files 3.
total pages loaded 3
total documents chunked into docs split 9


In [42]:
# System prompt for the policy assistant. The fallback sentence the model is
# told to use is wrapped in a matched pair of double quotes.
RAG_SYSTEM_MESSAGE = """
You are an expert HR policy assistant.
Answer the question strictly based on the provided policy document context.
Be concise, factual, and do not add extra assumptions.
If the answer is not in the document, say: "The policy document does not mention this specifically."
"""

# Template for a user turn; fill it with USER_PROMPT.format(input_text=...).
USER_PROMPT = "Question: {input_text}"

# Chat model instance used by answer_policy_question to generate answers.
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4.1",
)


def answer_policy_question(question: str):
    """Answer a policy question using context retrieved from the indexed PDFs.

    Looks up documents for ``question`` with the module-level ``retriever``,
    joins their page contents into one context string, and sends it to the
    module-level ``llm`` together with ``RAG_SYSTEM_MESSAGE``.

    Args:
        question: The user's question, used both as the retrieval query and
            in the prompt sent to the model.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # Retrievers implement the Runnable interface; `invoke` replaces the
    # deprecated `get_relevant_documents`.
    relevant_docs = retriever.invoke(question)

    # Separate retrieved chunks with a blank line so they stay distinguishable.
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    system_text = SystemMessage(content=RAG_SYSTEM_MESSAGE)

    user_text = HumanMessage(content=f"Question: {question}\n\nPolicy Document Context :\n{context}")

    response = llm.invoke([system_text, user_text])

    return response.content


In [43]:
# Run one sample question through the RAG helper and show the reply.
question = "What is the working hours policy?"
answer = answer_policy_question(question)

# A single print with a newline separator emits the same text as printing the
# banner and the answer in two separate calls.
print("\n Final Answer: \n", answer, sep="\n")


 Final Answer: 

The working hours policy states that employees must work a minimum of 8 hours per day and 40 hours per week unless otherwise approved by HR. Employees are expected to work 8 hours per day, Monday to Friday.
