In [1]:
#Install Dependencies
!pip install -U \
    langchain \
    langchain-community \
    langchain-openai \
    langgraph \
    pinecone-client \
    pypdf \
    fastapi \
    uvicorn

Collecting fastapi
  Downloading fastapi-0.128.0-py3-none-any.whl.metadata (30 kB)
Collecting uvicorn
  Downloading uvicorn-0.40.0-py3-none-any.whl.metadata (6.7 kB)
Collecting starlette<0.51.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.50.0-py3-none-any.whl.metadata (6.3 kB)
Collecting annotated-doc>=0.0.2 (from fastapi)
  Downloading annotated_doc-0.0.4-py3-none-any.whl.metadata (6.6 kB)
Downloading fastapi-0.128.0-py3-none-any.whl (103 kB)
Downloading starlette-0.50.0-py3-none-any.whl (74 kB)
Downloading uvicorn-0.40.0-py3-none-any.whl (68 kB)
Downloading annotated_doc-0.0.4-py3-none-any.whl (5.3 kB)
Installing collected packages: annotated-doc, uvicorn, starlette, fastapi

   ---------- ----------------------------- 1/4 [uvicorn]
   ---------- ----------------------------- 1/4 [uvicorn]
   ---------- ----------------------------- 1/4 [uvicorn]
   -------------------- ------------------- 2/4 [starlette]
   -------------------- ------------------- 2/4 [starlette]
   ----------

In [None]:
#Imports & Environment Setup
import os
from typing import TypedDict, List

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_openai import OpenAIEmbeddings, ChatOpenAI

from langchain_community.vectorstores import Pinecone
import pinecone

from langgraph.graph import StateGraph

In [3]:
# Load PDF
# Path is relative to the notebook's working directory.
PDF_PATH = "Ebook-Agentic-AI.pdf"

# Read the e-book from disk into a list of LangChain documents.
loader = PyPDFLoader(file_path=PDF_PATH)
documents = loader.load()

# Display how many documents the loader produced.
len(documents)

In [None]:
# Chunk the PDF
# Splitter settings, named so they are easy to find and tune.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 150

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Break the loaded documents into overlapping chunks for embedding.
chunks = text_splitter.split_documents(documents)

# Display how many chunks were produced.
len(chunks)

In [None]:
# Initialize Pinecone & make sure the target index exists
INDEX_NAME = "agentic-ai-rag"

# The Pinecone client must exist before it is queried below. The API key is
# read from the environment so no credential is stored in the notebook; a
# missing key fails here with a clear KeyError.
# `pinecone.Pinecone` is referenced through the module because the bare name
# `Pinecone` is bound to the LangChain vector-store class in the imports cell.
pc = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])

existing_indexes = [index.name for index in pc.list_indexes()]

if INDEX_NAME not in existing_indexes:
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,   # OpenAI embedding size
        metric="cosine",
        # `create_index` needs a deployment spec. Cloud and region can be
        # overridden through the environment.
        # NOTE(review): the aws / us-east-1 defaults are an assumption -
        # confirm they match the Pinecone project this notebook targets.
        spec=pinecone.ServerlessSpec(
            cloud=os.environ.get("PINECONE_CLOUD", "aws"),
            region=os.environ.get("PINECONE_REGION", "us-east-1"),
        ),
    )

In [None]:
# Embedding model shared by ingestion (here) and by the retriever node.
embeddings = OpenAIEmbeddings()

# Embed every chunk and write the vectors into the Pinecone index.
vectorstore = Pinecone.from_documents(chunks, embeddings, index_name=INDEX_NAME)

In [None]:
# Define RAG State (LangGraph)
class RAGState(TypedDict):
    """State dictionary passed between the nodes of the RAG graph."""

    # The user's question; read by both the retriever and the generator.
    question: str
    # Documents returned by the retriever node.
    context: List
    # Text produced by the generator node.
    answer: str

In [None]:
# Retriever Node
def retrieve(state: RAGState):
    """Look up the chunks most similar to the question in the Pinecone index.

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        A partial state update with ``"context"`` set to the four documents
        returned by the similarity search and ``"question"`` passed through
        unchanged.
    """
    question = state["question"]

    # Open a handle on the already-populated index (no re-ingestion here).
    store = Pinecone.from_existing_index(index_name=INDEX_NAME, embedding=embeddings)
    matches = store.similarity_search(question, k=4)

    return {"context": matches, "question": question}

In [None]:
# Generator Node (Strict Grounding)
# Chat model used by the generator node; temperature is set to 0.
llm = ChatOpenAI(model="gpt-4", temperature=0)

# Prompt with two placeholders, {context} and {question}. It instructs the
# model to answer only from the supplied context and to reply with a fixed
# sentence when the answer is not present.
PROMPT_TEMPLATE = """
You are an AI assistant.
Answer the question ONLY using the context provided.
If the answer is not present, respond exactly with:
"I don't know based on the provided document."

Context:
{context}

Question:
{question}
"""

def generate(state: RAGState):
    """Answer the question using only the retrieved context.

    Args:
        state: Graph state; reads ``state["context"]`` (documents exposing
            ``page_content``) and ``state["question"]``.

    Returns:
        A partial state update with the model's reply under ``"answer"`` and
        the retrieved documents passed through under ``"context"``.
    """
    # Separate chunks with a blank line so their boundaries stay visible.
    context_text = "\n\n".join(doc.page_content for doc in state["context"])

    prompt = PROMPT_TEMPLATE.format(
        context=context_text,
        question=state["question"],
    )

    # `predict` is the deprecated chat-model entry point and is not available
    # in current releases; `invoke` is the supported call. It returns a
    # message object whose text is in `.content`.
    answer = llm.invoke(prompt).content

    return {
        "answer": answer,
        "context": state["context"],
    }

In [None]:
# Build LangGraph Pipeline: retrieve -> generate
workflow = StateGraph(RAGState)

workflow.add_node("retrieve", retrieve)
workflow.add_node("generate", generate)

workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "generate")
# Declare the terminal node explicitly, mirroring the entry point above, so
# the graph's end does not rely on "generate" simply having no outgoing edge.
workflow.set_finish_point("generate")

rag_graph = workflow.compile()

In [None]:
# Test the RAG Chatbot
query = "What is Agentic AI?"

# Run the full graph for a single question.
result = rag_graph.invoke({"question": query})

print("ANSWER:\n", result["answer"])
print("\n--- Retrieved Contexts ---\n")

# Show the first 500 characters of each retrieved chunk, numbered from 1.
for chunk_no, chunk in enumerate(result["context"], start=1):
    print(f"[Chunk {chunk_no}]", chunk.page_content[:500], "-" * 80, sep="\n")

In [None]:
# Confidence Score (Simple)
# Guard the division: when retrieval returns no documents, report 0.0 instead
# of raising ZeroDivisionError.
# NOTE(review): this value depends only on how many chunks were retrieved,
# not on how relevant they are - consider deriving it from similarity scores.
retrieved_docs = result["context"]
confidence = round(1 / len(retrieved_docs), 2) if retrieved_docs else 0.0
confidence