In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv
from typing import TypedDict , List
from langgraph.graph import StateGraph,START,END
from sentence_transformers import CrossEncoder
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from sentence_transformers import CrossEncoder
import warnings

from langchain_core.messages import HumanMessage
import os
warnings.filterwarnings("ignore")

In [2]:
class GraphState(TypedDict):
    """State dictionary shared by every node of the workflow graph.

    Keys:
        question: text of the user's message.
        intent: category label written by the intent-classification node.
        context: document chunks selected by the retrieval node.
        answer: reply text produced by the greet / ignore / answer nodes.
    """

    question: str
    intent: str
    context: List[str]
    answer: str

In [3]:
# Load the source PDF from the notebook's working directory.
# `documents` feeds the text splitter in the next cell.
loader = PyPDFLoader("keys-to-trading-gold-ca.pdf")
documents = loader.load()
# Sanity check: number of loaded Document objects (presumably one per page — confirm).
print(len(documents))

15


In [4]:
# Split the loaded documents into chunks of up to 500 characters with a
# 50-character overlap between neighbouring chunks. `texts` is what gets
# embedded into the vector store and indexed by the BM25 retriever.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = splitter.split_documents(documents)
# The message previously contained a mis-encoded non-breaking space ("Â ");
# a plain ASCII space is used so the output renders cleanly everywhere.
print(f"Total chunks created: {len(texts)}")

Total chunks created: 44


In [5]:
# Read variables from a local .env file into the process environment before
# the OpenAI clients are built. `load_dotenv` was imported but never called,
# so a key stored in .env was never picked up. Variables that are already
# set in the environment are left untouched.
load_dotenv()

# Embedding model used to index and query the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
# Chat model shared by all graph nodes; temperature 0 keeps replies repeatable.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Directory where the Chroma index is persisted. The original location is
# kept as the default; set RAG_PERSIST_DIR to run on another machine.
persist_directory = os.getenv("RAG_PERSIST_DIR", r"D:\RAG Task")
collection_name = "article_new"

In [6]:
# Cross-encoder used by retriever_node to score (question, chunk) pairs and
# reorder the retrieved candidates before the top ones are kept.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


In [7]:
# Open (or create) the persisted collection first instead of calling
# Chroma.from_documents unconditionally: from_documents inserts every chunk
# under freshly generated ids, so each re-run of this cell would append another
# copy of all chunks to the persisted collection and the top-k hits would fill
# up with duplicates.
vectorstore = Chroma(
    collection_name=collection_name,
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

# Index the chunks only when the collection is still empty. To re-index after
# changing the PDF or the chunking, delete the persisted directory or use a
# different collection_name.
if not vectorstore.get(limit=1)["ids"]:
    vectorstore.add_documents(texts)

In [8]:
# Keyword (BM25) retriever built over the same chunks that were embedded.
keyword_retriever = BM25Retriever.from_documents(texts)

# Embedding-based retriever on top of the Chroma store, asking for k=4 hits.
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# Combine both retrievers; the weights pair up positionally with the
# retrievers list (0.6 for the vector retriever, 0.4 for BM25).
hybrid_retriever = EnsembleRetriever(
    retrievers=[vector_retriever, keyword_retriever],
    weights=[0.6, 0.4],
)

In [9]:
def intent_node(state: GraphState):
    """Ask the chat model to classify the user's question.

    Reads ``state["question"]`` and returns ``{"intent": label}``, where
    ``label`` is the model's reply stripped of surrounding whitespace and
    lower-cased. The prompt asks for one of ``greeting``,
    ``relevant_to_document`` or ``irrelevant``, but the reply is passed
    through unvalidated.
    """
    user_question = state["question"]

    prompt_text = f"""
    
    Classify the following question into one of these categories:
    1. greeting
    2. relevant_to_document
    3. irrelevant
    
    Question : "{user_question}"
    
    Reply only with the category name.
    """

    reply = llm.invoke(prompt_text)
    label = reply.content.strip().lower()

    return {"intent": label}

In [10]:
def greet_node(state: GraphState):
    """Produce a short, friendly reply to a greeting.

    Reads ``state["question"]`` (the greeting the user typed), asks the chat
    model for a polite response and returns it as ``{"answer": text}``.
    """
    question = state["question"]

    # The example reply used to contain a mis-encoded apostrophe ("Howâ€™s");
    # a plain ASCII apostrophe keeps the prompt clean under any file encoding.
    prompt = f"""
    You are a friendly AI assistant.
    The user greeted you with: "{question}".
    Respond politely and naturally, as if you are having a short chat before helping with their document.
    Example responses: "Hey there! How's it going?" or "Hello! How can I assist you today?"
    
    """

    response = llm.invoke(prompt)
    return {"answer": response.content}

In [11]:
def ignore_node(state: GraphState):
    """Politely decline a question that is not about the document.

    Reads ``state["question"]``, asks the chat model to explain that only
    document-related questions can be answered, and returns the model's
    reply as ``{"answer": text}``.
    """
    user_question = state["question"]

    refusal_prompt = f"""
    You are an assistant specialized in answering questions related only to a provided document.
    The user asked: "{user_question}".
    Politely tell the user that you can only answer questions related to the document content.
    Example responses:
    - "I'm sorry, I can only help with questions about the uploaded document."
    - "That seems unrelated to the document. Could you please ask something based on it?"
    """

    reply = llm.invoke(refusal_prompt)
    return {"answer": reply.content}

In [12]:
def retriever_node(state: GraphState):
    """Fetch candidate chunks, rerank them, and keep the three best.

    Reads ``state["question"]``, retrieves candidates through
    ``hybrid_retriever``, scores every (question, chunk text) pair with
    ``reranker`` and returns ``{"context": [...]}`` holding the text of the
    three highest-scoring chunks, best first.
    """
    query = state["question"]

    # Candidate documents from the hybrid retriever.
    candidates = hybrid_retriever.invoke(query)

    # One relevance score per candidate, in candidate order.
    relevance = reranker.predict(
        [(query, candidate.page_content) for candidate in candidates]
    )

    # Candidate positions ordered by descending score. sorted() is stable,
    # so equally scored candidates keep their retrieval order.
    best_first = sorted(
        range(len(candidates)),
        key=lambda position: relevance[position],
        reverse=True,
    )

    return {
        "context": [candidates[position].page_content for position in best_first[:3]]
    }



In [13]:
def answer_node(state: GraphState):
    """Answer the user's question from the retrieved context.

    Reads ``state["question"]`` and ``state["context"]`` (a list of chunk
    texts), builds a grounded prompt and returns the chat model's reply as
    ``{"answer": text}``. Missing keys fall back to an empty question or an
    empty context.
    """
    question = state.get("question", "")

    # `context` is a list of chunk strings. Interpolating the list directly
    # would place its Python repr (brackets, quotes, escaped "\n") in the
    # prompt, so the chunks are joined into plain text separated by blank lines.
    chunks = state.get("context") or []
    if isinstance(chunks, str):
        # Tolerate a single pre-joined string as well.
        chunks = [chunks]
    context_text = "\n\n".join(chunks)

    prompt = f"""
    You are a helpful AI assistant. Use the context below to answer the user's question.

    Context:
    {context_text}

    Question:
    {question}

    If the answer is not found in the context, say "I'm not sure based on the available information."
    """

    response = llm.invoke(prompt)
    return {"answer": response.content}

In [14]:
def route_from_intent(state: GraphState):
    """Map the classified intent to the name of the next graph node.

    Returns ``"greet"`` for a greeting, ``"retrieve"`` for a document-related
    question and ``"ignore"`` for everything else, including a missing or
    empty intent. Matching is by substring so that replies such as
    ``"2. relevant_to_document"`` are still recognised.
    """
    intent = (state.get("intent") or "").lower()

    if "greeting" in intent:
        return "greet"
    # "irrelevant" contains the substring "relevant", so it has to be ruled
    # out before the relevance check; otherwise off-topic questions would be
    # routed to retrieval and the ignore node would never handle them.
    if "irrelevant" in intent:
        return "ignore"
    if "relevant" in intent:
        return "retrieve"
    return "ignore"

In [15]:
# Assemble the workflow: classify the intent first, then either greet,
# decline, or retrieve context and answer.
graph = StateGraph(GraphState)

# Register each node under the name used by the router and the edges below.
for node_name, node_fn in (
    ("intent", intent_node),
    ("greet", greet_node),
    ("ignore", ignore_node),
    ("retrieve", retriever_node),
    ("answer", answer_node),
):
    graph.add_node(node_name, node_fn)

# Entry point, routing on the classified intent, and the retrieve -> answer step.
graph.add_edge(START, "intent")
graph.add_conditional_edges("intent", route_from_intent, ["greet", "retrieve", "ignore"])
graph.add_edge("retrieve", "answer")

# Every branch finishes the run once its reply has been produced.
for terminal_node in ("greet", "ignore", "answer"):
    graph.add_edge(terminal_node, END)

app = graph.compile()


In [16]:
# Interactive chat loop: reads questions until the user types "exit" or "quit".
print("AI: RAG Assistant is ready! Type 'exit' or 'quit' to stop.\n")

while True:
    # Strip surrounding whitespace so that " exit " still ends the session.
    user_input = input("You: ").strip()
    # Echo the message so it stays visible in the cell output.
    print(f"You :{user_input}")

    if user_input.lower() in ("exit", "quit"):
        print("Assistant: Goodbye! 👋")
        break

    # Nothing to classify or answer for a blank line; prompt again.
    if not user_input:
        continue

    try:
        response = app.invoke({"question": user_input})
        print("Assistant:", response.get("answer", "No response generated."))
    except Exception as e:
        # Best effort: report the failure and keep the chat session alive.
        print("⚠ Error:", str(e))

AI: RAG Assistant is ready! Type 'exit' or 'quit' to stop.

You :exit
Assistant: Goodbye! 👋
