# Objective: Processing a Document Using RAG with FAISS

# Set up Environment

In [10]:
import os

# Keep a key that is already exported in the environment; only fall back to the
# placeholder below when none is set, so this cell never overwrites a real key.
# Prefer exporting OPENAI_API_KEY in your shell over pasting a real key here:
# anything typed into this cell is saved (and shared) with the notebook.
os.environ.setdefault("OPENAI_API_KEY", "ENTER_YOUR_API_KEY")


# Creating a ChatPromptTemplate

In [12]:
from langchain.prompts import ChatPromptTemplate

def load_prompt():
    """Return the chat prompt template used for PDF-grounded answers.

    The template has two placeholders, ``context`` and ``question``, and tells
    the model to reply with a fixed refusal sentence when the answer is not in
    the PDF.
    """
    template_text = """You need to answer the question in the sentence as same as in the PDF content.
    Given below is the context and question of the user.
    context = {context}
    question = {question}
    If the answer is not in the pdf, answer: "I do not know what the hell you are asking about"
    """
    qa_prompt = ChatPromptTemplate.from_template(template_text)
    return qa_prompt


# Load the FAISS Knowledge Base

In [14]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

def load_knowledgeBase(db_path='./vectorstore'):
    """Load the persisted FAISS vector store from disk.

    Args:
        db_path: Folder that holds the saved FAISS index. Defaults to
            './vectorstore', resolved against the current working directory.

    Returns:
        The vector store returned by ``FAISS.load_local``, backed by OpenAI
        embeddings created with the OPENAI_API_KEY environment variable.

    Raises:
        FileNotFoundError: If ``db_path`` is not an existing directory.
        KeyError: If OPENAI_API_KEY is not set in the environment.
    """
    # Fail early, before any OpenAI client is built, with a readable message
    # when the index folder is missing.
    if not os.path.isdir(db_path):
        raise FileNotFoundError(
            f"FAISS index folder not found: {db_path!r}. "
            "Build and save the vector store before loading it."
        )

    embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])
    # SECURITY: allow_dangerous_deserialization=True permits unpickling the
    # stored index metadata, which can execute arbitrary code. Only load index
    # folders you created yourself or otherwise trust.
    return FAISS.load_local(db_path, embeddings, allow_dangerous_deserialization=True)


In [16]:
from langchain_openai import ChatOpenAI

def load_llm():
    """Create the ChatOpenAI model, authenticated via OPENAI_API_KEY."""
    api_key = os.environ["OPENAI_API_KEY"]
    chat_model = ChatOpenAI(openai_api_key=api_key)
    return chat_model


In [18]:
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line."""
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# Running the Retrieval-Augmented Generation pipeline with multiple queries

In [20]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load all components
knowledgeBase = load_knowledgeBase()
llm = load_llm()
prompt = load_prompt()

# Retrieve directly from the full knowledge base so that every question sent
# through the chain triggers its own similarity search. Building a temporary
# index from the hits of a single query would pin all later questions to that
# one query's documents and re-embed them on every run.
retriever = knowledgeBase.as_retriever()

# Build the chain: the incoming question is used both to fetch context
# (retriever -> format_docs) and, unchanged, as the "question" prompt variable;
# the model reply is then reduced to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Input query
query = "who is krishna"

# Get response
response = rag_chain.invoke(query)

# Output
print("🧠 Response:\n", response)


🧠 Response:
 Krishna is described as the reservoir of pleasure and the primeval cause of everything. He is also known as Govinda, or one who gives pleasure to the senses.


In [24]:
# Ask the same question again through the chain. The chain ends in
# StrOutputParser, so `response` is a plain string; leaving it as the last
# expression displays its repr instead of printing it.
query = "who is krishna"
response = rag_chain.invoke(query)
response

'Krishna is the reservoir of pleasure and the primeval cause of everything. He is described as sarva-kåra√a-kåra√am, the cause of all causes, and is also known as Govinda, or one who gives pleasure to the senses.'

In [26]:
# Follow-up question sent through the same chain.
# NOTE(review): the query text is passed to the chain exactly as written,
# including its spelling -- confirm whether that is intended.
query = "what does krishna mainily dipicts?"
response = rag_chain.invoke(query)
response

'Krishna mainly depicts the reservoir of pleasure and the primeval cause of everything.'

In [28]:
# Ask about a different subject through the same chain; the bare `response`
# expression displays the returned string.
query = "who is Arjuna"
response = rag_chain.invoke(query)
response

'Arjuna is a character from the Hindu epic Mahabharata, who is the protagonist of the Bhagavad Gita.'