In [2]:
%pip install -U langchain-community faiss-cpu langchain-huggingface pymupdf tiktoken langchain-ollama python-dotenv

Collecting langchain-community
  Using cached langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-ollama
  Using cached langchain_ollama-0.2.2-py3-none-any.whl.metadata (1.9 kB)
Collecting langchain<0.4.0,>=0.3.13 (from langchain-community)
  Using cached langchain-0.3.13-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.27 (from langchain-community)
  Using cached langchain_core-0.3.28-py3-none-any.whl.metadata (6.3 kB)
Using cached langchain_community-0.3.13-py3-none-any.whl (2.5 MB)
Using cached langchain_ollama-0.2.2-py3-none-any.whl (18 kB)
Using cached langchain-0.3.13-py3-none-any.whl (1.0 MB)
Using cached langchain_core-0.3.28-py3-none-any.whl (411 kB)
Installing collected packages: langchain-core, langchain-ollama, langchain, langchain-community
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.25
    Uninstalling langchain-core-0.3.25:
      Successfully uninstalled langchain-core-0

In [67]:
import os
import warnings
from dotenv import load_dotenv

# NOTE(review): presumably a workaround for the duplicate-OpenMP-runtime abort
# that can occur once faiss is imported further down — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Silence every warning for the rest of the kernel session.
warnings.filterwarnings("ignore")
# Load variables from a .env file; being the last expression, its return value
# is what the cell displays.
load_dotenv()

True

In [68]:
from langchain_community.document_loaders import PyMuPDFLoader
# Load the source document; the recorded outputs show one Document per page
# (5 pages, each carrying `page` / `total_pages` metadata).
# NOTE(review): the path points at a .docx file while the loader is the PyMuPDF
# (PDF) loader; the recorded metadata reports an Office-document format, so it
# evidently worked in this environment — confirm this is intended.
loader = PyMuPDFLoader("./rag_dataset/Rajasthan_GK_first_chapter_filtered.docx")
docs = loader.load()

In [69]:
# Inspect the metadata the loader attached to the first page.
docs[0].metadata

{'source': './rag_dataset/Rajasthan_GK_first_chapter_filtered.docx',
 'file_path': './rag_dataset/Rajasthan_GK_first_chapter_filtered.docx',
 'page': 0,
 'total_pages': 5,
 'format': 'Office documen',
 'title': '',
 'author': '',
 'subject': '',
 'keywords': '',
 'creator': '',
 'producer': '',
 'creationDate': '',
 'modDate': '',
 'trapped': '',
 'encryption': ''}

In [70]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Split the loaded pages into smaller, overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # upper bound on the size of each chunk
    chunk_overlap=100,  # amount shared between consecutive chunks
)
chunks = text_splitter.split_documents(docs)
# Display (number of chunks, number of source pages).
len(chunks),len(docs)

(13, 5)

In [71]:
import tiktoken
# Tokenizer that tiktoken associates with "gpt-4o-mini".
# NOTE(review): the models used later in this notebook are served by Ollama, so
# these counts are presumably only a rough size estimate — confirm.
encoding = tiktoken.encoding_for_model("gpt-4o-mini")
# Display (token count of the first page, token count of the first chunk).
len(encoding.encode(docs[0].page_content)),len(encoding.encode(chunks[0].page_content))

(637, 276)

## Document Vector Embedding

In [72]:
from langchain_ollama import OllamaEmbeddings
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [73]:
# Embedding client for the "nomic-embed-text" model at the local Ollama endpoint.
embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
    base_url="http://localhost:11434/",
)
# Embed a throwaway string; its length is used to size the FAISS index.
single_vector = embeddings.embed_query("HELLO WORLD !")

In [74]:
# Dimensionality of the probe embedding.
len(single_vector)

768

In [75]:
# Create a flat L2 index sized to the probe embedding's length.
index = faiss.IndexFlatL2(len(single_vector))
# Display (number of stored vectors, vector dimensionality).
index.ntotal,index.d

(0, 768)

In [77]:
# Wrap the raw FAISS index in a LangChain vector store, starting from an empty
# in-memory docstore and an empty index-position -> docstore-id mapping.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [78]:
# Add the chunks to the store and keep the returned ids.
# NOTE(review): re-running this cell without recreating `index`/`vector_store`
# presumably appends the same chunks again — confirm before re-executing.
ids = vector_store.add_documents(chunks)

In [79]:
# Number of entries now tracked by the store (should match the chunk count).
len(vector_store.index_to_docstore_id)

13

In [80]:
# Persist the vector store locally under `db_name`.
# NOTE(review): "rag_dataset" is also the folder the source document is read
# from, so index files and raw data presumably end up side by side — confirm
# this is intended.
db_name = "rag_dataset"
vector_store.save_local(db_name)

In [81]:
# Reload the persisted store with the same embedding client.
# SECURITY: allow_dangerous_deserialization=True is an explicit opt-in to
# loading pickled data; acceptable here only because this notebook wrote the
# files itself. Do not load an index from an untrusted source this way.
new_vector_store = FAISS.load_local(db_name,embeddings=embeddings,allow_dangerous_deserialization=True)
# Entry count after the save/load round trip.
len(new_vector_store.index_to_docstore_id)

13

In [119]:
# Multiple-choice question used to probe retrieval.
question = "Ancient city which is mentioned in both Mahabharata and Mahabhashya?(RAS Pre 2016)A. Virat nagar (Bairath)B. Madhyamika (Nagri)C. RedD. Kakort"
# Plain similarity search. Despite its name, `question_vector` holds the list of
# matching Document objects (see the recorded output), not an embedding vector.
question_vector = new_vector_store.search(query=question,search_type="similarity")
question_vector

[Document(id='6246b5a5-acfe-4391-84f7-8c827c586fe3', metadata={'source': './rag_dataset/Rajasthan_GK_first_chapter_filtered.docx', 'file_path': './rag_dataset/Rajasthan_GK_first_chapter_filtered.docx', 'page': 3, 'total_pages': 5, 'format': 'Office documen', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': '', 'producer': '', 'creationDate': '', 'modDate': '', 'trapped': '', 'encryption': ''}, page_content='invasion led by Mihirkul almost destroyed the already fragile lack\nof authority situation. Yashovarman of Malva was successful in\ndefeating Shakas or Hunas in 532 AD, which brought peace to the\nregion but the region could never emerge out of the devastation. e\nRajasthan was constantly in the process of cultural infusion from\noutside and by 6™ century the foreigners got so much mingled with\nthe locals that it gets hard to tell the difference since then. Previous\nYear Questions Q. Ancient city which is mentioned in both\nMahabharata and Mahabhashya? (RAS Pre

In [120]:
# Print the text of each retrieved document, separated by blank lines.
for doc in question_vector:
    print(doc.page_content)
    print("\n\n")

invasion led by Mihirkul almost destroyed the already fragile lack
of authority situation. Yashovarman of Malva was successful in
defeating Shakas or Hunas in 532 AD, which brought peace to the
region but the region could never emerge out of the devastation. e
Rajasthan was constantly in the process of cultural infusion from
outside and by 6™ century the foreigners got so much mingled with
the locals that it gets hard to tell the difference since then. Previous
Year Questions Q. Ancient city which is mentioned in both
Mahabharata and Mahabhashya? (RAS Pre 2016) A. Virat nagar
(Bairath) B. Madhyamika (Nagri) C. Red D. Kakort Q. During which



Naagda, Eran, Kaytha etc. Bairath civilization: e The region around
Jaipur is identified with the region as the capital Viratnagar of
Matsya Janpada. e The primitive hills of this region are Bijak
Dungri, Moti Dungri, Bhim Dungri etc. The Bhabru Edict of Ashoka
was discovered by Captain Bert on Bijak Dungri. e Buddhism:
remains of Stupa and Budda 

In [112]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

In [113]:
# Chat model client for "llama3.2" at the local Ollama endpoint (same base URL
# as the embedding client).
model = ChatOllama(
    model="llama3.2",
    base_url="http://localhost:11434/",
)

In [121]:
# Raw template text for the RAG prompt. It is kept under its own name so that
# `prompt` only ever refers to the ChatPromptTemplate object, never to a str.
# The final instruction previously ended in the truncated sentence
# "keep the answer."; it now reads "keep the answer concise.".
prompt_template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use bullet points and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:
"""
# Template with two input variables: {question} and {context}.
prompt = ChatPromptTemplate.from_template(prompt_template)

In [122]:
def formate_doc(docs):
    """Merge the text of several documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, with a blank
        line between consecutive documents. An empty iterable yields "".
    """
    separator = "\n\n"
    texts = []
    for item in docs:
        texts.append(item.page_content)
    return separator.join(texts)
# Sanity check of the formatter: prints the text of every loaded page in `docs`.
print(formate_doc(docs))

An Institute For Civil Services fafact Gat ait dant at aaftia un wate
deat RAS MAINS - 2018 PAPER -I Rajasthan History Art & Culture
Tea & sfrera A ww ayaa Tee Hea, Uh AS Veet UNA H ATA Woe at
wh ater a .... © SP-21, Above Canara Bank, Ridhi-Sidhi Chauraha,
Gopalpura Bypass, Jaipur (© 9875170111, 9414988860 BX
samyakjaipur@gmail.com @ www.samyakias.com @} SAMYAK-An
Institute For Civil Services
Samyak Raj. His. Art & Cult. (Mains-2018) An Institute For Civil
Services 2 Content S.N, | Chapters 1 Ancient History of Rajasthan 2
Orogin of Rajputs and resistance to Turk Invasion 3 Rise of Mewar
Under the Sisodias 4 Relations of the Rajput states with the
Mughals 5 Administrative and Revenue System in Rajasthan 6
Freedom struggle of 1857 and Rajasthan 7 Various Stages in the
Formation of Rajasthan 8 Revolutionary movements in Rajasthan 9
Organization of the Prajamandals in various states 10 | Famous
Peasant Movement in Rajasthan 11 | Social and Religious reforms in
Rajasthan 12. | Heritage of

In [123]:
# Retriever over the reloaded store, configured for MMR search.
# NOTE(review): per the LangChain docs, lambda_mult=1 is the minimum-diversity
# setting, so this presumably behaves like plain similarity ranking; fetch_k=100
# also exceeds the 13 entries in the store. Confirm both values are intended.
retriver = new_vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={
        'k':3,  # number of documents returned
        'fetch_k':100,  # candidates fetched before MMR re-ranking
        'lambda_mult':1  # diversity trade-off for MMR
    }
)

In [124]:
# RAG pipeline: the input question is (a) sent through the retriever, whose
# documents are joined into one context string by `formate_doc`, and (b) passed
# through unchanged. Both values fill the prompt, which goes to the chat model;
# the reply is parsed into a plain string.
rag_chain = (
    {
        'context':retriver | formate_doc,
        'question':RunnablePassthrough(),
    } | prompt | model | StrOutputParser()
)

In [125]:
# Ask the chain the multiple-choice question and print its answer; any failure
# (for example the local model server being unreachable) is reported as a
# message instead of a traceback.
question = "Ancient city which is mentioned in both Mahabharata and Mahabhashya?(RAS Pre 2016)A. Virat nagar (Bairath)B. Madhyamika (Nagri)C. RedD. Kakort"
try:
    res = rag_chain.invoke(question)
    print(res)
except Exception as exc:
    print(f"An error occurred: {exc}")

Here is the answer:

• Madhyamika (Nagri)
