In [1]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

In [2]:
pdf_folder = "RAGData"
documents = []

In [3]:
for file in os.listdir(pdf_folder):
    if file.endswith(".pdf"):
        loader = PyPDFLoader(os.path.join(pdf_folder, file))
        documents.extend(loader.load())
        print(f"Loaded {len(documents)} documents from {file}")

Loaded 30 documents from Consolidated IFSCA (Payment Services) Regulations, 2024.pdf
Loaded 36 documents from Consolidated IFSCA (Registration of Factors and Registration of Assignment of Receivables) Regulations.pdf


In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)

In [5]:
docs = text_splitter.split_documents(documents)

In [6]:
embedding = OllamaEmbeddings(model="nomic-embed-text")

In [7]:
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory="./chroma_db"  # Persistent local dir
)

In [13]:
llm = Ollama(model="llama3.1", num_ctx=2048)

In [9]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

In [14]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

In [15]:
query = "Who is Central Registrar?"

In [16]:
response = qa_chain.invoke(query)

In [17]:
print("Query:", query)

Query: Who is Central Registrar?


In [18]:
print("Answer:", response["result"])

Answer: A person appointed as such under subsection (1) of section 21 of the Securitisation and Reconstruction of Financial Assets and Enforcement of Security Interest Act, 2002 (54 of 2002).


In [19]:
print("\nSource Documents:")
for doc in response["source_documents"]:
    print(f"- {doc.metadata['source']}: {doc.page_content[:100]}...")


Source Documents:
- RAGData\Consolidated IFSCA (Registration of Factors and Registration of Assignment of Receivables) Regulations.pdf: c. “Central Registrar” means a person appointed as such under subsection (1) of 
section 21 of the S...
- RAGData\Consolidated IFSCA (Registration of Factors and Registration of Assignment of Receivables) Regulations.pdf: Enforcement of Security Interest Act, 2002 (54 of 2002);  
d. “Central Registry” means the Central R...
