In [1]:
!pip install langchain -q

In [2]:
!pip install -U langchain-community -q

In [3]:
!pip install sentence_transformers -q

In [4]:
!pip install chromadb[sqlite]==0.4.22 -q



In [5]:
!pip install -q streamlit pandas python-dotenv langchain-google-genai pytesseract pillow pypdf langchain-huggingface


In [6]:
import os
import pytesseract
from PIL import Image
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader, TextLoader, CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
from langchain_community.embeddings import (HuggingFaceEmbeddings, JinaEmbeddings)
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA



  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Read the .env file into the process environment, then fetch the Gemini API key
# and stop immediately if it is absent or empty.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY not found in environment variables. Please check your .env file.")

In [8]:
# Read the RBI policy notes (Markdown, UTF-8) into one string and echo it as the
# cell output.
with open("Data/RBI-Policies.md", mode="r", encoding="utf-8") as policy_file:
    markdown_document = policy_file.read()
markdown_document

"# RBI-Aligned Policies and Eligibilities for Applying and Getting Loans from Banks\n\n## Introduction\n\nThe Reserve Bank of India (RBI) regulates the banking sector in India to ensure transparency, fair practices, and financial stability. RBI guidelines on loans and advances cover aspects such as Know Your Customer (KYC) norms, creditworthiness assessment, interest rate linkages to external benchmarks, and specific requirements for different loan types. These apply to all scheduled commercial banks and Non-Banking Financial Companies (NBFCs). Borrowers must meet eligibility criteria based on credit score, income, and purpose of the loan. This document summarizes key RBI-aligned policies and eligibilities for major loan categories, focusing on application and approval processes. All banks must adhere to the Fair Practices Code, ensuring transparent communication and no harassment during recovery.\n\n**Note:** Policies are subject to updates; always verify with the latest RBI circulars

In [None]:
# -------------- Reading PDF -----------
# Load the co-origination circular; `pdf_read` holds the Document objects
# returned by the loader.
from langchain_community.document_loaders import PyPDFLoader, CSVLoader

pdf_read = PyPDFLoader(
    "Data/Co-origination of loans by Banks and NBFCs for lending to priority sector.pdf"
).load()


In [None]:
# Load the loan-application records. The displayed output shows one Document per
# CSV row, with the row index in its metadata and "column: value" lines as content.
csv_read = CSVLoader('Data/loan_applications.csv').load()
csv_read

[Document(metadata={'source': 'loan_applications.csv', 'row': 0}, page_content='Application_ID: 1001\nApplicant_Name: Applicant_1001\nAge: 34\nMarital_Status: Married\nDependents: 4\nEmployment_Type: Full-time\nYears_Employed: 13\nPrimary_Income: 35338\nPassive_Income: 5041\nDebt_Obligations: 29882\nLoan_Amount_Requested: 434616\nLoan_Term: 10\nLoan_Purpose: Other\nProperty_Value: 571688\nCredit_Score: 641\nRepayment_Worthiness_Score: 94\nApproval_Status: Approved'),
 Document(metadata={'source': 'loan_applications.csv', 'row': 1}, page_content='Application_ID: 1002\nApplicant_Name: Applicant_1002\nAge: 47\nMarital_Status: Married\nDependents: 0\nEmployment_Type: Unemployed\nYears_Employed: 16\nPrimary_Income: 141339\nPassive_Income: 12022\nDebt_Obligations: 11928\nLoan_Amount_Requested: 176777\nLoan_Term: 25\nLoan_Purpose: Refinance\nProperty_Value: 338226\nCredit_Score: 675\nRepayment_Worthiness_Score: 71\nApproval_Status: Rejected'),
 Document(metadata={'source': 'loan_applications.

In [11]:
# ----------- Combine the Markdown, PDF and CSV content into one text -----------------
# Flatten each loader result into a single string, one blank line between entries.
text1 = "\n\n".join(page.page_content for page in pdf_read)
text2 = "\n\n".join(row.page_content for row in csv_read)

# Each section is followed by a blank line; the order is title, Markdown
# policy text, PDF text, CSV rows.
sections = [
    "Combined data of PDF, CSV",
    f"Markdown file\n\n{markdown_document}",
    f"## PDF Content\n\n{text1}",
    f"## CSV Content\n\n{text2}",
]
all_data = "".join(section + "\n\n" for section in sections)

In [12]:
# Save the combined corpus to disk. The encoding is explicit because the source
# Markdown is read as UTF-8; relying on the platform default encoding can fail
# or garble non-ASCII characters on systems whose default is not UTF-8.
with open("Data/all_data.md", "w", encoding="utf-8") as f:
    f.write(all_data)

In [13]:
# Echo the combined corpus as the cell output for a visual sanity check.
all_data

'Combined data of PDF, CSV\n\nMarkdown file\n\n# RBI-Aligned Policies and Eligibilities for Applying and Getting Loans from Banks\n\n## Introduction\n\nThe Reserve Bank of India (RBI) regulates the banking sector in India to ensure transparency, fair practices, and financial stability. RBI guidelines on loans and advances cover aspects such as Know Your Customer (KYC) norms, creditworthiness assessment, interest rate linkages to external benchmarks, and specific requirements for different loan types. These apply to all scheduled commercial banks and Non-Banking Financial Companies (NBFCs). Borrowers must meet eligibility criteria based on credit score, income, and purpose of the loan. This document summarizes key RBI-aligned policies and eligibilities for major loan categories, focusing on application and approval processes. All banks must adhere to the Fair Practices Code, ensuring transparent communication and no harassment during recovery.\n\n**Note:** Policies are subject to update

In [14]:
# Chunk the data ------------------------
# Split only at top-level ("#") headings; add ("##", "Header 2") and
# ("###", "Header 3") to this list to also split on deeper headings.
# NOTE(review): because "##" is not a split point, the "## PDF Content" and
# "## CSV Content" sections presumably stay attached to whichever "#" section
# precedes them, which can make that chunk very large - confirm this is intended.
headers_to_split_on = [("#", "Header 1")]

# strip_headers=False keeps the heading line inside each chunk's text.
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on,
    strip_headers=False,
)
md_header_splits = markdown_splitter.split_text(all_data)
md_header_splits


[Document(metadata={}, page_content='Combined data of PDF, CSV  \nMarkdown file'),
 Document(metadata={'Header 1': 'RBI-Aligned Policies and Eligibilities for Applying and Getting Loans from Banks'}, page_content='# RBI-Aligned Policies and Eligibilities for Applying and Getting Loans from Banks  \n## Introduction  \nThe Reserve Bank of India (RBI) regulates the banking sector in India to ensure transparency, fair practices, and financial stability. RBI guidelines on loans and advances cover aspects such as Know Your Customer (KYC) norms, creditworthiness assessment, interest rate linkages to external benchmarks, and specific requirements for different loan types. These apply to all scheduled commercial banks and Non-Banking Financial Companies (NBFCs). Borrowers must meet eligibility criteria based on credit score, income, and purpose of the loan. This document summarizes key RBI-aligned policies and eligibilities for major loan categories, focusing on application and approval process

In [15]:
# Plain-text content of every chunk, in chunk order.
sentences = [chunk.page_content for chunk in md_header_splits]

In [16]:
# ---------------------- Embeddings with HuggingFace----------------
# NOTE: this import rebinds HuggingFaceEmbeddings, which the imports cell already
# pulled from langchain_community.embeddings; from this cell onward the
# langchain_huggingface class is the one in use.

from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model instance; it is passed to the Chroma vector store further down.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [17]:
# --------------- Testing the Embeddings ---------------
# Smoke test: embed one short sentence and display the first vector component.
text = "This is a test document."

query_result = embeddings.embed_query(text)
query_result[0]

-0.03833858668804169

In [18]:
# Gemini chat model used to generate answers; authenticated with the key loaded
# from the environment.
llm = ChatGoogleGenerativeAI(
    model="gemini-flash-lite-latest",
    google_api_key=GEMINI_API_KEY,
    temperature=0.7,
)

In [19]:
# DEFINING THE CHROMA DB VECTOR STORE ----------------------------------------------------------------
# Give every chunk a stable, position-based id. Without explicit ids each re-run
# of this cell appends a fresh copy of every chunk to the persistent collection,
# so the retriever starts returning duplicates. With stable ids LangChain's
# Chroma wrapper upserts, i.e. a re-run overwrites the same entries instead.
# Entries written by earlier runs (random ids) remain in ./chroma-db until that
# directory is deleted once.
chunk_ids = [f"chunk-{position}" for position in range(len(md_header_splits))]

vectorstore = Chroma.from_documents(
    md_header_splits,
    embeddings,
    ids=chunk_ids,
    persist_directory="./chroma-db",
)
# vectorstore.persist() is intentionally not called: with the chromadb 0.4.x
# release this notebook installs, a store created with persist_directory is
# written to disk automatically and persist() only emits a deprecation warning.


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
  vectorstore.persist()


In [20]:
# QUERYING GEMINI LLM VIA RAG QA CHAIN --------------------------------------------------------------
# The retriever hands the 5 best-matching chunks to the LLM for each question.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# One end-to-end test question; the chain's answer text is under the "result" key.
query = "What are the eligibility criteria in the Loan Advisory Policy?"
response = qa_chain.invoke({"query": query})
print(response["result"])

Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


The eligibility criteria for loans are generally based on credit score, income, and the purpose of the loan, as per the RBI-aligned policies.

Here are the specific eligibility criteria mentioned for different loan categories:

### General Eligibility Criteria for All Loans:
*   **Credit Score:** Typically **$\geq 670$** is preferred.
*   **Debt-to-Income Ratio (DTI):** Ideally **$< 50\%$**.
*   **Minimum Age:** **21 years** (though 18 years for education loans).
*   **Income Proof:** Banks require salary slips (3-6 months), ITR (2 years), and bank statements.

### Home Loans Eligibility:
*   **Income:** Salaried/Self-Employed with stable income (minimum **₹25,000/month**).
*   **Credit Score:** **$\geq 670$**.
*   **Property:** Must be in India; NRIs are eligible but restricted from certain countries.

### Personal Loans Eligibility:
*   **Age:** **21-60 years**.
*   **Employment/Income:** Employed with a minimum income of **₹15,000-₹25,000/month**.
*   **Credit Score:** **$\geq 700$*

In [21]:
# Streamlit UI Setup ---------------------------------------------------------------------------
# Page chrome: browser-tab title and icon, then the on-page title and header.
st.set_page_config(
    page_title="RAG Chatbot",
    page_icon="🏦",
    layout="centered",
)
st.title("💬 RAG Chatbot with ChromaDB")
st.header("LoanBot - Learn about Bank Loan Policy")

# Create the chat history once; it is kept in session state after that.
if "messages" not in st.session_state:
    st.session_state["messages"] = []


2025-10-01 23:56:31.572 
  command:

    streamlit run c:\Users\yash5\anaconda3\envs\loanenv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-10-01 23:56:31.572 Session state does not function when running a script without `streamlit run`


In [22]:
# LOADING STREAMLIT UI ---------------------------------------------------------------------------------------
# NOTE: this UI only works when the code runs as a script under `streamlit run`;
# inside a notebook kernel Streamlit reports that session state does not function.

# Input form; clear_on_submit empties the text box after each send.
with st.form("chat_form", clear_on_submit=True):
    user_input = st.text_input("Ask me anything:", "")
    submit = st.form_submit_button("Send")

# Ignore submissions that are empty or contain only whitespace.
if submit and user_input.strip():
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": user_input})

    # Get response from RAG QA. Chain.run() is deprecated; invoke() with the
    # "query" input key and the "result" output key is the same call the test
    # query earlier in this notebook uses.
    with st.spinner("Thinking..."):
        result = qa_chain.invoke({"query": user_input})["result"]

    # Add bot response to chat history
    st.session_state.messages.append({"role": "assistant", "content": result})


# Render the whole conversation, oldest message first.
for message in st.session_state.messages:
    speaker = "You" if message["role"] == "user" else "Bot"
    st.markdown(f"**{speaker}:** {message['content']}")


# Run the app: save this code as a .py script and start it from a terminal with
# `streamlit run <script>.py`. There is no in-code entry point such as st.run().


