**Creating Chunks**

In [4]:
!pip install langchain tiktoken pandas



In [29]:
import pandas as pd

df = pd.read_csv("/content/bom_loan_clean.csv")

print(df.head(2))

     loan_type                                                url  \
0    home_loan  https://bankofmaharashtra.bank.in/personal-ban...   
1  retail_loan     https://bankofmaharashtra.bank.in/retail-loans   

                                             content  \
0  Home About Us Locate Us Careers Contact Us ⚲ S...   
1  Home About Us Locate Us Careers Contact Us ⚲ S...   

                                     cleaned_content  \
0  Features & Benefits Documents Required Interes...   
1  Interest Rate in Personal loan accounts will a...   

                                    filtered_content  
0  Features & Benefits Documents Required Interes...  
1  Interest Rate in Personal loan accounts will a...  


In [30]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    separators = ["\n", ".", "!", "?", ":", ";"]
)

In [31]:
chunks=[]

for _, row in df.iterrows():
  content = str(row["filtered_content"])
  loan_type = row["loan_type"]

  for chunk in splitter.split_text(content):
        chunks.append({
            "loan_type": loan_type,
            "chunk": chunk.strip()
        })

chunks_df = pd.DataFrame(chunks)
print("Chunking complete. Total chunks:", len(chunks_df))
chunks_df.head(3)

Chunking complete. Total chunks: 110


Unnamed: 0,loan_type,chunk
0,home_loan,Features & Benefits Documents Required Interes...
1,home_loan,.Bank of Maharashtra Offers Lowest Interest Ra...
2,home_loan,.Proof of Identification : (any one) Election ...


In [32]:
import os

os.makedirs("data/processed", exist_ok=True)
output_path = "data/processed/bom_loan_chunks.csv"
chunks_df.to_csv(output_path, index=False)
print(f"File saved at {output_path}")

File saved at data/processed/bom_loan_chunks.csv


**Creating Emmbeddings**

In [33]:
df = pd.read_csv("data/processed/bom_loan_chunks.csv")
print(df.head())
print(f"Total chunks: {len(df)}")

   loan_type                                              chunk
0  home_loan  Features & Benefits Documents Required Interes...
1  home_loan  .Bank of Maharashtra Offers Lowest Interest Ra...
2  home_loan  .Proof of Identification : (any one) Election ...
3  home_loan  : (any one) Electricity Bill Election ID Card ...
4  home_loan  .Shop Establishment Act Tax Registration Copy ...
Total chunks: 110


In [34]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
df["embedding"] = df["chunk"].apply(lambda x: model.encode(x).tolist())

In [35]:
os.makedirs("data/vector_data", exist_ok=True)
df.to_pickle("data/vector_data/bom_with_embeddings.pkl")
print("Embeddings saved successfully")

Embeddings saved successfully


**Storing In Vector Database - ChromaDB**

In [9]:
!pip install chromadb --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.8/20.8 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.3/103.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m45.5 MB/s[0m eta [36m0:00:

In [36]:
import chromadb
from chromadb.utils import embedding_functions
import pandas as pd

client = chromadb.PersistentClient(path="data/chroma_db")

embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

In [37]:
df = pd.read_pickle("data/vector_data/bom_with_embeddings.pkl")

In [38]:
collection = client.get_or_create_collection(
    name="bom_loan_docs",
    embedding_function=embedding_func
)

In [39]:
collection.add(
    ids=[str(i) for i in range(len(df))],
    documents=df["chunk"].tolist(),
    metadatas=[{"loan_type":t} for t in df["loan_type"].tolist()],
)

print("All data added to chromadb")

All data added to chromadb


In [40]:
query = "What is the interest rate for education loans?"
results = collection.query(query_texts=[query], n_results=8)

for i, doc in enumerate(results["documents"][0]):
    print(f"\nResult {i+1}:")
    print(doc[:400])


Result 1:
.For List B & C category Institutes, a minimal 5% margin of the loan amount is required.Under Model Education Loan uptoRs.Does Bank of Maharashtra offers any special interest rates concessions for education loans.Yes, under Maha Scholar Education Loans, there is a concession of 0.10% in interest rate to Girl students.However, under Model Education Loan scheme, Bank offers interest concessions keep

Result 2:
.Check your eligibility, upload your documents and obtain the loan sanction.Education Loan Interest Rate 7.A * For more Interest Rates & Charges Click here EMI Calculator Education Loan EMI Calculator Course Period (Months) * Loan Amount * Year wise Amount Required in Year 1 Interest Rate ( % P

Result 3:
.Are there any processing fees on Bank of Maharashtra Education Loans.There is no processing fee irrespective of loan amount for Mahabank Scholar Loans for premier Institutes.For Mahabank Model Education Loans, there is a minimal Processing fee of 0.50% of the loan amou

**Integrating LightWeight LLM**

In [47]:
!pip install -q huggingface_hub
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineGrained).
The token `HUGGINGFACE_HUB_API_TOKEN` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active tok

In [48]:
from huggingface_hub import InferenceClient


hf_token = os.environ.get("HUGGINGFACE_HUB_API_TOKEN")


client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    token=hf_token
)

context = "\n".join(results["documents"][0])
query = input("Enter your question\n")

prompt = f"""
You are a banking assistant for Bank of Maharashtra.
Based on the context below, answer the question accurately.
Only answer using the context provided, otherwise respond that information isn’t available.

Context:
{context}

Question: {query}

"""

response = client.chat_completion(
    messages=[{"role": "user", "content": prompt}],
    max_tokens=300,
    temperature=0.3
)

print("\nFinal Answer:")
print(response.choices[0].message["content"])

Enter your question
what is home loan

Final Answer:
 A home loan is a type of loan provided by Bank of Maharashtra for purchasing or constructing a house. It is not mentioned in the context provided as one of the variants of Education Loans offered by the bank. Therefore, the answer to the question is not available based on the context.
