In [1]:
import os

import pandas as pd
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

In [2]:
# === Load the Dataset ===
# The CSV location can be overridden with the LOAN_DATA_CSV environment variable so
# the notebook is not tied to one machine; the original absolute path is kept as the
# fallback so existing runs behave exactly as before.
DATA_PATH = os.environ.get(
    "LOAN_DATA_CSV",
    "C:\\Users\\Saksham Kaushik\\Downloads\\Training Dataset.csv",
)
df = pd.read_csv(DATA_PATH)

In [3]:
# === Convert Rows to Text Documents ===
# Readable labels for the raw Loan_Status codes. Any other value (including a
# missing cell) is reported as "Unknown" rather than being counted as a rejection.
STATUS_LABELS = {"Y": "Approved", "N": "Rejected"}


def _field(value):
    """Return ``value`` unchanged, or "Unknown" when the cell is missing (NaN/None).

    Without this, missing cells would be interpolated into the text as "nan".
    """
    return "Unknown" if pd.isna(value) else value


# Build one text document per DataFrame row, in row order.
documents = []
for idx, row in df.iterrows():
    content = (
        f"Loan Application:\n"
        f"- Gender: {_field(row['Gender'])}\n"
        f"- Married: {_field(row['Married'])}\n"
        f"- Dependents: {_field(row['Dependents'])}\n"
        f"- Education: {_field(row['Education'])}\n"
        f"- Self Employed: {_field(row['Self_Employed'])}\n"
        f"- Income: {_field(row['ApplicantIncome'])} + {_field(row['CoapplicantIncome'])}\n"
        f"- Loan Amount: {_field(row['LoanAmount'])}\n"
        f"- Term: {_field(row['Loan_Amount_Term'])}\n"
        f"- Credit History: {_field(row['Credit_History'])}\n"
        f"- Property Area: {_field(row['Property_Area'])}\n"
        f"- Loan Status: {STATUS_LABELS.get(row['Loan_Status'], 'Unknown')}"
    )
    # Record the row's index label so a retrieved chunk can be traced back to df.
    documents.append(Document(page_content=content, metadata={"row": idx}))

In [4]:
# === Split Text ===
# Splitter settings are named here so they are easy to find and tune.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# Run every loan-application document through the splitter; `docs` feeds the vector store.
docs = splitter.split_documents(documents)

In [5]:
# === Embed + Store in FAISS ===
# NOTE(review): the cell output echoes the HuggingFaceEmbeddings line, which looks
# like a warning about this constructor — confirm against the installed version.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
# Embed the split documents and index them in a FAISS vector store.
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding)

  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [6]:
# === Load HuggingFace Language Model ===
model_name = "google/flan-t5-small"

# Weights and tokenizer are both loaded from the same checkpoint name.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wrap model + tokenizer in a text2text-generation pipeline, then expose that
# pipeline to LangChain as `llm`.
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=256,
)
llm = HuggingFacePipeline(pipeline=pipe)

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=pipe)


In [7]:
# === Build the Retrieval-Augmented Generation Chain ===
# The FAISS store is exposed as a retriever (default settings) and paired with
# the language model in a RetrievalQA chain.
retriever = vectorstore.as_retriever()
rag_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
)

In [9]:
# === Chat Interface ===
EXIT_COMMANDS = {"exit", "quit"}


def chat_loop(chain):
    """Run an interactive question/answer loop against ``chain`` until the user quits.

    Args:
        chain: A RetrievalQA-style chain that accepts ``{"query": <text>}`` via
            ``invoke`` and returns a mapping containing a ``"result"`` entry.

    The loop ends when the user types one of ``EXIT_COMMANDS`` (case-insensitive,
    surrounding whitespace ignored), or when the prompt is interrupted / stdin is
    closed. Blank input is ignored instead of being sent to the chain.
    """
    print("💬 RAG Q&A Chatbot is ready! Ask about the loan data (type 'exit' to quit).")
    while True:
        try:
            query = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Prompt interrupted or stdin closed: leave cleanly rather than with a traceback.
            print("\nHave a nice day!")
            break
        if not query:
            # Nothing to ask; prompt again without calling the model.
            continue
        if query.lower() in EXIT_COMMANDS:
            print("Have a nice day!")
            break
        # `invoke` replaces the deprecated `Chain.run`; the answer is under "result".
        result = chain.invoke({"query": query})["result"]
        print("🤖 Answer:", result)


chat_loop(rag_chain)

💬 RAG Q&A Chatbot is ready! Ask about the loan data (type 'exit' to quit).



You:  What is the loan status of the applicant with high income?


🤖 Answer: No - Education: Graduate - Self Employed - No - Income: 8300 + 0.0 - Loan Amount: 152.0 - Term: 300.0 - Credit History: 0.0 - Property Area: Semiurban - Loan Status: Rejected



You:  exit


Have a nice day!
