In [19]:
!pip install transformers scikit-learn pandas




In [20]:
from IPython.display import Markdown, display

def show_header():
    """Render the notebook's title banner as Markdown in the cell output."""
    # Kept as a single literal so the rendered banner stays exactly as authored.
    banner_text = """
# 🤖 Loan Approval Q&A Chatbot using RAG
Welcome to a Retrieval-Augmented Generation (RAG) chatbot that answers questions about loan approvals.

- 🔍 Retrieves facts using TF-IDF from the dataset
- 🧠 Generates answers using Google's FLAN-T5 model
- 📊 Based on real-world loan approval data from Kaggle

---
"""
    banner = Markdown(banner_text)
    display(banner)

show_header()



# 🤖 Loan Approval Q&A Chatbot using RAG
Welcome to a Retrieval-Augmented Generation (RAG) chatbot that answers questions about loan approvals.

- 🔍 Retrieves facts using TF-IDF from the dataset  
- 🧠 Generates answers using Google's FLAN-T5 model  
- 📊 Based on real-world loan approval data from Kaggle  

---


In [21]:
import pandas as pd

# Load data
df = pd.read_csv("Training Dataset.csv")

# Forward-fill missing values. `ffill()` is used instead of
# `fillna(method="ffill")`, which emits a deprecation warning in recent pandas.
# NOTE(review): this also fills gaps in Loan_Status / Credit_History with the
# previous row's value, so the rates below include imputed rows — confirm intended.
df = df.ffill()

# Normalize Loan_Status: compare as upper-case text with stray whitespace
# removed, so values such as " y" are still counted as approvals.
df['Loan_Status'] = df['Loan_Status'].astype(str).str.strip().str.upper()

# Generate knowledge base
total = len(df)
# `.get` yields 0 when no application carries status 'Y' (or the frame is empty).
approval_rate = df['Loan_Status'].value_counts(normalize=True).get('Y', 0) * 100

# Share of each Loan_Status inside every Credit_History group (rows = credit
# history value, columns = status). fill_value=0 makes a group without any
# approvals report 0 rather than NaN.
if 'Credit_History' in df.columns:
    credit_impact = (
        df.groupby('Credit_History')['Loan_Status']
        .value_counts(normalize=True)
        .unstack(fill_value=0)
    )
else:
    credit_impact = pd.DataFrame()

knowledge_base = []
knowledge_base.append(f"There are {total} loan applications.")
knowledge_base.append(f"The overall loan approval rate is {approval_rate:.2f}%.")

# Credit-history facts: each group is looked up on its own, so a dataset that
# lacks one of the two groups still contributes the fact for the other one.
# Only lookup failures are tolerated; any other error is allowed to surface.
credit_facts = []
for flag in (1, 0):
    try:
        rate = credit_impact.loc[flag, 'Y'] * 100
    except (KeyError, TypeError):
        # Missing group, missing 'Y' column, or an index that cannot hold `flag`.
        continue
    credit_facts.append(f"Credit history = {flag} approval rate: {rate:.2f}%.")
knowledge_base.extend(
    credit_facts or ["Could not compute approval rate by credit history."]
)

# NOTE(review): the three statements below are fixed text, not computed from
# `df` — verify them against the data before relying on the bot's answers.
knowledge_base.append("Higher income usually leads to higher approval chances.")
knowledge_base.append("Self-employed applicants have slightly lower approval rates.")
knowledge_base.append("Urban and semiurban areas have higher approval rates than rural.")


In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# TF-IDF setup: learn the vocabulary and IDF weights from the knowledge base,
# then encode every knowledge-base sentence with the fitted vectorizer.
vectorizer = TfidfVectorizer().fit(knowledge_base)
doc_vectors = vectorizer.transform(knowledge_base)

def retrieve_docs(query, k=2):
    """Return the ``k`` knowledge-base entries most similar to ``query``.

    The query is encoded with the module-level ``vectorizer`` and compared to
    ``doc_vectors`` by cosine similarity; entries come back best match first.

    Args:
        query: Question text to look up.
        k: Maximum number of entries to return.

    Returns:
        list: Entries of ``knowledge_base`` ordered by descending similarity.
    """
    scores = cosine_similarity(vectorizer.transform([query]), doc_vectors).ravel()
    # argsort is ascending, so reverse it to put the highest score first.
    best_first = scores.argsort()[::-1]
    return [knowledge_base[position] for position in best_first[:k]]


In [24]:
from transformers import pipeline

# Load the FLAN-T5 (small) checkpoint behind a text-to-text generation pipeline.
generator = pipeline(
    task="text2text-generation",
    model="google/flan-t5-small",
)

def generate_answer(query, context):
    """Ask the module-level ``generator`` to answer ``query`` given ``context``.

    Args:
        query: The user's question.
        context: Retrieved text placed after the question in the prompt.

    Returns:
        The ``generated_text`` of the first result returned by the generator.
    """
    prompt = "Answer the question: {}\nContext: {}".format(query, context)
    outputs = generator(prompt, max_length=100)
    return outputs[0]['generated_text']


Device set to use cpu


In [25]:
import time
from IPython.display import Markdown, display

def print_typing(text, delay=0.02):
    """Print ``text`` one character at a time to imitate typing.

    Args:
        text: The characters to emit, in order.
        delay: Seconds to pause after each character.
    """
    def _emit(symbol):
        # Flush right away so each character shows up before the pause.
        print(symbol, end='', flush=True)
        time.sleep(delay)

    for symbol in text:
        _emit(symbol)
    # Finish the line once every character has been written.
    print()

def chat():
    """Run an interactive console Q&A session until the user types 'exit'.

    Each non-empty question is answered by retrieving context with
    ``retrieve_docs`` and passing it to ``generate_answer``; the answer is
    shown with the typing effect. Blank input is rejected with a warning.
    A closed input stream or a keyboard/kernel interrupt while waiting for
    input ends the session the same way as typing 'exit'.

    Returns:
        list[tuple[str, str]]: The ``(question, answer)`` pairs of the
        session, in the order they were asked.
    """
    display(Markdown("### 🤖 Welcome to the **Loan Approval Q&A Chatbot**!"))
    print("💬 You can ask questions like:")
    print("  • What affects loan approval?")
    print("  • How does credit history impact approval?")
    print("  • What role does income play in loan approval?\n")
    print("Type 'exit' to quit at any time.\n")

    session_num = 1
    chat_log = []

    while True:
        try:
            query = input(f"🧑‍💼 You [{session_num}]: ").strip()
        except (EOFError, KeyboardInterrupt):
            # No more input can arrive; leave politely instead of surfacing
            # a traceback in the middle of the conversation.
            print("\n👋 Thank you for using the chatbot. Goodbye!")
            break

        if not query:
            print("⚠️ Please enter a valid question.")
            continue

        if query.lower() == 'exit':
            print("\n👋 Thank you for using the chatbot. Goodbye!")
            break

        print("🤖 Bot is thinking...\n")
        context = " ".join(retrieve_docs(query))
        answer = generate_answer(query, context)

        print_typing(f"🤖 Chatbot [{session_num}]: {answer}")

        chat_log.append((query, answer))
        session_num += 1

    # Hand the transcript back so it is not silently discarded.
    return chat_log



In [27]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Single-line input field where the user types a question.
text_box = widgets.Text(
    description='You:',
    placeholder='Ask your loan question here...',
    layout=widgets.Layout(width='80%'),
)

# Area that collects everything the chatbot prints.
output_box = widgets.Output()

# Greeting and example questions, written into the output area once at start-up.
with output_box:
    clear_output()
    print("\n".join([
        "🤖 Welcome to the Loan Approval Chatbot!",
        "💬 Example questions:",
        "   • What affects loan approval?",
        "   • How does credit history impact approval?",
        "   • What role does income play?",
        "   • Does rural area impact approval?\n",
        "📝 Type your question and press Enter.\n",
    ]))

# Callback when user hits Enter
def on_submit(widget):
    """Handle one submission from the question text box.

    The submitted text is stripped, then handled the same way as in the
    console chat: blank input gets a warning, 'exit' (any case) prints a
    goodbye, anything else is answered via ``retrieve_docs`` and
    ``generate_answer``. All messages go to ``output_box``. The text box is
    emptied afterwards on every path, so it is ready for the next question.

    Args:
        widget: The text widget that fired the event; its ``value`` holds the
            submitted text and is reset to an empty string.
    """
    query = widget.value.strip()

    with output_box:
        if not query:
            # Nothing to look up; avoid sending an empty prompt to the model.
            print("⚠️ Please enter a valid question.")
        elif query.lower() == 'exit':
            print("👋 Goodbye!")
        else:
            print(f"🧑 You: {query}")
            print("🤖 Thinking...")
            context = " ".join(retrieve_docs(query))
            answer = generate_answer(query, context)
            print(f"🤖 Bot: {answer}\n")

    # Clear the widget that raised the event (the registered text box).
    widget.value = ''

# Run the handler each time Enter is pressed inside the input field.
# NOTE(review): recent ipywidgets releases appear to deprecate `on_submit` in
# favor of observing `value` — confirm against the installed version.
text_box.on_submit(on_submit)

# Show the input field first, then the transcript area beneath it.
display(text_box)
display(output_box)

Text(value='', description='You:', layout=Layout(width='80%'), placeholder='Ask your loan question here...')

Output()