🔧 Cell 1: Install Required Packages

In [1]:
# 📦 Install dependencies (only once)
!pip install openai pandas faiss-cpu sentence-transformers kagglehub ipywidgets --quiet



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
!python -m pip install --upgrade pip


Collecting pip
  Using cached pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Using cached pip-25.1.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.0.1
    Uninstalling pip-25.0.1:
      Successfully uninstalled pip-25.0.1
Successfully installed pip-25.1.1


🧠 Cell 2: Import Libraries

In [2]:
# 🧠 Import core libraries
import os
import warnings
import pandas as pd
import numpy as np
import openai
import faiss
import kagglehub
from sentence_transformers import SentenceTransformer
from IPython.display import display, Markdown

# Disable HuggingFace symlink warnings
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
warnings.filterwarnings("ignore", category=UserWarning, module='tqdm')


🔐 Cell 3: Load OpenAI API Key

In [39]:
import openai

# ✅ Direct assignment of API key
openai.api_key = "a06bc67fafcc0223852ae467c341b4ab"

# Optional: test it's working



📂 Cell 4: Load Dataset from Kaggle

In [29]:
# 📂 Download and load Kaggle dataset
dataset_path = kagglehub.dataset_download("sonalisingh1411/loan-approval-prediction")
csv_file = os.path.join(dataset_path, "Training Dataset.csv")

df = pd.read_csv(csv_file)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()


Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y


📄 Cell 5: Format Dataset into Text Documents

In [30]:
# 📄 Convert each row into a plain-text document
documents = []
for idx, row in df.iterrows():
    doc = "\n".join([f"{col}: {row[col]}" for col in df.columns])
    documents.append(doc)


🧬 Cell 6: Embed Documents using SentenceTransformer

In [31]:
# 🧬 Load embedding model and encode documents
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = model.encode(documents, show_progress_bar=True)


Batches:   0%|          | 0/15 [00:00<?, ?it/s]

📊 Cell 7: Create FAISS Index

In [32]:
# 📊 Build FAISS vector index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))


🔍 Cell 8: Define Retrieval Function

In [33]:
# 🔍 Retrieve top-k similar documents
def retrieve(query, k=3):
    query_vec = model.encode([query])
    distances, indices = index.search(np.array(query_vec), k)
    return [documents[i] for i in indices[0]]


💬 Cell 9: Generate Answer from OpenAI

In [40]:
from openai import OpenAI

# Create a client instance once (do this outside the function if possible)
client = OpenAI(api_key = "a06bc67fafcc0223852ae467c341b4ab")  # Replace with your actual API key

# 💬 Generate a natural language answer from retrieved context
def generate_answer(question):
    context = "\n\n".join(retrieve(question))
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant answering loan-related queries based on the following applicant data."
        },
        {
            "role": "user",
            "content": f"Context:\n{context}\n\nQuestion:\n{question}"
        }
    ]
    try:
        res = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.3
        )
        return res.choices[0].message.content
    except Exception as e:
        return f"Error from OpenAI: {e}"


🧾 Cell 10: Ask Questions and Display Answers

In [41]:
# 🧾 Ask a question and view the response in Markdown
def ask(question):
    answer = generate_answer(question)
    display(Markdown(f"**Q:** {question}\n\n**A:** {answer}"))


🧪 Cell 11: Example Questions to Try

In [42]:
# 🧪 Try asking some questions
ask("What are common loan amounts for applicants with high income?")
ask("Tell me about applicants who got their loans approved.")
ask("Why are some loans rejected?")


**Q:** What are common loan amounts for applicants with high income?

**A:** Error from OpenAI: Error code: 401 - {'error': {'message': 'Incorrect API key provided: a06bc67f********************b4ab. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

**Q:** Tell me about applicants who got their loans approved.

**A:** Error from OpenAI: Error code: 401 - {'error': {'message': 'Incorrect API key provided: a06bc67f********************b4ab. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

**Q:** Why are some loans rejected?

**A:** Error from OpenAI: Error code: 401 - {'error': {'message': 'Incorrect API key provided: a06bc67f********************b4ab. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}