In [1]:
# --- STEP 1: Import Libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
# --- STEP 2: Load Data ---
import os

# Location of the training CSV. The default is the original author's local
# download path; set the LOAN_DATA_PATH environment variable to point at the
# file on any other machine instead of editing this cell.
DATA_PATH = os.environ.get(
    'LOAN_DATA_PATH',
    'C:/Users/ADMIN/Downloads/train_u6lujuX_CVtuZ9i (1).csv',
)

data = pd.read_csv(DATA_PATH)
print("Shape of dataset:", data.shape)
data.head()

Shape of dataset: (614, 13)


Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


# Clean Missing Values and Encode Categories

In [9]:
# --- STEP 3: Handle Missing Values ---
# Columns listed here are filled with their most frequent value;
# LoanAmount is filled with its median instead.
mode_filled_cols = [
    'Gender', 'Married', 'Dependents', 'Self_Employed',
    'Loan_Amount_Term', 'Credit_History',
]
fill_values = {col: data[col].mode()[0] for col in mode_filled_cols}
fill_values['LoanAmount'] = data['LoanAmount'].median()
data.fillna(fill_values, inplace=True)

# --- STEP 3.1: Fix Dependents column ---
# The '3+' label is mapped to 3 so the whole column can be cast to float.
dependents_clean = data['Dependents'].replace({'3+': 3})
data['Dependents'] = dependents_clean.astype(float)

# --- STEP 4: Feature Engineering ---
# Three derived columns: combined income, log of the loan amount, and the
# ratio of combined income to loan amount.
loan_amount = data['LoanAmount']
data['Total_Income'] = data['ApplicantIncome'] + data['CoapplicantIncome']
data['LoanAmount_log'] = np.log(loan_amount)
data['Income_to_Loan'] = data['Total_Income'] / loan_amount


# Encode Categorical Columns

In [11]:
from sklearn.preprocessing import LabelEncoder

# Text columns that are replaced by integer codes.
CATEGORICAL_COLS = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status']

# One fitted encoder per column. Refitting a single shared encoder would throw
# away every column's category -> integer mapping except the last one, and that
# mapping is needed to encode new inputs the same way at prediction time
# (inspect label_encoders[col].classes_ to see the order).
label_encoders = {}
for col in CATEGORICAL_COLS:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le
# After the loop `le` is still the encoder fitted on the last column
# ('Loan_Status'), as it was when a single encoder was reused.

# 📊 PHASE 3: Model Training and Evaluation

Create another notebook:
notebooks/02_model_training.ipynb

In [12]:
# --- STEP 1: Import libraries ---
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# --- STEP 2: Split data ---
# Target is Loan_Status; the features are every remaining column except Loan_ID.
y = data['Loan_Status']
X = data.drop(columns=['Loan_ID', 'Loan_Status'])

# 80/20 train/test split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- STEP 3: Train model ---
# NOTE(review): the Streamlit app cell later in this notebook loads 'model.pkl',
# but nothing visible here writes that file — confirm where the model is saved.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- STEP 4: Evaluate ---
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)

Accuracy: 0.7967479674796748

Confusion Matrix:
 [[23 20]
 [ 5 75]]

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.53      0.65        43
           1       0.79      0.94      0.86        80

    accuracy                           0.80       123
   macro avg       0.81      0.74      0.75       123
weighted avg       0.80      0.80      0.78       123



# 🌐 PHASE 4: Streamlit App Deployment

Create a file:
src/app.py

Paste this code:

In [15]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.12.0-py2.py3-none-any.whl (9.1 MB)
Collecting semver
  Downloading semver-3.0.4-py3-none-any.whl (17 kB)
Collecting gitpython!=3.1.19
  Downloading gitpython-3.1.45-py3-none-any.whl (208 kB)
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
Collecting blinker>=1.0.0
  Downloading blinker-1.9.0-py3-none-any.whl (8.5 kB)
Collecting rich>=10.11.0
  Downloading rich-14.2.0-py3-none-any.whl (243 kB)
Collecting altair>=3.2.0
  Downloading altair-5.5.0-py3-none-any.whl (731 kB)
Collecting protobuf<4,>=3.12
  Downloading protobuf-3.20.3-cp39-cp39-win_amd64.whl (904 kB)
Collecting cachetools>=4.0
  Downloading cachetools-6.2.1-py3-none-any.whl (11 kB)
Collecting tzlocal>=1.1
  Downloading tzlocal-5.3.1-py3-none-any.whl (18 kB)
Collecting pympler>=0.9
  Downloading Pympler-1.1-py3-none-any.whl (165 kB)
Collecting validators>=0.2
  Downloading validators-0.35.0-py3-none-any.whl (44 kB)
Collecting narwhals>=1.14.2
  

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'c:\\programdata\\anaconda3\\lib\\site-packages\\pygments-2.10.0.dist-info\\AUTHORS'
Consider using the `--user` option or check the permissions.



In [16]:
import streamlit as st
import numpy as np
import pickle

# Streamlit front end for the loan model: collects applicant details, rebuilds
# the same feature row the training notebook used, and shows the prediction.
#
# The feature row must follow the column order of the training matrix
# (data.drop(['Loan_ID', 'Loan_Status'], axis=1)):
#   Gender, Married, Dependents, Education, Self_Employed, ApplicantIncome,
#   CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History,
#   Property_Area, Total_Income, LoanAmount_log, Income_to_Loan
# NOTE(review): assumes model.pkl holds the model fitted in that notebook — confirm.

# Integer codes as produced by LabelEncoder in the training notebook, which
# numbers the classes in sorted (alphabetical) order.
GENDER_CODES = {"Female": 0, "Male": 1}
YES_NO_CODES = {"No": 0, "Yes": 1}
EDUCATION_CODES = {"Graduate": 0, "Not Graduate": 1}
PROPERTY_AREA_CODES = {"Rural": 0, "Semiurban": 1, "Urban": 2}
# Training replaces the '3+' label with 3 and casts the column to float.
DEPENDENTS_CODES = {"0": 0.0, "1": 1.0, "2": 2.0, "3+": 3.0}

# SECURITY: unpickling runs arbitrary code; only load a model.pkl you created.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

st.title("🏦 Bank Loan Eligibility Predictor")

gender = st.selectbox("Gender", ["Male", "Female"])
married = st.selectbox("Married", ["Yes", "No"])
dependents = st.selectbox("Dependents", ["0", "1", "2", "3+"])
education = st.selectbox("Education", ["Graduate", "Not Graduate"])
self_employed = st.selectbox("Self Employed", ["Yes", "No"])
applicant_income = st.number_input("Applicant Income")
coapplicant_income = st.number_input("Coapplicant Income")
loan_amount = st.number_input("Loan Amount")
loan_term = st.number_input("Loan Amount Term", value=360)
credit_history = st.selectbox("Credit History (1=Good, 0=Bad)", [1.0, 0.0])
property_area = st.selectbox("Property Area", ["Urban", "Semiurban", "Rural"])

if st.button("Predict Eligibility"):
    if loan_amount <= 0:
        # log(0) and division by zero are undefined, and the training features
        # are computed from the raw loan amount, so a positive value is required.
        st.error("Loan Amount must be greater than 0.")
    else:
        # Derived features, computed exactly as in the training notebook
        # (no +1 offset) so the model sees values on the scale it was fitted on.
        total_income = applicant_income + coapplicant_income
        loan_amount_log = np.log(loan_amount)
        income_to_loan = total_income / loan_amount

        features = np.array([[
            GENDER_CODES[gender],
            YES_NO_CODES[married],
            DEPENDENTS_CODES[dependents],
            EDUCATION_CODES[education],
            YES_NO_CODES[self_employed],
            applicant_income,
            coapplicant_income,
            loan_amount,
            loan_term,
            credit_history,
            PROPERTY_AREA_CODES[property_area],
            total_income,
            loan_amount_log,
            income_to_loan,
        ]])

        prediction = model.predict(features)

        # Training encodes Loan_Status 'N' -> 0 and 'Y' -> 1.
        st.success("✅ Loan Approved!" if prediction[0] == 1 else "❌ Loan Not Approved")


ModuleNotFoundError: No module named 'streamlit'


# Run locally with:

In [8]:
cd src
streamlit run app.py

SyntaxError: invalid syntax (Temp/ipykernel_16432/2215295215.py, line 1)