## Importing Libraries

In [102]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import joblib

In [104]:
pip install pypandoc


Note: you may need to restart the kernel to use updated packages.


In [105]:
df = pd.read_csv("C:/Users/nidhi/OneDrive/Desktop/loan_data.csv")

In [106]:
df

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
376,LP002953,Male,Yes,3+,Graduate,No,5703,0.0,128.0,360.0,1.0,Urban,Y
377,LP002974,Male,Yes,0,Graduate,No,3232,1950.0,108.0,360.0,1.0,Rural,Y
378,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
379,LP002979,Male,Yes,3+,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y


In [107]:
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y


In [108]:
df.tail()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
376,LP002953,Male,Yes,3+,Graduate,No,5703,0.0,128.0,360.0,1.0,Urban,Y
377,LP002974,Male,Yes,0,Graduate,No,3232,1950.0,108.0,360.0,1.0,Rural,Y
378,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
379,LP002979,Male,Yes,3+,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
380,LP002990,Female,No,0,Graduate,Yes,4583,0.0,133.0,360.0,0.0,Semiurban,N


In [109]:
df.shape

(381, 13)

In [110]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 381 entries, 0 to 380
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            381 non-null    object 
 1   Gender             376 non-null    object 
 2   Married            381 non-null    object 
 3   Dependents         373 non-null    object 
 4   Education          381 non-null    object 
 5   Self_Employed      360 non-null    object 
 6   ApplicantIncome    381 non-null    int64  
 7   CoapplicantIncome  381 non-null    float64
 8   LoanAmount         381 non-null    float64
 9   Loan_Amount_Term   370 non-null    float64
 10  Credit_History     351 non-null    float64
 11  Property_Area      381 non-null    object 
 12  Loan_Status        381 non-null    object 
dtypes: float64(4), int64(1), object(8)
memory usage: 38.8+ KB


## Handling Missing Values

In [112]:
df.isnull().sum()

Loan_ID               0
Gender                5
Married               0
Dependents            8
Education             0
Self_Employed        21
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term     11
Credit_History       30
Property_Area         0
Loan_Status           0
dtype: int64

In [113]:
df.isnull().mean()*100

Loan_ID              0.000000
Gender               1.312336
Married              0.000000
Dependents           2.099738
Education            0.000000
Self_Employed        5.511811
ApplicantIncome      0.000000
CoapplicantIncome    0.000000
LoanAmount           0.000000
Loan_Amount_Term     2.887139
Credit_History       7.874016
Property_Area        0.000000
Loan_Status          0.000000
dtype: float64

In [114]:
df = df.drop('Loan_ID',axis =1)

In [115]:
df.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
1,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
2,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
3,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
4,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y


In [116]:
df = df.dropna(subset = ['Gender', 'Dependents', 'Loan_Amount_Term'])

In [120]:
df.isnull().sum()

Gender                0
Married               0
Dependents            0
Education             0
Self_Employed        20
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            0
Loan_Amount_Term      0
Credit_History       30
Property_Area         0
Loan_Status           0
dtype: int64

In [125]:
df['Self_Employed'].unique()

array(['No', 'Yes', nan], dtype=object)

In [130]:
df['Self_Employed'].mode()[0]

'No'

In [134]:
df['Credit_History'].unique()

array([ 1., nan,  0.])

In [135]:
df['Credit_History'].mode()[0]

np.float64(1.0)

In [139]:
df['Self_Employed'] = df['Self_Employed'].fillna(df['Self_Employed'].mode()[0])
df['Credit_History'] = df['Credit_History'].fillna(df['Credit_History'].mode()[0])


In [141]:
df.isnull().sum()

Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

In [143]:
df['Dependents'].unique()

array(['1', '0', '2', '3+'], dtype=object)

In [145]:
df['Dependents'] = df['Dependents'].replace('3+', '4')


In [147]:
df['Dependents'].unique()

array(['1', '0', '2', '4'], dtype=object)

In [149]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 358 entries, 0 to 380
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             358 non-null    object 
 1   Married            358 non-null    object 
 2   Dependents         358 non-null    object 
 3   Education          358 non-null    object 
 4   Self_Employed      358 non-null    object 
 5   ApplicantIncome    358 non-null    int64  
 6   CoapplicantIncome  358 non-null    float64
 7   LoanAmount         358 non-null    float64
 8   Loan_Amount_Term   358 non-null    float64
 9   Credit_History     358 non-null    float64
 10  Property_Area      358 non-null    object 
 11  Loan_Status        358 non-null    object 
dtypes: float64(4), int64(1), object(7)
memory usage: 36.4+ KB


In [151]:
encoding = {
    'Gender': {'Male':1 , 'Female': 0}, 
    'Married': {'Yes': 1, 'No': 0},
    'Dependents': {'0':0, '1':1, '2': 2, '4': 4},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Self_Employed': {'Yes': 1, 'No': 0},
    'Property_Area': {'Rural': 0, 'Semiurban': 2, 'Urban': 1},
    'Loan_Status': {'Y': 1, 'N': 0}
}

In [153]:
df = df.replace(encoding).infer_objects(copy=False)


  df = df.replace(encoding).infer_objects(copy=False)


In [155]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 358 entries, 0 to 380
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             358 non-null    int64  
 1   Married            358 non-null    int64  
 2   Dependents         358 non-null    int64  
 3   Education          358 non-null    int64  
 4   Self_Employed      358 non-null    int64  
 5   ApplicantIncome    358 non-null    int64  
 6   CoapplicantIncome  358 non-null    float64
 7   LoanAmount         358 non-null    float64
 8   Loan_Amount_Term   358 non-null    float64
 9   Credit_History     358 non-null    float64
 10  Property_Area      358 non-null    int64  
 11  Loan_Status        358 non-null    int64  
dtypes: float64(4), int64(8)
memory usage: 36.4 KB


In [157]:
X = df.drop('Loan_Status', axis = 1)
y = df['Loan_Status']

In [159]:
df.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0,0
1,1,1,0,1,1,3000,0.0,66.0,360.0,1.0,1,1
2,1,1,0,0,0,2583,2358.0,120.0,360.0,1.0,1,1
3,1,0,0,1,0,6000,0.0,141.0,360.0,1.0,1,1
4,1,1,0,0,0,2333,1516.0,95.0,360.0,1.0,1,1


In [161]:
num_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']
scaler = StandardScaler()
X[num_cols] = scaler.fit_transform(X[num_cols])

In [163]:
X.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area
0,1,1,1,1,0,0.71163,0.092069,0.80598,0.285826,1.0,0
1,1,1,0,1,1,-0.398856,-0.539332,-1.350425,0.285826,1.0,1
2,1,1,0,0,0,-0.691384,0.447965,0.527735,0.285826,1.0,1
3,1,0,0,1,0,1.705666,-0.539332,1.25813,0.285826,1.0,1
4,1,1,0,0,0,-0.866761,0.095418,-0.341784,0.285826,1.0,1


In [165]:
def evaluate_model(model):
    X_train, X_test, y_train, y_test  = train_test_split(X, y, test_size = 0.2, random_state = 42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    cross_val = cross_val_score(model, X, y, cv=5)
    avg_cross_val = np.mean(cross_val)
    print(f"{model.__class__.__name__} - Accuarcy : {accuracy: .2f} , Cross-Val-Score: {avg_cross_val: .2f}")
    return avg_cross_val

In [167]:
# List of models to evaluate
models = [
    LogisticRegression(),
    svm.SVC(),
    RandomForestClassifier()
]


In [169]:

# Evaluate each model
model_score = {model.__class__.__name__: evaluate_model(model) for model in models}


LogisticRegression - Accuarcy :  0.85 , Cross-Val-Score:  0.84
SVC - Accuarcy :  0.85 , Cross-Val-Score:  0.83
RandomForestClassifier - Accuarcy :  0.85 , Cross-Val-Score:  0.84


In [170]:
evaluate_model(LogisticRegression())


LogisticRegression - Accuarcy :  0.85 , Cross-Val-Score:  0.84


np.float64(0.8354068857589985)

## Hyperparameter Tuning

In [172]:
def tune_model(model, param_grid):
    tuner = RandomizedSearchCV(model, param_grid, cv=5, n_iter=20, verbose=1, random_state=42)
    tuner.fit(X, y)
    print(f"Best Score for {model.__class__.__name__}: {tuner.best_score_:.2f}")
    print(f"Best Parameters for {model.__class__.__name__}: {tuner.best_params_}")
    return tuner.best_estimator_


In [173]:
log_reg_grid = {'C': np.logspace(-4, 4, 20), "solver": ["liblinear"]}
svc_grid = {'C': [0.25, 0.50, 0.75, 1], "kernel": ['linear']}
rf_grid = {
    'n_estimators': np.arange(10, 1000, 10),
    'max_features': ['log2', 'sqrt'], 
    'max_depth': [None, 3, 5, 10, 20, 30],
    'min_samples_split': [2, 5, 20, 50, 100],
    'min_samples_leaf': [1, 2, 5, 10]
}


In [174]:
best_log_reg = tune_model(LogisticRegression(), log_reg_grid)
best_svc_reg = tune_model(svm.SVC(), svc_grid)
best_rf = tune_model(RandomForestClassifier(), rf_grid)



Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Score for LogisticRegression: 0.84
Best Parameters for LogisticRegression: {'solver': 'liblinear', 'C': np.float64(1.623776739188721)}
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best Score for SVC: 0.84
Best Parameters for SVC: {'kernel': 'linear', 'C': 0.25}
Fitting 5 folds for each of 20 candidates, totalling 100 fits




Best Score for RandomForestClassifier: 0.84
Best Parameters for RandomForestClassifier: {'n_estimators': np.int64(930), 'min_samples_split': 50, 'min_samples_leaf': 10, 'max_features': 'sqrt', 'max_depth': 30}


In [176]:
final_model = best_rf


In [180]:
joblib.dump(final_model, 'loan_status_predictor.pkl')

['loan_status_predictor.pkl']

In [181]:
# Prediction System

sample_data = pd.DataFrame({
    'Gender': [1],
    'Married': [1],
    'Dependents': [2],
    'Education': [0],
    'Self_Employed': [0],
    'ApplicantIncome': [1000],
    'CoapplicantIncome': [0.0],
    'LoanAmount': [150],
    'Loan_Amount_Term': [180],
    'Credit_History': [0],
    'Property_Area': [1]
})

sample_data[num_cols] = scaler.transform(sample_data[num_cols])
loaded_model = joblib.load('loan_status_predictor.pkl')
prediction = loaded_model.predict(sample_data)

result = "Loan Approved" if prediction[0] == 1 else "Loan Not Approved"
print(f"\nPrediction Result: {result}")


Prediction Result: Loan Not Approved


In [182]:
# Prediction System

sample_data = pd.DataFrame({
    'Gender': [1],
    'Married': [1],
    'Dependents': [2],
    'Education': [0],
    'Self_Employed': [0],
    'ApplicantIncome': [1000],
    'CoapplicantIncome': [0.0],
    'LoanAmount': [150],
    'Loan_Amount_Term': [180],
    'Credit_History': [0],
    'Property_Area': [1]
})

sample_data[num_cols] = scaler.transform(sample_data[num_cols])
loaded_model = joblib.load('loan_status_predictor.pkl')
prediction = loaded_model.predict(sample_data)

result = "Loan Approved" if prediction[0] == 1 else "Loan Not Approved"
print(f"\nPrediction Result: {result}")


Prediction Result: Loan Not Approved


In [1]:
import pandas as pd
import joblib
import ipywidgets as widgets
from IPython.display import display
from sklearn.preprocessing import StandardScaler

# Load the trained model and scaler
loaded_model = joblib.load('loan_status_predictor.pkl')
scaler = joblib.load('scaler.pkl')  # Ensure you saved your scaler

# Create interactive input fields
gender = widgets.Dropdown(options=[( "Male", 1), ("Female", 0)], description="Gender:")
married = widgets.Dropdown(options=[("Yes", 1), ("No", 0)], description="Married:")
dependents = widgets.Dropdown(options=[("0", 0), ("1", 1), ("2", 2), ("3+", 3)], description="Dependents:")
education = widgets.Dropdown(options=[("Graduate", 0), ("Not Graduate", 1)], description="Education:")
self_employed = widgets.Dropdown(options=[("Yes", 1), ("No", 0)], description="Self-Employed:")
applicant_income = widgets.FloatText(description="Applicant Income (₹):")
coapplicant_income = widgets.FloatText(description="Coapplicant Income (₹):")
loan_amount = widgets.FloatText(description="Loan Amount (₹ thousands):")
loan_term = widgets.IntSlider(value=180, min=12, max=360, step=12, description="Loan Term:")
credit_history = widgets.Dropdown(options=[("Good", 1), ("Bad", 0)], description="Credit History:")
property_area = widgets.Dropdown(options=[("Urban", 2), ("Semiurban", 1), ("Rural", 0)], description="Property Area:")

# Button to predict
predict_button = widgets.Button(description="Check Loan Eligibility")
output = widgets.Output()

# Function to predict loan approval
def predict_loan(_):
    with output:
        output.clear_output()  # Clear previous results
        
        # Store inputs in DataFrame
        sample_data = pd.DataFrame({
            'Gender': [gender.value],
            'Married': [married.value],
            'Dependents': [dependents.value],
            'Education': [education.value],
            'Self_Employed': [self_employed.value],
            'ApplicantIncome': [applicant_income.value],
            'CoapplicantIncome': [coapplicant_income.value],
            'LoanAmount': [loan_amount.value],
            'Loan_Amount_Term': [loan_term.value],
            'Credit_History': [credit_history.value],
            'Property_Area': [property_area.value]
        })
        
        # Scale numerical columns
        num_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']
        sample_data[num_cols] = scaler.transform(sample_data[num_cols])
        
        # Make prediction
        prediction = loaded_model.predict(sample_data)
        
        # Print result
        result = "✅ Loan Approved!" if prediction[0] == 1 else "❌ Loan Not Approved."
        display(result)

# Attach function to button
predict_button.on_click(predict_loan)

# Display all widgets
display(gender, married, dependents, education, self_employed,
        applicant_income, coapplicant_income, loan_amount, loan_term, 
        credit_history, property_area, predict_button, output)


Dropdown(description='Gender:', options=(('Male', 1), ('Female', 0)), value=1)

Dropdown(description='Married:', options=(('Yes', 1), ('No', 0)), value=1)

Dropdown(description='Dependents:', options=(('0', 0), ('1', 1), ('2', 2), ('3+', 3)), value=0)

Dropdown(description='Education:', options=(('Graduate', 0), ('Not Graduate', 1)), value=0)

Dropdown(description='Self-Employed:', options=(('Yes', 1), ('No', 0)), value=1)

FloatText(value=0.0, description='Applicant Income (₹):')

FloatText(value=0.0, description='Coapplicant Income (₹):')

FloatText(value=0.0, description='Loan Amount (₹ thousands):')

IntSlider(value=180, description='Loan Term:', max=360, min=12, step=12)

Dropdown(description='Credit History:', options=(('Good', 1), ('Bad', 0)), value=1)

Dropdown(description='Property Area:', options=(('Urban', 2), ('Semiurban', 1), ('Rural', 0)), value=2)

Button(description='Check Loan Eligibility', style=ButtonStyle())

Output()

In [7]:
joblib.dump(loaded_model, 'loan_model.joblib')
print("Model saved successfully!")


Model saved successfully!
