In [3]:
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras import Sequential
from keras.layers import Dense, Input
from keras.initializers import he_normal, glorot_normal

In [4]:
dataset = pd.read_csv('bankloan.csv')
dataset

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0.0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1.0,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0.0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0.0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0.0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,LP002978,Female,No,0.0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,LP002979,Male,Yes,3.0,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,LP002983,Male,Yes,1.0,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,LP002984,Male,Yes,2.0,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [5]:
dataset = dataset.dropna()
dataset

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,Male,Yes,1.0,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0.0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0.0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0.0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2.0,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,LP002978,Female,No,0.0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,LP002979,Male,Yes,3.0,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,LP002983,Male,Yes,1.0,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,LP002984,Male,Yes,2.0,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [6]:
dataset.isna().any()

Loan_ID              False
Gender               False
Married              False
Dependents           False
Education            False
Self_Employed        False
ApplicantIncome      False
CoapplicantIncome    False
LoanAmount           False
Loan_Amount_Term     False
Credit_History       False
Property_Area        False
Loan_Status          False
dtype: bool

In [7]:
dataset = dataset.drop(columns= 'Loan_ID')
dataset

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,Male,Yes,1.0,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,Male,Yes,0.0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,Male,Yes,0.0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,Male,No,0.0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,Male,Yes,2.0,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,0.0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,Male,Yes,3.0,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,Male,Yes,1.0,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,Male,Yes,2.0,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [8]:
dataset['LoanAmount'] = (dataset['LoanAmount'] * 1000).astype(int)
dataset

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,Male,Yes,1.0,Graduate,No,4583,1508.0,128000,360.0,1.0,Rural,N
2,Male,Yes,0.0,Graduate,Yes,3000,0.0,66000,360.0,1.0,Urban,Y
3,Male,Yes,0.0,Not Graduate,No,2583,2358.0,120000,360.0,1.0,Urban,Y
4,Male,No,0.0,Graduate,No,6000,0.0,141000,360.0,1.0,Urban,Y
5,Male,Yes,2.0,Graduate,Yes,5417,4196.0,267000,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,0.0,Graduate,No,2900,0.0,71000,360.0,1.0,Rural,Y
610,Male,Yes,3.0,Graduate,No,4106,0.0,40000,180.0,1.0,Rural,Y
611,Male,Yes,1.0,Graduate,No,8072,240.0,253000,360.0,1.0,Urban,Y
612,Male,Yes,2.0,Graduate,No,7583,0.0,187000,360.0,1.0,Urban,Y


In [9]:
dataset['Loan_Status'].value_counts()

Loan_Status
Y    332
N    148
Name: count, dtype: int64

Labelling Words with 1 and 0 s

In [10]:
pre_y = dataset['Loan_Status']
pre_x = dataset.drop('Loan_Status', axis=1)
dm_X = pd.get_dummies(pre_x)
dm_Y = pre_y.map(dict(Y=1, N=0)) # dataset.replace({pre_y: {'Y': 1, 'N': 0}})

dm_X

Unnamed: 0,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
1,1.0,4583,1508.0,128000,360.0,1.0,False,True,False,True,True,False,True,False,True,False,False
2,0.0,3000,0.0,66000,360.0,1.0,False,True,False,True,True,False,False,True,False,False,True
3,0.0,2583,2358.0,120000,360.0,1.0,False,True,False,True,False,True,True,False,False,False,True
4,0.0,6000,0.0,141000,360.0,1.0,False,True,True,False,True,False,True,False,False,False,True
5,2.0,5417,4196.0,267000,360.0,1.0,False,True,False,True,True,False,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,0.0,2900,0.0,71000,360.0,1.0,True,False,True,False,True,False,True,False,True,False,False
610,3.0,4106,0.0,40000,180.0,1.0,False,True,False,True,True,False,True,False,True,False,False
611,1.0,8072,240.0,253000,360.0,1.0,False,True,False,True,True,False,True,False,False,False,True
612,2.0,7583,0.0,187000,360.0,1.0,False,True,False,True,True,False,True,False,False,False,True


In [11]:
dm_X = dm_X.astype(int)
dm_X

Unnamed: 0,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
1,1,4583,1508,128000,360,1,0,1,0,1,1,0,1,0,1,0,0
2,0,3000,0,66000,360,1,0,1,0,1,1,0,0,1,0,0,1
3,0,2583,2358,120000,360,1,0,1,0,1,0,1,1,0,0,0,1
4,0,6000,0,141000,360,1,0,1,1,0,1,0,1,0,0,0,1
5,2,5417,4196,267000,360,1,0,1,0,1,1,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,0,2900,0,71000,360,1,1,0,1,0,1,0,1,0,1,0,0
610,3,4106,0,40000,180,1,0,1,0,1,1,0,1,0,1,0,0
611,1,8072,240,253000,360,1,0,1,0,1,1,0,1,0,0,0,1
612,2,7583,0,187000,360,1,0,1,0,1,1,0,1,0,0,0,1


In [12]:
dm_Y

1      0
2      1
3      1
4      1
5      1
      ..
609    1
610    1
611    1
612    1
613    0
Name: Loan_Status, Length: 480, dtype: int64

In [13]:
smote = SMOTE(sampling_strategy= 'minority', random_state=42)
X1, y = smote.fit_resample(dm_X, dm_Y)
scaler = MinMaxScaler()
X = scaler.fit_transform(X1)

In [14]:
y.value_counts()

Loan_Status
0    332
1    332
Name: count, dtype: int64

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [16]:
X.shape, X_train.shape, X_test.shape

((664, 17), (531, 17), (133, 17))

In [17]:
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(400, activation='relu', kernel_initializer=he_normal()))
model.add(Dense(800, activation='relu', kernel_initializer=he_normal()))
model.add(Dense(50, activation='relu', kernel_initializer=he_normal()))
model.add(Dense(1, activation='sigmoid', kernel_initializer=glorot_normal()))

In [18]:
model.summary()

In [19]:
model.compile(
    optimizer='adam',
    loss= 'binary_crossentropy',
    metrics= ['accuracy']
)

In [20]:
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 118ms/step - accuracy: 0.5574 - loss: 0.7021 - val_accuracy: 0.7757 - val_loss: 0.4578
Epoch 2/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.8075 - loss: 0.4322 - val_accuracy: 0.8598 - val_loss: 0.3915
Epoch 3/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.8432 - loss: 0.4067 - val_accuracy: 0.8785 - val_loss: 0.3912
Epoch 4/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.8252 - loss: 0.3840 - val_accuracy: 0.8785 - val_loss: 0.3949
Epoch 5/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - accuracy: 0.8485 - loss: 0.3643 - val_accuracy: 0.8785 - val_loss: 0.4039
Epoch 6/50
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.8440 - loss: 0.3496 - val_accuracy: 0.8972 - val_loss: 0.3917
Epoch 7/50
[1m14/14[0m [32m━━━

<keras.src.callbacks.history.History at 0x217e6cadd00>

In [21]:
model.evaluate(X_test, y_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8071 - loss: 0.6481  


[0.8077695369720459, 0.7819548845291138]

In [22]:
X_test[0]

array([0.        , 0.05689549, 0.06894819, 0.20473773, 0.72972973,
       1.        , 0.        , 1.        , 0.        , 1.        ,
       1.        , 0.        , 1.        , 0.        , 0.        ,
       0.        , 1.        ])

In [23]:
y_predict = model.predict(X_test)
print(y_predict.shape)
y_predict

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step
(133, 1)


array([[8.6412650e-01],
       [1.1411426e-03],
       [4.9678406e-01],
       [7.7844048e-01],
       [9.9959952e-01],
       [8.9135760e-01],
       [2.8642505e-02],
       [4.2212778e-01],
       [5.4649281e-05],
       [6.2160158e-01],
       [9.0730488e-01],
       [9.0104860e-01],
       [1.0407141e-01],
       [9.0733975e-01],
       [9.5878899e-01],
       [3.9728925e-06],
       [2.9361650e-01],
       [1.4153614e-05],
       [7.8991449e-01],
       [9.8015028e-01],
       [9.2377114e-01],
       [9.9781132e-01],
       [7.7851260e-01],
       [9.0769601e-01],
       [3.8267254e-07],
       [9.6406281e-01],
       [3.0738835e-08],
       [1.7403845e-01],
       [9.4549227e-01],
       [9.9964052e-01],
       [3.1023901e-03],
       [2.8458388e-05],
       [9.6095409e-11],
       [7.8162181e-01],
       [9.5333028e-01],
       [9.1769028e-01],
       [4.4503078e-01],
       [2.5117950e-07],
       [8.7813002e-01],
       [6.8384094e-09],
       [1.5758500e-05],
       [1.892216

In [24]:
y_predict_binary = (y_predict >= 0.5).astype(int)
y_predict_binary

array([[1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
    

In [25]:
model.save('Loan_Model.keras')

In [1]:
import pickle

In [27]:
file_name = 'scaler.pkl'
pickle.dump(scaler, open(file_name, 'wb'))

In [28]:
rows = dm_X.iloc[1]
rows

Dependents                     0
ApplicantIncome             3000
CoapplicantIncome              0
LoanAmount                 66000
Loan_Amount_Term             360
Credit_History                 1
Gender_Female                  0
Gender_Male                    1
Married_No                     0
Married_Yes                    1
Education_Graduate             1
Education_Not Graduate         0
Self_Employed_No               0
Self_Employed_Yes              1
Property_Area_Rural            0
Property_Area_Semiurban        0
Property_Area_Urban            1
Name: 2, dtype: int32

In [29]:
output = pd.DataFrame(dm_X)

output.to_excel('One_Hot_Encoded.xlsx', index=False)

In [2]:
ohe_columns = ['Dependents', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Gender_Female', 
               'Gender_Male', 'Married_No', 'Married_Yes', 'Education_Graduate', 'Education_Not Graduate', 'Self_Employed_No', 
               'Self_Employed_Yes', 'Property_Area_Rural', 'Property_Area_Semiurban', 'Property_Area_Urban']

name = 'One_Hot_Encoded_Columns.pkl'

pickle.dump(ohe_columns, open(name, 'wb'))