## Import some dependencies

In [1]:
import numpy as np
from numpy.random import seed

# Pin NumPy's global random state so NumPy-driven randomness repeats run to run
np.random.seed(1)
# Peek at the first three draws from the seeded stream
np.random.rand(3)

array([4.17022005e-01, 7.20324493e-01, 1.14374817e-04])

In [2]:
import pandas as pd
import os

In [3]:
import tensorflow

# Seeding NumPy alone does not make the runs repeatable: TensorFlow keeps its
# own global random state (used e.g. for Keras weight initialisation), so pin
# it to the same seed value as NumPy.
tensorflow.random.set_seed(1)

# Show the bundled Keras version for the record
tensorflow.keras.__version__

'2.2.4-tf'

Setting up multi-worker training would take a bit more work here; see: https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras

## Read in and clean up data

In [4]:
# Lift the cap on displayed columns so wide frames render in full
pd.set_option('display.max_columns', None)
# Read the validation CSV and show it
test_data = pd.read_csv(
    os.path.join('../data', 'cleanLoanDataValidationAllIncome.csv')
)
test_data

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,TotalIncome
0,0,1,0,1,0,5720.0,0.0,110000.0,360.0,1,2,5720.0
1,0,1,1,1,0,3076.0,1500.0,126000.0,360.0,1,2,4576.0
2,0,1,2,1,0,5000.0,1800.0,208000.0,360.0,1,2,6800.0
3,0,1,2,1,0,2340.0,2546.0,100000.0,360.0,0,2,4886.0
4,0,0,0,0,0,3276.0,0.0,78000.0,360.0,1,2,3276.0
...,...,...,...,...,...,...,...,...,...,...,...,...
340,0,1,3,0,1,4009.0,1777.0,113000.0,360.0,1,2,5786.0
341,0,1,0,1,0,4158.0,709.0,115000.0,360.0,1,2,4867.0
342,0,0,0,1,0,3250.0,1993.0,126000.0,360.0,0,1,5243.0
343,0,1,0,1,0,5000.0,2393.0,158000.0,360.0,1,0,7393.0


In [5]:
# Lift the cap on displayed columns so wide frames render in full
pd.set_option('display.max_columns', None)
# Read the training CSV and show it
train_data = pd.read_csv(
    os.path.join('../data', 'cleanLoanDataTrainAllIncome.csv')
)
train_data

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,TotalIncome
0,0,1,1,1,0,4583.0,1508.0,128000.0,360.0,1,0,0,6091.0
1,0,1,0,1,1,3000.0,0.0,66000.0,360.0,1,2,1,3000.0
2,0,1,0,0,0,2583.0,2358.0,120000.0,360.0,1,2,1,4941.0
3,0,0,0,1,0,6000.0,0.0,141000.0,360.0,1,2,1,6000.0
4,0,1,2,1,1,5417.0,4196.0,267000.0,360.0,1,2,1,9613.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
558,1,0,0,1,0,2900.0,0.0,71000.0,360.0,1,0,1,2900.0
559,0,1,3,1,0,4106.0,0.0,40000.0,180.0,1,0,1,4106.0
560,0,1,1,1,0,8072.0,240.0,253000.0,360.0,1,2,1,8312.0
561,0,1,2,1,0,7583.0,0.0,187000.0,360.0,1,2,1,7583.0


## 1a: Total income

In [6]:
# Target column
y = train_data["Loan_Status"]
# Features: everything except the target and the two separate income columns
# (TotalIncome stays in)
X = train_data.drop(columns=["Loan_Status", "ApplicantIncome", "CoapplicantIncome"])
print(X.shape, y.shape)

(563, 10) (563,)


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [8]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [9]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [10]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [11]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [13]:
# Two 100-unit ReLU hidden layers feeding a 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [14]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [15]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1100      
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 202       
Total params: 11,402
Trainable params: 11,402
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 2s - loss: 0.6489 - accuracy: 0.6232
Epoch 2/60
422/422 - 0s - loss: 0.5766 - accuracy: 0.7014
Epoch 3/60
422/422 - 0s - loss: 0.5488 - accuracy: 0.7204
Epoch 4/60
422/422 - 0s - loss: 0.5331 - accuracy: 0.7559
Epoch 5/60
422/422 - 0s - loss: 0.5401 - accuracy: 0.7536
Epoch 6/60
422/422 - 0s - loss: 0.5305 - accuracy: 0.7725
Epoch 7/60
422/422 - 0s - loss: 0.5165 - accuracy: 0.7725
Epoch 8/60
422/422 - 0s - loss: 0.5122 - accuracy: 0.7749
Epoch 9/60
422/422 - 0s - loss: 0.5171 - accuracy: 0.7725
Epoch 10/60
422/422 - 0s - loss: 0.5096 - accuracy: 0.7773
Epoch 11/60
422/422 - 0s - loss: 0.5057 - accuracy: 0.7773
Epoch 12/60
422/422 - 0s - loss: 0.5026 - accuracy: 0.7796
Epoch 13/60
422/422 - 0s - loss: 0.5018 - accuracy: 0.7820
Epoch 14/60
422/422 - 0s - loss: 0.4972 - accuracy: 0.7796
Epoch 15/60
422/422 - 0s - loss: 0.4965 - accuracy: 0.7796
Epoch 16/60
422/422 - 0s - loss: 0.4942 - accuracy: 0.7867
Epoch 17/60
422/422 - 0s - loss: 0.4908 - ac

<tensorflow.python.keras.callbacks.History at 0x7feda8b32f60>

In [17]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.6149 - accuracy: 0.7021
Normal Neural Network 1 - Loss: 0.6946593983799008, Accuracy: 0.7021276354789734


First run results

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_3 (Dense)              (None, 100)               1100      
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
=================================================================
Total params: 11,402
Trainable params: 11,402
Non-trainable params: 0
    
141/1 - 0s - loss: 0.5452 - accuracy: 0.7092
Normal Neural Network 1 - Loss: 0.6671343234413905, Accuracy: 0.7092198729515076

## 1b: Separate income

In [18]:
# Target column
y = train_data["Loan_Status"]
# Features: everything except the target and TotalIncome
# (the separate applicant/co-applicant incomes stay in)
X = train_data.drop(columns=["Loan_Status", "TotalIncome"])
print(X.shape, y.shape)

(563, 11) (563,)


In [19]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [20]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [21]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [22]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [23]:
# Two 100-unit ReLU hidden layers feeding a 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [24]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [25]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               1200      
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
Total params: 11,502
Trainable params: 11,502
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 1s - loss: 0.6895 - accuracy: 0.5427
Epoch 2/60
422/422 - 0s - loss: 0.5816 - accuracy: 0.7014
Epoch 3/60
422/422 - 0s - loss: 0.5676 - accuracy: 0.7014
Epoch 4/60
422/422 - 0s - loss: 0.5460 - accuracy: 0.7275
Epoch 5/60
422/422 - 0s - loss: 0.5302 - accuracy: 0.7464
Epoch 6/60
422/422 - 0s - loss: 0.5220 - accuracy: 0.7607
Epoch 7/60
422/422 - 0s - loss: 0.5174 - accuracy: 0.7678
Epoch 8/60
422/422 - 0s - loss: 0.5155 - accuracy: 0.7701
Epoch 9/60
422/422 - 0s - loss: 0.5126 - accuracy: 0.7678
Epoch 10/60
422/422 - 0s - loss: 0.5126 - accuracy: 0.7701
Epoch 11/60
422/422 - 0s - loss: 0.5077 - accuracy: 0.7749
Epoch 12/60
422/422 - 0s - loss: 0.5045 - accuracy: 0.7749
Epoch 13/60
422/422 - 0s - loss: 0.5039 - accuracy: 0.7796
Epoch 14/60
422/422 - 0s - loss: 0.4996 - accuracy: 0.7796
Epoch 15/60
422/422 - 0s - loss: 0.4968 - accuracy: 0.7773
Epoch 16/60
422/422 - 0s - loss: 0.4956 - accuracy: 0.7773
Epoch 17/60
422/422 - 0s - loss: 0.4928 - ac

<tensorflow.python.keras.callbacks.History at 0x7feda8c85b00>

In [27]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.6404 - accuracy: 0.6667
Normal Neural Network 1 - Loss: 0.7258663308535908, Accuracy: 0.6666666865348816


1b results
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_16 (Dense)             (None, 100)               1200      
_________________________________________________________________
dense_17 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_18 (Dense)             (None, 2)                 202       
=================================================================
Total params: 11,502
Trainable params: 11,502
Non-trainable params: 0

141/1 - 0s - loss: 0.5967 - accuracy: 0.6879
Normal Neural Network 1 - Loss: 0.6984488072124779, Accuracy: 0.6879432797431946

## Second run: take out Loan_Amount_Term column

In [28]:
# Target column
y = train_data["Loan_Status"]
# Features: drop the target, the loan term, and the two separate income
# columns (TotalIncome stays in)
X = train_data.drop(
    columns=["Loan_Status", 'Loan_Amount_Term', "ApplicantIncome", "CoapplicantIncome"]
)
print(X.shape, y.shape)

(563, 9) (563,)


In [29]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [30]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [31]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [32]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [33]:
# Two 100-unit ReLU hidden layers feeding a 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [34]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [35]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 100)               1000      
_________________________________________________________________
dense_7 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 202       
Total params: 11,302
Trainable params: 11,302
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 1s - loss: 0.6070 - accuracy: 0.7014
Epoch 2/60
422/422 - 0s - loss: 0.5663 - accuracy: 0.7014
Epoch 3/60
422/422 - 0s - loss: 0.5526 - accuracy: 0.7109
Epoch 4/60
422/422 - 0s - loss: 0.5403 - accuracy: 0.7062
Epoch 5/60
422/422 - 0s - loss: 0.5313 - accuracy: 0.7417
Epoch 6/60
422/422 - 0s - loss: 0.5252 - accuracy: 0.7701
Epoch 7/60
422/422 - 0s - loss: 0.5204 - accuracy: 0.7725
Epoch 8/60
422/422 - 0s - loss: 0.5144 - accuracy: 0.7773
Epoch 9/60
422/422 - 0s - loss: 0.5232 - accuracy: 0.7725
Epoch 10/60
422/422 - 0s - loss: 0.5125 - accuracy: 0.7749
Epoch 11/60
422/422 - 0s - loss: 0.5054 - accuracy: 0.7773
Epoch 12/60
422/422 - 0s - loss: 0.5071 - accuracy: 0.7773
Epoch 13/60
422/422 - 0s - loss: 0.5035 - accuracy: 0.7773
Epoch 14/60
422/422 - 0s - loss: 0.5031 - accuracy: 0.7796
Epoch 15/60
422/422 - 0s - loss: 0.4962 - accuracy: 0.7773
Epoch 16/60
422/422 - 0s - loss: 0.4967 - accuracy: 0.7773
Epoch 17/60
422/422 - 0s - loss: 0.4975 - ac

<tensorflow.python.keras.callbacks.History at 0x7fed88ed3828>

In [37]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.5383 - accuracy: 0.6879
Normal Neural Network 1 - Loss: 0.6641574151127051, Accuracy: 0.6879432797431946


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_6 (Dense)              (None, 100)               1000      
_________________________________________________________________
dense_7 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 202       
=================================================================
Total params: 11,302
Trainable params: 11,302
Non-trainable params: 0
    
141/1 - 0s - loss: 0.5926 - accuracy: 0.6950
Normal Neural Network 1 - Loss: 0.6865134027832789, Accuracy: 0.695035457611084


## 2b: Separate income

In [38]:
# Target column
y = train_data["Loan_Status"]
# Features: drop the target, the loan term, and TotalIncome
# (the separate applicant/co-applicant incomes stay in)
X = train_data.drop(columns=["Loan_Status", 'Loan_Amount_Term', "TotalIncome"])
print(X.shape, y.shape)

(563, 10) (563,)


In [39]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [40]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [41]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [42]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [43]:
# Two 100-unit ReLU hidden layers feeding a 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [44]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [45]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 100)               1100      
_________________________________________________________________
dense_10 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_11 (Dense)             (None, 2)                 202       
Total params: 11,402
Trainable params: 11,402
Non-trainable params: 0
_________________________________________________________________


In [46]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 1s - loss: 0.6205 - accuracy: 0.6659
Epoch 2/60
422/422 - 0s - loss: 0.5706 - accuracy: 0.7014
Epoch 3/60
422/422 - 0s - loss: 0.5544 - accuracy: 0.7156
Epoch 4/60
422/422 - 0s - loss: 0.5407 - accuracy: 0.7464
Epoch 5/60
422/422 - 0s - loss: 0.5276 - accuracy: 0.7488
Epoch 6/60
422/422 - 0s - loss: 0.5225 - accuracy: 0.7607
Epoch 7/60
422/422 - 0s - loss: 0.5179 - accuracy: 0.7725
Epoch 8/60
422/422 - 0s - loss: 0.5163 - accuracy: 0.7725
Epoch 9/60
422/422 - 0s - loss: 0.5174 - accuracy: 0.7749
Epoch 10/60
422/422 - 0s - loss: 0.5072 - accuracy: 0.7773
Epoch 11/60
422/422 - 0s - loss: 0.5084 - accuracy: 0.7773
Epoch 12/60
422/422 - 0s - loss: 0.5063 - accuracy: 0.7749
Epoch 13/60
422/422 - 0s - loss: 0.5018 - accuracy: 0.7773
Epoch 14/60
422/422 - 0s - loss: 0.4968 - accuracy: 0.7773
Epoch 15/60
422/422 - 0s - loss: 0.5006 - accuracy: 0.7773
Epoch 16/60
422/422 - 0s - loss: 0.4957 - accuracy: 0.7773
Epoch 17/60
422/422 - 0s - loss: 0.4913 - ac

<tensorflow.python.keras.callbacks.History at 0x7fed88d93780>

In [47]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.6060 - accuracy: 0.7021
Normal Neural Network 1 - Loss: 0.6719605242106932, Accuracy: 0.7021276354789734


Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_19 (Dense)             (None, 100)               1100      
_________________________________________________________________
dense_20 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_21 (Dense)             (None, 2)                 202       
=================================================================
Total params: 11,402
Trainable params: 11,402
Non-trainable params: 0
    
141/1 - 0s - loss: 0.5788 - accuracy: 0.7234
Normal Neural Network 1 - Loss: 0.6754736462806133, Accuracy: 0.7234042286872864

## Third run: take out Loan_Amount_Term and LoanAmount columns

In [48]:
# Target column
y = train_data["Loan_Status"]
# Features: drop the target, loan amount, loan term, and the two separate
# income columns (TotalIncome stays in)
X = train_data.drop(
    columns=["Loan_Status", 'LoanAmount', 'Loan_Amount_Term', "ApplicantIncome", "CoapplicantIncome"]
)
print(X.shape, y.shape)

(563, 8) (563,)


In [49]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [50]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [51]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [52]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [53]:
# Two 100-unit ReLU hidden layers feeding a 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [54]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [55]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 100)               900       
_________________________________________________________________
dense_13 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_14 (Dense)             (None, 2)                 202       
Total params: 11,202
Trainable params: 11,202
Non-trainable params: 0
_________________________________________________________________


In [56]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 1s - loss: 0.6323 - accuracy: 0.6517
Epoch 2/60
422/422 - 0s - loss: 0.5839 - accuracy: 0.7014
Epoch 3/60
422/422 - 0s - loss: 0.5531 - accuracy: 0.7133
Epoch 4/60
422/422 - 0s - loss: 0.5426 - accuracy: 0.7464
Epoch 5/60
422/422 - 0s - loss: 0.5308 - accuracy: 0.7441
Epoch 6/60
422/422 - 0s - loss: 0.5267 - accuracy: 0.7607
Epoch 7/60
422/422 - 0s - loss: 0.5197 - accuracy: 0.7725
Epoch 8/60
422/422 - 0s - loss: 0.5207 - accuracy: 0.7749
Epoch 9/60
422/422 - 0s - loss: 0.5214 - accuracy: 0.7725
Epoch 10/60
422/422 - 0s - loss: 0.5119 - accuracy: 0.7844
Epoch 11/60
422/422 - 0s - loss: 0.5139 - accuracy: 0.7749
Epoch 12/60
422/422 - 0s - loss: 0.5070 - accuracy: 0.7749
Epoch 13/60
422/422 - 0s - loss: 0.5071 - accuracy: 0.7773
Epoch 14/60
422/422 - 0s - loss: 0.5032 - accuracy: 0.7773
Epoch 15/60
422/422 - 0s - loss: 0.5012 - accuracy: 0.7773
Epoch 16/60
422/422 - 0s - loss: 0.5004 - accuracy: 0.7844
Epoch 17/60
422/422 - 0s - loss: 0.4986 - ac

<tensorflow.python.keras.callbacks.History at 0x7feda8f80eb8>

In [57]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.5502 - accuracy: 0.6950
Normal Neural Network 1 - Loss: 0.6399033718498041, Accuracy: 0.695035457611084


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_9 (Dense)              (None, 100)               900       
_________________________________________________________________
dense_10 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_11 (Dense)             (None, 2)                 202       
=================================================================
Total params: 11,202
Trainable params: 11,202
Non-trainable params: 0

141/1 - 0s - loss: 0.5538 - accuracy: 0.7092
Normal Neural Network 1 - Loss: 0.6326239667040237, Accuracy: 0.7092198729515076

## 3b: Separate income

In [58]:
# Target column
y = train_data["Loan_Status"]
# Features: drop the target, loan amount, loan term, and TotalIncome
# (the separate applicant/co-applicant incomes stay in)
X = train_data.drop(
    columns=["Loan_Status", 'LoanAmount', 'Loan_Amount_Term', "TotalIncome"]
)
print(X.shape, y.shape)

(563, 9) (563,)


In [59]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [60]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [61]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [62]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [63]:
# Two 100-unit ReLU hidden layers feeding a 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [64]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [65]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 100)               1000      
_________________________________________________________________
dense_16 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_17 (Dense)             (None, 2)                 202       
Total params: 11,302
Trainable params: 11,302
Non-trainable params: 0
_________________________________________________________________


In [66]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 1s - loss: 0.6019 - accuracy: 0.7014
Epoch 2/60
422/422 - 0s - loss: 0.5664 - accuracy: 0.7014
Epoch 3/60
422/422 - 0s - loss: 0.5441 - accuracy: 0.7085
Epoch 4/60
422/422 - 0s - loss: 0.5357 - accuracy: 0.7630
Epoch 5/60
422/422 - 0s - loss: 0.5276 - accuracy: 0.7796
Epoch 6/60
422/422 - 0s - loss: 0.5195 - accuracy: 0.7725
Epoch 7/60
422/422 - 0s - loss: 0.5186 - accuracy: 0.7749
Epoch 8/60
422/422 - 0s - loss: 0.5165 - accuracy: 0.7773
Epoch 9/60
422/422 - 0s - loss: 0.5121 - accuracy: 0.7773
Epoch 10/60
422/422 - 0s - loss: 0.5155 - accuracy: 0.7773
Epoch 11/60
422/422 - 0s - loss: 0.5098 - accuracy: 0.7749
Epoch 12/60
422/422 - 0s - loss: 0.5098 - accuracy: 0.7773
Epoch 13/60
422/422 - 0s - loss: 0.5085 - accuracy: 0.7773
Epoch 14/60
422/422 - 0s - loss: 0.5098 - accuracy: 0.7725
Epoch 15/60
422/422 - 0s - loss: 0.5018 - accuracy: 0.7773
Epoch 16/60
422/422 - 0s - loss: 0.4997 - accuracy: 0.7773
Epoch 17/60
422/422 - 0s - loss: 0.4973 - ac

<tensorflow.python.keras.callbacks.History at 0x7fedb9483e48>

In [67]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.7318 - accuracy: 0.6667
Normal Neural Network 1 - Loss: 0.7879310296782365, Accuracy: 0.6666666865348816


## Fourth run: take out Loan_Amount_Term and LoanAmount columns, add a hidden layer - no improvement in accuracy

In [68]:
# Target column
y = train_data["Loan_Status"]
# Features: drop the target, loan amount, loan term, and the two separate
# income columns (TotalIncome stays in)
X = train_data.drop(
    columns=["Loan_Status", 'LoanAmount', 'Loan_Amount_Term', "ApplicantIncome", "CoapplicantIncome"]
)
print(X.shape, y.shape)

(563, 8) (563,)


In [69]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [70]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [71]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [72]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [73]:
# Three 100-unit ReLU hidden layers (one more than the earlier runs) feeding a
# 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [74]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [75]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_18 (Dense)             (None, 100)               900       
_________________________________________________________________
dense_19 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_20 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_21 (Dense)             (None, 2)                 202       
Total params: 21,302
Trainable params: 21,302
Non-trainable params: 0
_________________________________________________________________


In [76]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 1s - loss: 0.6280 - accuracy: 0.6422
Epoch 2/60
422/422 - 0s - loss: 0.5633 - accuracy: 0.6991
Epoch 3/60
422/422 - 0s - loss: 0.5380 - accuracy: 0.7488
Epoch 4/60
422/422 - 0s - loss: 0.5219 - accuracy: 0.7701
Epoch 5/60
422/422 - 0s - loss: 0.5181 - accuracy: 0.7701
Epoch 6/60
422/422 - 0s - loss: 0.5171 - accuracy: 0.7773
Epoch 7/60
422/422 - 0s - loss: 0.5093 - accuracy: 0.7773
Epoch 8/60
422/422 - 0s - loss: 0.5052 - accuracy: 0.7773
Epoch 9/60
422/422 - 0s - loss: 0.5026 - accuracy: 0.7773
Epoch 10/60
422/422 - 0s - loss: 0.4993 - accuracy: 0.7749
Epoch 11/60
422/422 - 0s - loss: 0.4995 - accuracy: 0.7725
Epoch 12/60
422/422 - 0s - loss: 0.4898 - accuracy: 0.7773
Epoch 13/60
422/422 - 0s - loss: 0.4865 - accuracy: 0.7796
Epoch 14/60
422/422 - 0s - loss: 0.4845 - accuracy: 0.7773
Epoch 15/60
422/422 - 0s - loss: 0.4855 - accuracy: 0.7820
Epoch 16/60
422/422 - 0s - loss: 0.4805 - accuracy: 0.7867
Epoch 17/60
422/422 - 0s - loss: 0.4750 - ac

<tensorflow.python.keras.callbacks.History at 0x7fed5a1b4080>

In [77]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.9385 - accuracy: 0.6454
Normal Neural Network 1 - Loss: 1.0012289357523547, Accuracy: 0.6453900933265686


Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_12 (Dense)             (None, 100)               900       
_________________________________________________________________
dense_13 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_14 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_15 (Dense)             (None, 2)                 202       
=================================================================
Total params: 21,302
Trainable params: 21,302
Non-trainable params: 0

141/1 - 0s - loss: 0.7248 - accuracy: 0.6809
Normal Neural Network 1 - Loss: 0.9271682179565971, Accuracy: 0.6808510422706604


## 4b: Separate income

In [78]:
# Target column
y = train_data["Loan_Status"]
# Features: drop the target, loan amount, loan term, and TotalIncome
# (the separate applicant/co-applicant incomes stay in)
X = train_data.drop(
    columns=["Loan_Status", 'LoanAmount', 'Loan_Amount_Term', "TotalIncome"]
)
print(X.shape, y.shape)

(563, 9) (563,)


In [79]:
# Hold out a test partition at train_test_split's default size; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [80]:
# Fit the min/max scaler on the training rows only, then apply that same
# scaling to both partitions
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_train_scaled = X_scaler.transform(X_train)

In [81]:
# Step 1: fit the encoder on the training labels, then map both label sets to integer codes
label_encoder = LabelEncoder().fit(y_train)
encoded_y_test = label_encoder.transform(y_test)
encoded_y_train = label_encoder.transform(y_train)

In [82]:
# Step 2: one-hot encode the integer labels.
# The width is pinned to the number of classes the encoder saw; left to itself,
# to_categorical sizes each array from its own largest label, so a split that
# happened to miss the highest class would come out narrower than the other.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

In [83]:
# Three 100-unit ReLU hidden layers (one more than the earlier runs) feeding a
# 2-way softmax output.
# The input width is read from the scaled training matrix rather than typed in
# by hand, so it cannot drift out of sync with the feature columns selected
# for this run.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [84]:
# Configure training only (fitting happens in a later cell): Adam optimiser,
# categorical cross-entropy for the one-hot targets, accuracy as the metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [85]:
# Print the layer-by-layer architecture and parameter counts
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 100)               1000      
_________________________________________________________________
dense_23 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_24 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_25 (Dense)             (None, 2)                 202       
Total params: 21,402
Trainable params: 21,402
Non-trainable params: 0
_________________________________________________________________


In [86]:
# Train for 60 epochs on the scaled features with one log line per epoch;
# the call stays the last expression so the returned History object is shown
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 422 samples
Epoch 1/60
422/422 - 1s - loss: 0.6211 - accuracy: 0.6469
Epoch 2/60
422/422 - 0s - loss: 0.5736 - accuracy: 0.7014
Epoch 3/60
422/422 - 0s - loss: 0.5476 - accuracy: 0.7062
Epoch 4/60
422/422 - 0s - loss: 0.5317 - accuracy: 0.7749
Epoch 5/60
422/422 - 0s - loss: 0.5311 - accuracy: 0.7678
Epoch 6/60
422/422 - 0s - loss: 0.5294 - accuracy: 0.7701
Epoch 7/60
422/422 - 0s - loss: 0.5155 - accuracy: 0.7678
Epoch 8/60
422/422 - 0s - loss: 0.5174 - accuracy: 0.7678
Epoch 9/60
422/422 - 0s - loss: 0.5106 - accuracy: 0.7701
Epoch 10/60
422/422 - 0s - loss: 0.5070 - accuracy: 0.7701
Epoch 11/60
422/422 - 0s - loss: 0.5068 - accuracy: 0.7820
Epoch 12/60
422/422 - 0s - loss: 0.5009 - accuracy: 0.7701
Epoch 13/60
422/422 - 0s - loss: 0.4959 - accuracy: 0.7844
Epoch 14/60
422/422 - 0s - loss: 0.4922 - accuracy: 0.7773
Epoch 15/60
422/422 - 0s - loss: 0.5013 - accuracy: 0.7891
Epoch 16/60
422/422 - 0s - loss: 0.4855 - accuracy: 0.7867
Epoch 17/60
422/422 - 0s - loss: 0.4853 - ac

<tensorflow.python.keras.callbacks.History at 0x7fedb97e87f0>

In [87]:
# Score the held-out split; evaluate returns the loss followed by the compiled metric
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

141/1 - 0s - loss: 0.8754 - accuracy: 0.6454
Normal Neural Network 1 - Loss: 0.8913993539539635, Accuracy: 0.6453900933265686
