## Import some dependencies

In [1]:
# Pin NumPy's global random generator to a fixed seed so NumPy-based
# randomness in this notebook repeats from run to run
import numpy as np
from numpy.random import seed

seed(1)

# Sanity check: look at the first three draws from the freshly seeded generator
np.random.rand(3)

array([4.17022005e-01, 7.20324493e-01, 1.14374817e-04])

In [2]:
import pandas as pd
import os

In [3]:
import tensorflow

# Seeding NumPy alone does not make the Keras runs repeatable: Keras weight
# initialisation draws from TensorFlow's own random generator. Seed it as well
# (same value as the NumPy seed) before any model is built.
tensorflow.random.set_seed(1)

# Display the bundled Keras version for the record
tensorflow.keras.__version__

'2.2.4-tf'

Setting up multi-worker runs would take a bit more work here; see https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras

## Read in and clean up data

In [4]:
# Show every column when a frame is displayed, then load the raw test set
pd.set_option('display.max_columns', None)
test_data_orig = pd.read_csv(
    os.path.join('data', 'loanDataTest.csv')
)
# To inspect the raw frame, uncomment the next line:
#test_data_orig

In [5]:
# Show every column when a frame is displayed, then load the raw training set
pd.set_option('display.max_columns', None)
train_data_orig = pd.read_csv(
    os.path.join('data', 'loanDataTrain.csv')
)
# To inspect the raw frame, uncomment the next line:
#train_data_orig

In [6]:
#Collect the distinct values of each column in the raw test and train sets
#(one test_/train_ pair per column) so the two files can be compared

# Categorical applicant attributes
test_Gender = test_data_orig["Gender"].unique()
train_Gender = train_data_orig["Gender"].unique()
test_Dependents = test_data_orig["Dependents"].unique()
train_Dependents = train_data_orig["Dependents"].unique()
test_Education = test_data_orig["Education"].unique()
train_Education = train_data_orig["Education"].unique()
test_Self_Employed = test_data_orig["Self_Employed"].unique()
train_Self_Employed = train_data_orig["Self_Employed"].unique()

# Income and loan figures
test_ApplicantIncome = test_data_orig["ApplicantIncome"].unique()
train_ApplicantIncome = train_data_orig["ApplicantIncome"].unique()
test_CoapplicantIncome = test_data_orig["CoapplicantIncome"].unique()
train_CoapplicantIncome = train_data_orig["CoapplicantIncome"].unique()
test_LoanAmount = test_data_orig["LoanAmount"].unique()
train_LoanAmount = train_data_orig["LoanAmount"].unique()
test_Loan_Amount_Term = test_data_orig["Loan_Amount_Term"].unique()
train_Loan_Amount_Term = train_data_orig["Loan_Amount_Term"].unique()

# Credit history and property area
test_Credit_History = test_data_orig["Credit_History"].unique()
train_Credit_History = train_data_orig["Credit_History"].unique()
test_Property_Area = test_data_orig["Property_Area"].unique()
train_Property_Area = train_data_orig["Property_Area"].unique()

# The target column is read from the training set only
train_Loan_Status = train_data_orig["Loan_Status"].unique()

#Uncomment to display all of the values
#test_Gender, train_Gender, test_Dependents, train_Dependents, test_Education, train_Education, test_Self_Employed, train_Self_Employed, test_ApplicantIncome, train_ApplicantIncome, test_CoapplicantIncome, train_CoapplicantIncome, test_LoanAmount, train_LoanAmount, test_Loan_Amount_Term, train_Loan_Amount_Term, test_Credit_History, train_Credit_History, test_Property_Area, train_Property_Area, train_Loan_Status

In [7]:
#Discard every row that contains a missing value, in both sets
test_data = test_data_orig.dropna(axis=0)
train_data_pre_dummy = train_data_orig.dropna(axis=0)

#Recompute the distinct values per column on the cleaned frames

# Categorical applicant attributes
test_Gender = test_data["Gender"].unique()
train_Gender = train_data_pre_dummy["Gender"].unique()
test_Dependents = test_data["Dependents"].unique()
train_Dependents = train_data_pre_dummy["Dependents"].unique()
test_Education = test_data["Education"].unique()
train_Education = train_data_pre_dummy["Education"].unique()
test_Self_Employed = test_data["Self_Employed"].unique()
train_Self_Employed = train_data_pre_dummy["Self_Employed"].unique()

# Income and loan figures
test_ApplicantIncome = test_data["ApplicantIncome"].unique()
train_ApplicantIncome = train_data_pre_dummy["ApplicantIncome"].unique()
test_CoapplicantIncome = test_data["CoapplicantIncome"].unique()
train_CoapplicantIncome = train_data_pre_dummy["CoapplicantIncome"].unique()
test_LoanAmount = test_data["LoanAmount"].unique()
train_LoanAmount = train_data_pre_dummy["LoanAmount"].unique()
test_Loan_Amount_Term = test_data["Loan_Amount_Term"].unique()
train_Loan_Amount_Term = train_data_pre_dummy["Loan_Amount_Term"].unique()

# Credit history and property area
test_Credit_History = test_data["Credit_History"].unique()
train_Credit_History = train_data_pre_dummy["Credit_History"].unique()
test_Property_Area = test_data["Property_Area"].unique()
train_Property_Area = train_data_pre_dummy["Property_Area"].unique()

# The target column is read from the training set only
train_Loan_Status = train_data_pre_dummy["Loan_Status"].unique()

#Display the values of the categorical columns
(
    test_Gender, train_Gender,
    test_Dependents, train_Dependents,
    test_Education, train_Education,
    test_Self_Employed, train_Self_Employed,
    train_Loan_Amount_Term,
    test_Credit_History, train_Credit_History,
    test_Property_Area, train_Property_Area,
    train_Loan_Status,
)

(array(['Male', 'Female'], dtype=object),
 array(['Male', 'Female'], dtype=object),
 array(['0', '1', '2', '3+'], dtype=object),
 array(['1', '0', '2', '3+'], dtype=object),
 array(['Graduate', 'Not Graduate'], dtype=object),
 array(['Graduate', 'Not Graduate'], dtype=object),
 array(['No', 'Yes'], dtype=object),
 array(['No', 'Yes'], dtype=object),
 array([360., 120., 180.,  60., 300., 480., 240.,  36.,  84.]),
 array([1., 0.]),
 array([1., 0.]),
 array(['Urban', 'Semiurban', 'Rural'], dtype=object),
 array(['Rural', 'Urban', 'Semiurban'], dtype=object),
 array(['N', 'Y'], dtype=object))

In [8]:
#Turn Loan_Status into an indicator column: one-hot encode it, then keep only
#Loan_Status_Y by dropping the Loan_Status_N column
train_data = (
    pd.get_dummies(train_data_pre_dummy, columns=['Loan_Status'], prefix=['Loan_Status'])
    .drop(columns='Loan_Status_N')
)
train_data

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status_Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,0
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,1
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,1
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,1
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,LP002978,Female,No,0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,1
610,LP002979,Male,Yes,3+,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,1
611,LP002983,Male,Yes,1,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,1
612,LP002984,Male,Yes,2,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,1


In [9]:
#Replace "3+" in Dependents with 3 - not perfect (it caps the count), but easier to deal with.
# The replacement is limited to the Dependents column instead of scanning every
# column of the frame, and the result is cast to int so the feature is numeric
# rather than a string that relies on implicit coercion later on.
train_data = train_data.assign(
    Dependents=train_data['Dependents'].replace('3+', '3').astype(int)
)
test_data = test_data.assign(
    Dependents=test_data['Dependents'].replace('3+', '3').astype(int)
)

In [10]:
#Limited dummy variables
# Each two-level feature is one-hot encoded and then reduced to a single
# indicator by dropping the listed reference column. The same steps are applied
# to the train and test frames, one feature at a time.
binary_features = [
    ('Gender', 'Gender_Male'),
    ('Education', 'Education_Not Graduate'),
    ('Married', 'Married_No'),
    ('Self_Employed', 'Self_Employed_No'),
    ('Credit_History', 'Credit_History_0.0'),
]

train_data_limit_dummies = train_data
test_data_limit_dummies = test_data
for feature, reference_column in binary_features:
    train_data_limit_dummies = pd.get_dummies(
        train_data_limit_dummies, prefix=[feature], columns=[feature]
    ).drop(columns=reference_column)
    test_data_limit_dummies = pd.get_dummies(
        test_data_limit_dummies, prefix=[feature], columns=[feature]
    ).drop(columns=reference_column)

# Property_Area has three levels; all of its indicator columns are kept
train_data_limit_dummies = pd.get_dummies(
    train_data_limit_dummies, prefix=['Property_Area'], columns=['Property_Area']
)
test_data_limit_dummies = pd.get_dummies(
    test_data_limit_dummies, prefix=['Property_Area'], columns=['Property_Area']
)

train_data_limit_dummies

Unnamed: 0,Loan_ID,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Loan_Status_Y,Gender_Female,Education_Graduate,Married_Yes,Self_Employed_Yes,Credit_History_1.0,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
1,LP001003,1,4583,1508.0,128.0,360.0,0,0,1,1,0,1,1,0,0
2,LP001005,0,3000,0.0,66.0,360.0,1,0,1,1,1,1,0,0,1
3,LP001006,0,2583,2358.0,120.0,360.0,1,0,0,1,0,1,0,0,1
4,LP001008,0,6000,0.0,141.0,360.0,1,0,1,0,0,1,0,0,1
5,LP001011,2,5417,4196.0,267.0,360.0,1,0,1,1,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,LP002978,0,2900,0.0,71.0,360.0,1,1,1,0,0,1,1,0,0
610,LP002979,3,4106,0.0,40.0,180.0,1,0,1,1,0,1,1,0,0
611,LP002983,1,8072,240.0,253.0,360.0,1,0,1,1,0,1,0,0,1
612,LP002984,2,7583,0.0,187.0,360.0,1,0,1,1,0,1,0,0,1


## First run: all columns from above

In [11]:
# Features: everything except the target indicator and the loan identifier
X = train_data_limit_dummies.drop(columns=["Loan_Status_Y", "Loan_ID"])
# Target: the Loan_Status_Y indicator
y = train_data_limit_dummies.loc[:, "Loan_Status_Y"]
print(X.shape, y.shape)

(480, 13) (480,)


In [12]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [13]:
# Hold out a test split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [14]:
# Learn the min/max scaling on the training features only, then apply the
# same fitted scaler to the held-out features
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [15]:
# Step 1: map the class labels to integer codes, with the encoder fitted on
# the training labels and reused for the test labels
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [16]:
# Step 2: expand the integer class codes into one-hot rows
y_train_categorical, y_test_categorical = (
    to_categorical(codes) for codes in (encoded_y_train, encoded_y_test)
)

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [18]:
# Create model and add layers: two 100-unit ReLU hidden layers followed by a
# 2-unit softmax output.
# The input width is read from the scaled training matrix instead of being
# hard-coded, so the cell stays correct when feature columns are added or removed.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [19]:
# Compile the model only (training is a separate cell): categorical
# cross-entropy loss to match the one-hot targets, Adam optimizer, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
# Print the layer-by-layer architecture and parameter counts of the model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               1400      
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 202       
Total params: 11,702
Trainable params: 11,702
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Train for 60 epochs on the scaled training data, one log line per epoch
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 360 samples
Epoch 1/60
360/360 - 17s - loss: 0.5949 - accuracy: 0.6806
Epoch 2/60
360/360 - 1s - loss: 0.5350 - accuracy: 0.7111
Epoch 3/60
360/360 - 1s - loss: 0.5064 - accuracy: 0.7806
Epoch 4/60
360/360 - 1s - loss: 0.4823 - accuracy: 0.8056
Epoch 5/60
360/360 - 1s - loss: 0.4579 - accuracy: 0.8139
Epoch 6/60
360/360 - 1s - loss: 0.4422 - accuracy: 0.8278
Epoch 7/60
360/360 - 1s - loss: 0.4326 - accuracy: 0.8306
Epoch 8/60
360/360 - 1s - loss: 0.4251 - accuracy: 0.8306
Epoch 9/60
360/360 - 1s - loss: 0.4177 - accuracy: 0.8306
Epoch 10/60
360/360 - 1s - loss: 0.4129 - accuracy: 0.8333
Epoch 11/60
360/360 - 1s - loss: 0.4111 - accuracy: 0.8306
Epoch 12/60
360/360 - 1s - loss: 0.4178 - accuracy: 0.8306
Epoch 13/60
360/360 - 1s - loss: 0.4059 - accuracy: 0.8361
Epoch 14/60
360/360 - 1s - loss: 0.4035 - accuracy: 0.8361
Epoch 15/60
360/360 - 1s - loss: 0.3979 - accuracy: 0.8361
Epoch 16/60
360/360 - 1s - loss: 0.3967 - accuracy: 0.8333
Epoch 17/60
360/360 - 1s - loss: 0.3910 - a

<tensorflow.python.keras.callbacks.History at 0x7f956076cda0>

In [22]:
# Score the trained network on the held-out split and report loss and accuracy
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

120/1 - 3s - loss: 0.6665 - accuracy: 0.7500
Normal Neural Network 1 - Loss: 0.700603723526001, Accuracy: 0.75


First run results

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 100)               1400      
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 202       
=================================================================
Total params: 11,702
Trainable params: 11,702
Non-trainable params: 0
    
120/1 - 3s - loss: 0.6665 - accuracy: 0.7500
Normal Neural Network 1 - Loss: 0.700603723526001, Accuracy: 0.75

## Second run: take out Loan_Amount_Term column

In [23]:
# Features: as in the first run, but additionally without Loan_Amount_Term
X = train_data_limit_dummies.drop(columns=["Loan_Status_Y", "Loan_ID", 'Loan_Amount_Term'])
# Target: the Loan_Status_Y indicator
y = train_data_limit_dummies.loc[:, "Loan_Status_Y"]
print(X.shape, y.shape)

(480, 12) (480,)


In [24]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [25]:
# Hold out a test split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [26]:
# Learn the min/max scaling on the training features only, then apply the
# same fitted scaler to the held-out features
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [27]:
# Step 1: map the class labels to integer codes, with the encoder fitted on
# the training labels and reused for the test labels
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [28]:
# Step 2: expand the integer class codes into one-hot rows
y_train_categorical, y_test_categorical = (
    to_categorical(codes) for codes in (encoded_y_train, encoded_y_test)
)

In [29]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [30]:
# Create model and add layers: two 100-unit ReLU hidden layers followed by a
# 2-unit softmax output.
# The input width is read from the scaled training matrix instead of being
# hard-coded, so the cell stays correct when feature columns are added or removed.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [31]:
# Compile the model only (training is a separate cell): categorical
# cross-entropy loss to match the one-hot targets, Adam optimizer, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [32]:
# Print the layer-by-layer architecture and parameter counts of the model
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               1300      
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
Total params: 11,602
Trainable params: 11,602
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Train for 60 epochs on the scaled training data, one log line per epoch
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 360 samples
Epoch 1/60
360/360 - 11s - loss: 0.5832 - accuracy: 0.7111
Epoch 2/60
360/360 - 1s - loss: 0.5396 - accuracy: 0.7111
Epoch 3/60
360/360 - 1s - loss: 0.5098 - accuracy: 0.7611
Epoch 4/60
360/360 - 1s - loss: 0.4865 - accuracy: 0.8083
Epoch 5/60
360/360 - 1s - loss: 0.4644 - accuracy: 0.8194
Epoch 6/60
360/360 - 1s - loss: 0.4479 - accuracy: 0.8306
Epoch 7/60
360/360 - 1s - loss: 0.4330 - accuracy: 0.8306
Epoch 8/60
360/360 - 1s - loss: 0.4304 - accuracy: 0.8333
Epoch 9/60
360/360 - 1s - loss: 0.4180 - accuracy: 0.8333
Epoch 10/60
360/360 - 1s - loss: 0.4199 - accuracy: 0.8278
Epoch 11/60
360/360 - 1s - loss: 0.4092 - accuracy: 0.8361
Epoch 12/60
360/360 - 1s - loss: 0.4060 - accuracy: 0.8361
Epoch 13/60
360/360 - 1s - loss: 0.4033 - accuracy: 0.8389
Epoch 14/60
360/360 - 1s - loss: 0.4016 - accuracy: 0.8389
Epoch 15/60
360/360 - 1s - loss: 0.3984 - accuracy: 0.8333
Epoch 16/60
360/360 - 1s - loss: 0.3936 - accuracy: 0.8361
Epoch 17/60
360/360 - 1s - loss: 0.3934 - a

<tensorflow.python.keras.callbacks.History at 0x7f95c094ba20>

In [34]:
# Score the trained network on the held-out split and report loss and accuracy
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

120/1 - 3s - loss: 0.7005 - accuracy: 0.7333
Normal Neural Network 1 - Loss: 0.7009410381317138, Accuracy: 0.7333333492279053


Run 2 summary

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_3 (Dense)              (None, 100)               1300      
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 202       
=================================================================
Total params: 11,602
Trainable params: 11,602
Non-trainable params: 0
    
120/1 - 3s - loss: 0.7005 - accuracy: 0.7333
Normal Neural Network 1 - Loss: 0.7009410381317138, Accuracy: 0.7333333492279053


## Third run: take out Loan_Amount_Term and LoanAmount columns

In [35]:
# Features: drop the target, the loan identifier and both loan-size columns
# (LoanAmount and Loan_Amount_Term)
X = train_data_limit_dummies.drop(columns=["Loan_Status_Y", "Loan_ID", 'LoanAmount', 'Loan_Amount_Term'])
# Target: the Loan_Status_Y indicator
y = train_data_limit_dummies.loc[:, "Loan_Status_Y"]
print(X.shape, y.shape)

(480, 11) (480,)


In [36]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [37]:
# Hold out a test split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [38]:
# Learn the min/max scaling on the training features only, then apply the
# same fitted scaler to the held-out features
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [39]:
# Step 1: map the class labels to integer codes, with the encoder fitted on
# the training labels and reused for the test labels
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [40]:
# Step 2: expand the integer class codes into one-hot rows
y_train_categorical, y_test_categorical = (
    to_categorical(codes) for codes in (encoded_y_train, encoded_y_test)
)

In [41]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [42]:
# Create model and add layers: two 100-unit ReLU hidden layers followed by a
# 2-unit softmax output.
# The input width is read from the scaled training matrix instead of being
# hard-coded, so the cell stays correct when feature columns are added or removed.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [43]:
# Compile the model only (training is a separate cell): categorical
# cross-entropy loss to match the one-hot targets, Adam optimizer, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [44]:
# Print the layer-by-layer architecture and parameter counts of the model
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 100)               1200      
_________________________________________________________________
dense_7 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 202       
Total params: 11,502
Trainable params: 11,502
Non-trainable params: 0
_________________________________________________________________


In [45]:
# Train for 60 epochs on the scaled training data, one log line per epoch
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 360 samples
Epoch 1/60
360/360 - 10s - loss: 0.6764 - accuracy: 0.5444
Epoch 2/60
360/360 - 1s - loss: 0.5583 - accuracy: 0.7083
Epoch 3/60
360/360 - 1s - loss: 0.5273 - accuracy: 0.7167
Epoch 4/60
360/360 - 1s - loss: 0.4945 - accuracy: 0.7944
Epoch 5/60
360/360 - 1s - loss: 0.4665 - accuracy: 0.8222
Epoch 6/60
360/360 - 1s - loss: 0.4449 - accuracy: 0.8278
Epoch 7/60
360/360 - 1s - loss: 0.4336 - accuracy: 0.8278
Epoch 8/60
360/360 - 1s - loss: 0.4251 - accuracy: 0.8278
Epoch 9/60
360/360 - 1s - loss: 0.4237 - accuracy: 0.8278
Epoch 10/60
360/360 - 1s - loss: 0.4195 - accuracy: 0.8278
Epoch 11/60
360/360 - 1s - loss: 0.4135 - accuracy: 0.8250
Epoch 12/60
360/360 - 1s - loss: 0.4191 - accuracy: 0.8222
Epoch 13/60
360/360 - 1s - loss: 0.4101 - accuracy: 0.8306
Epoch 14/60
360/360 - 1s - loss: 0.4112 - accuracy: 0.8250
Epoch 15/60
360/360 - 1s - loss: 0.4075 - accuracy: 0.8306
Epoch 16/60
360/360 - 1s - loss: 0.4072 - accuracy: 0.8306
Epoch 17/60
360/360 - 1s - loss: 0.4043 - a

<tensorflow.python.keras.callbacks.History at 0x7f95609e9b70>

In [46]:
# Score the trained network on the held-out split and report loss and accuracy
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

120/1 - 3s - loss: 0.6535 - accuracy: 0.7500
Normal Neural Network 1 - Loss: 0.6735727985699972, Accuracy: 0.75


Run 3 summary

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_6 (Dense)              (None, 100)               1200      
_________________________________________________________________
dense_7 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 202       
=================================================================
Total params: 11,502
Trainable params: 11,502
Non-trainable params: 0
    
120/1 - 3s - loss: 0.6535 - accuracy: 0.7500
Normal Neural Network 1 - Loss: 0.6735727985699972, Accuracy: 0.75

## Fourth run: take out Loan_Amount_Term and LoanAmount columns and add a hidden layer (no change in accuracy)

In [48]:
# Features: drop the target, the loan identifier and both loan-size columns
# (LoanAmount and Loan_Amount_Term)
X = train_data_limit_dummies.drop(columns=["Loan_Status_Y", "Loan_ID", 'LoanAmount', 'Loan_Amount_Term'])
# Target: the Loan_Status_Y indicator
y = train_data_limit_dummies.loc[:, "Loan_Status_Y"]
print(X.shape, y.shape)

(480, 11) (480,)


In [49]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [50]:
# Hold out a test split with a fixed random_state so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [51]:
# Learn the min/max scaling on the training features only, then apply the
# same fitted scaler to the held-out features
X_scaler = MinMaxScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [52]:
# Step 1: map the class labels to integer codes, with the encoder fitted on
# the training labels and reused for the test labels
label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [53]:
# Step 2: expand the integer class codes into one-hot rows
y_train_categorical, y_test_categorical = (
    to_categorical(codes) for codes in (encoded_y_train, encoded_y_test)
)

In [54]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [55]:
# Create model and add layers: three 100-unit ReLU hidden layers (one more
# than the earlier runs) followed by a 2-unit softmax output.
# The input width is read from the scaled training matrix instead of being
# hard-coded, so the cell stays correct when feature columns are added or removed.
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [56]:
# Compile the model only (training is a separate cell): categorical
# cross-entropy loss to match the one-hot targets, Adam optimizer, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [57]:
# Print the layer-by-layer architecture and parameter counts of the model
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 100)               1200      
_________________________________________________________________
dense_10 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_11 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_12 (Dense)             (None, 2)                 202       
Total params: 21,602
Trainable params: 21,602
Non-trainable params: 0
_________________________________________________________________


In [58]:
# Train for 60 epochs on the scaled training data, one log line per epoch
model.fit(x=X_train_scaled, y=y_train_categorical, epochs=60, shuffle=True, verbose=2)

Train on 360 samples
Epoch 1/60
360/360 - 8s - loss: 0.6059 - accuracy: 0.6944
Epoch 2/60
360/360 - 2s - loss: 0.5481 - accuracy: 0.7083
Epoch 3/60
360/360 - 2s - loss: 0.5078 - accuracy: 0.7639
Epoch 4/60
360/360 - 2s - loss: 0.4788 - accuracy: 0.8139
Epoch 5/60
360/360 - 2s - loss: 0.4517 - accuracy: 0.8306
Epoch 6/60
360/360 - 2s - loss: 0.4354 - accuracy: 0.8306
Epoch 7/60
360/360 - 2s - loss: 0.4329 - accuracy: 0.8361
Epoch 8/60
360/360 - 2s - loss: 0.4236 - accuracy: 0.8306
Epoch 9/60
360/360 - 2s - loss: 0.4185 - accuracy: 0.8278
Epoch 10/60
360/360 - 2s - loss: 0.4123 - accuracy: 0.8333
Epoch 11/60
360/360 - 2s - loss: 0.4102 - accuracy: 0.8333
Epoch 12/60
360/360 - 2s - loss: 0.4085 - accuracy: 0.8333
Epoch 13/60
360/360 - 2s - loss: 0.4025 - accuracy: 0.8333
Epoch 14/60
360/360 - 2s - loss: 0.3993 - accuracy: 0.8306
Epoch 15/60
360/360 - 2s - loss: 0.4058 - accuracy: 0.8250
Epoch 16/60
360/360 - 2s - loss: 0.3984 - accuracy: 0.8333
Epoch 17/60
360/360 - 2s - loss: 0.3943 - ac

<tensorflow.python.keras.callbacks.History at 0x7f9598450c50>

In [59]:
# Score the trained network on the held-out split and report loss and accuracy
model_loss, model_accuracy = model.evaluate(x=X_test_scaled, y=y_test_categorical, verbose=2)
print(f"Normal Neural Network 1 - Loss: {model_loss}, Accuracy: {model_accuracy}")

120/1 - 2s - loss: 0.8270 - accuracy: 0.7500
Normal Neural Network 1 - Loss: 0.8330453395843506, Accuracy: 0.75


Run 4 summary

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_9 (Dense)              (None, 100)               1200      
_________________________________________________________________
dense_10 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_11 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_12 (Dense)             (None, 2)                 202       
=================================================================
Total params: 21,602
Trainable params: 21,602
Non-trainable params: 0

120/1 - 2s - loss: 0.8270 - accuracy: 0.7500
Normal Neural Network 1 - Loss: 0.8330453395843506, Accuracy: 0.75
