In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("./dataset/pima-indians-diabetes.csv", header=None)
x = df.values[:,0:-1]
y = df.values[:,-1]

In [3]:
print(x.shape, y.shape)

(768, 8) (768,)


In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y,
                                                    test_size=0.3, 
                                                    random_state=789)
print(x_train.shape, y_train.shape)
print(np.unique(y_train, return_counts=True))
print(x_train[:2,:])

(537, 8) (537,)
(array([0., 1.]), array([350, 187], dtype=int64))
[[  2.     71.     70.     27.      0.     28.      0.586  22.   ]
 [  1.    199.     76.     43.      0.     42.9     1.394  22.   ]]


In [5]:
model = Sequential()
model.add(Dense(10, input_dim=8, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                90        
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 11        
Total params: 321
Trainable params: 321
Non-trainable params: 0
_________________________________________________________________


In [7]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

early_stopping_cb = EarlyStopping(monitor='val_loss', patience=20)
check_pt = ModelCheckpoint(filepath="./pima_model.hdf5",
                           monitor='val_loss', verbose=1, save_best_only=True)
model.fit(x_train, y_train, validation_data=(x_test,y_test), epochs=1000, 
          batch_size=200, callbacks=[early_stopping_cb, check_pt])

Epoch 1/1000
Epoch 00001: val_loss improved from inf to 0.64058, saving model to .\pima_model.hdf5
Epoch 2/1000
Epoch 00002: val_loss improved from 0.64058 to 0.60566, saving model to .\pima_model.hdf5
Epoch 3/1000
Epoch 00003: val_loss did not improve from 0.60566
Epoch 4/1000
Epoch 00004: val_loss did not improve from 0.60566
Epoch 5/1000
Epoch 00005: val_loss did not improve from 0.60566
Epoch 6/1000
Epoch 00006: val_loss did not improve from 0.60566
Epoch 7/1000
Epoch 00007: val_loss did not improve from 0.60566
Epoch 8/1000
Epoch 00008: val_loss did not improve from 0.60566
Epoch 9/1000
Epoch 00009: val_loss did not improve from 0.60566
Epoch 10/1000
Epoch 00010: val_loss improved from 0.60566 to 0.59250, saving model to .\pima_model.hdf5
Epoch 11/1000
Epoch 00011: val_loss improved from 0.59250 to 0.59084, saving model to .\pima_model.hdf5
Epoch 12/1000
Epoch 00012: val_loss did not improve from 0.59084
Epoch 13/1000
Epoch 00013: val_loss did not improve from 0.59084
Epoch 14/100

Epoch 31/1000
Epoch 00031: val_loss did not improve from 0.59084


<tensorflow.python.keras.callbacks.History at 0x1bcef620188>

In [8]:
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [7]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(x_train)
x_tr_std = scaler.transform(x_train)
x_te_std = scaler.transform(x_test)

In [8]:
model = Sequential()
model.add(Dense(10, input_dim=8, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 10)                90        
_________________________________________________________________
dense_5 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_6 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 11        
Total params: 321
Trainable params: 321
Non-trainable params: 0
_________________________________________________________________


In [9]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

early_stopping_cb = EarlyStopping(monitor='val_loss', patience=20)
check_pt = ModelCheckpoint(filepath="./pima_std_model.hdf5",
                           monitor='val_loss', verbose=1, save_best_only=True)
model.fit(x_tr_std, y_train, validation_data=(x_te_std,y_test), epochs=1000, 
          batch_size=200, callbacks=[early_stopping_cb, check_pt])

Epoch 1/1000
Epoch 00001: val_loss improved from inf to 0.73508, saving model to .\pima_std_model.hdf5
Epoch 2/1000
Epoch 00002: val_loss improved from 0.73508 to 0.72146, saving model to .\pima_std_model.hdf5
Epoch 3/1000
Epoch 00003: val_loss improved from 0.72146 to 0.70886, saving model to .\pima_std_model.hdf5
Epoch 4/1000
Epoch 00004: val_loss improved from 0.70886 to 0.69740, saving model to .\pima_std_model.hdf5
Epoch 5/1000
Epoch 00005: val_loss improved from 0.69740 to 0.68723, saving model to .\pima_std_model.hdf5
Epoch 6/1000
Epoch 00006: val_loss improved from 0.68723 to 0.67800, saving model to .\pima_std_model.hdf5
Epoch 7/1000
Epoch 00007: val_loss improved from 0.67800 to 0.67003, saving model to .\pima_std_model.hdf5
Epoch 8/1000
Epoch 00008: val_loss improved from 0.67003 to 0.66320, saving model to .\pima_std_model.hdf5
Epoch 9/1000
Epoch 00009: val_loss improved from 0.66320 to 0.65697, saving model to .\pima_std_model.hdf5
Epoch 10/1000
Epoch 00010: val_loss impro

Epoch 27/1000
Epoch 00027: val_loss improved from 0.59029 to 0.58712, saving model to .\pima_std_model.hdf5
Epoch 28/1000
Epoch 00028: val_loss improved from 0.58712 to 0.58390, saving model to .\pima_std_model.hdf5
Epoch 29/1000
Epoch 00029: val_loss improved from 0.58390 to 0.58056, saving model to .\pima_std_model.hdf5
Epoch 30/1000
Epoch 00030: val_loss improved from 0.58056 to 0.57725, saving model to .\pima_std_model.hdf5
Epoch 31/1000
Epoch 00031: val_loss improved from 0.57725 to 0.57409, saving model to .\pima_std_model.hdf5
Epoch 32/1000
Epoch 00032: val_loss improved from 0.57409 to 0.57114, saving model to .\pima_std_model.hdf5
Epoch 33/1000
Epoch 00033: val_loss improved from 0.57114 to 0.56817, saving model to .\pima_std_model.hdf5
Epoch 34/1000
Epoch 00034: val_loss improved from 0.56817 to 0.56541, saving model to .\pima_std_model.hdf5
Epoch 35/1000
Epoch 00035: val_loss improved from 0.56541 to 0.56253, saving model to .\pima_std_model.hdf5
Epoch 36/1000
Epoch 00036: v

Epoch 53/1000
Epoch 00053: val_loss improved from 0.53423 to 0.53354, saving model to .\pima_std_model.hdf5
Epoch 54/1000
Epoch 00054: val_loss improved from 0.53354 to 0.53307, saving model to .\pima_std_model.hdf5
Epoch 55/1000
Epoch 00055: val_loss improved from 0.53307 to 0.53232, saving model to .\pima_std_model.hdf5
Epoch 56/1000
Epoch 00056: val_loss improved from 0.53232 to 0.53160, saving model to .\pima_std_model.hdf5
Epoch 57/1000
Epoch 00057: val_loss improved from 0.53160 to 0.53119, saving model to .\pima_std_model.hdf5
Epoch 58/1000
Epoch 00058: val_loss improved from 0.53119 to 0.53092, saving model to .\pima_std_model.hdf5
Epoch 59/1000
Epoch 00059: val_loss did not improve from 0.53092
Epoch 60/1000
Epoch 00060: val_loss did not improve from 0.53092
Epoch 61/1000
Epoch 00061: val_loss improved from 0.53092 to 0.53071, saving model to .\pima_std_model.hdf5
Epoch 62/1000
Epoch 00062: val_loss improved from 0.53071 to 0.53059, saving model to .\pima_std_model.hdf5
Epoch 

Epoch 82/1000
Epoch 00082: val_loss did not improve from 0.53046
Epoch 83/1000
Epoch 00083: val_loss did not improve from 0.53046


<tensorflow.python.keras.callbacks.History at 0x25e42818dc8>

In [10]:
from tensorflow.keras.models import load_model

raw_model = load_model("./pima_model.hdf5")
std_model = load_model("./pima_std_model.hdf5")

print(raw_model.evaluate(x_test, y_test))
print(std_model.evaluate(x_te_std, y_test))

[0.6540851593017578, 0.6406926512718201]
[0.5304639339447021, 0.761904776096344]
