In [7]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
seed = 7
np.random.seed(seed)

In [20]:
# load in the dataset
dataset = pd.read_csv('diabetes.csv', delimiter=',').values
print('Shape of data\n',dataset.shape)

# Splitting the dataset in to features and target
X = dataset[:,:8]
y = dataset[:,8]

Shape of data
 (768, 9)


In [94]:
# creating a model with keras
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [95]:
# compiling the model 
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [96]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2,)

In [97]:
# training the model
model.fit(X_train,y_train, epochs= 15, batch_size=10, validation_data=(X_test,y_test))

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x259695df8b0>

In [98]:
score = model.evaluate(X_test,y_test)
print(score)

[0.82732754945755, 0.5844155550003052]


In [99]:
score[1]*100

58.44155550003052

In [48]:
model.metrics_names

['loss', 'accuracy']

In [100]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=10)

In [101]:
for train_index, val_index in skf.split(X,y):
    train_scores = []
    val_scores = []
    # getting the splitted train and validation set
    train_set, train_target = X[train_index], y[train_index]
    val_set, val_target = X[val_index], y[val_index]
    
    # create the model
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    
    # compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fitting the model on the train set
    model.fit(train_set,train_target, verbose=0, epochs=150, batch_size=10)
    
    # evaluating the model
    train_score = model.evaluate(train_set,train_target, verbose=0)
    val_score = model.evaluate(val_set,val_target, verbose=0)
    
    train_scores.append(train_score[1])
    val_scores.append(val_score[1])
    
    # printing the train and validation score for each fold
    print('%s:> Train: %.2f%%       val: %.2f%% ' % (model.metrics_names[1], train_score[1]*100, val_score[1]*100))

# printing the avaerage train and validation scores
print('%s:> Average Train: %.2f%% (+/-) %.2f%%      Average Val: %.2f%% (+/-) %.2f%% ' % (model.metrics_names[1], np.mean(train_scores)*100,np.std(train_scores)*100, np.mean(val_scores)*100, np.std(val_scores)*100))

accuracy:> Train: 70.19%       val: 68.83% 
accuracy:> Train: 75.40%       val: 71.43% 
accuracy:> Train: 77.28%       val: 72.73% 
accuracy:> Train: 75.11%       val: 72.73% 
accuracy:> Train: 75.69%       val: 74.03% 
accuracy:> Train: 72.36%       val: 71.43% 
accuracy:> Train: 76.85%       val: 75.32% 
accuracy:> Train: 78.44%       val: 75.32% 
accuracy:> Train: 77.46%       val: 75.00% 
accuracy:> Train: 76.16%       val: 76.32% 
accuracy:> Average Train: 76.16% (+/-) 0.00%      Average Val: 76.32% (+/-) 0.00% 


Using Keras with sci-kit learn

In [102]:
from sklearn.model_selection import cross_validate, cross_val_score
from keras.wrappers.scikit_learn import KerasClassifier

In [113]:
def build_model():
    model = Sequential()
    model.add(Dense(units=12, input_dim=8, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
    
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model

In [114]:
model = KerasClassifier(build_fn=build_model, verbose=0, epochs=150, batch_size=10)

In [110]:
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [106]:
results = cross_val_score(model,X,y, cv=kf)

In [107]:
results.mean()

0.7395891785621643