In [1]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
import pandas as pd
import numpy as np

In [2]:
def create_model():
    """Build and compile the baseline binary classifier.

    The network is a Sequential stack of three Dense layers: 12 ReLU units
    fed by 8 input features, then 8 ReLU units, then a single sigmoid output
    unit. It is compiled with binary cross-entropy loss, the 'adam'
    optimizer and accuracy as the tracked metric.

    Returns:
        The compiled Sequential model.
    """
    network = Sequential()
    layers = (
        Dense(12, input_dim=8, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='sigmoid'),
    )
    for layer in layers:
        network.add(layer)
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [3]:
# Load the dataset: the first eight columns are used as the feature matrix,
# the ninth column as the target.
data = pd.read_csv('./diabetes.csv', sep=',')
X = data.iloc[:, :8]
y = data.iloc[:, 8]

In [4]:
# Wrap the model factory so it can be used as a scikit-learn estimator.
# Training settings: 150 epochs, batches of 10 samples, verbose=0.
model = KerasClassifier(
    build_fn=create_model,
    epochs=150,
    batch_size=10,
    verbose=0,
)

In [5]:
# StratifiedKFold preserves the dataset's class distribution in each split
# (recall the concept of stratified sampling).
# shuffle=True randomises the fold assignment, so random_state is pinned to
# keep the splits identical across runs; model training itself is still
# stochastic, so scores can vary slightly between executions.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [6]:
# Cross-validate the wrapped classifier using the stratified splitter.
result = cross_val_score(estimator=model, X=X, y=y, cv=kfold)

**Existe un accuracy de aproximadamente 70%**

In [7]:
# Report the mean of the cross-validation scores.
print(np.mean(result))

0.7006322562694549


# Optimización de hiperparámetros

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
def create_model(optimizer='rmsprop', init='glorot_uniform'):
    """Build and compile the classifier with a tunable optimizer and initializer.

    The architecture is three Dense layers (12 ReLU units fed by 8 input
    features, 8 ReLU units, 1 sigmoid unit); every layer uses ``init`` as its
    kernel initializer.

    Args:
        optimizer: Optimizer passed to ``compile``.
        init: Kernel initializer applied to all three Dense layers.

    Returns:
        The Sequential model compiled with binary cross-entropy loss and
        accuracy as the tracked metric.
    """
    network = Sequential([
        Dense(12, input_dim=8, kernel_initializer=init, activation='relu'),
        Dense(8, kernel_initializer=init, activation='relu'),
        Dense(1, kernel_initializer=init, activation='sigmoid'),
    ])
    network.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

# Reload the dataset and split it into features (first eight columns)
# and target (ninth column).
data = pd.read_csv('./diabetes.csv', sep=',')
X, y = data.iloc[:, :8], data.iloc[:, 8]

# Wrap the tunable model factory as a scikit-learn estimator. Epochs, batch
# size and the factory arguments are not fixed here; they come from the
# parameter grid defined below.
model = KerasClassifier(
    build_fn=create_model,
    verbose=0,
)

# Candidate values for each hyperparameter explored by the search.
optimizers = ['rmsprop', 'adam']
inits = ['glorot_uniform', 'normal', 'uniform']
epoch = [50, 100, 150]
batches = [5, 10, 20]

# 'optimizer' and 'init' are create_model's arguments; 'epochs' and
# 'batch_size' are the training settings.
param_grid = {
    'optimizer': optimizers,
    'epochs': epoch,
    'batch_size': batches,
    'init': inits,
}

# Evaluate every combination in param_grid with 3-fold cross-validation.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
)
grid_result = grid.fit(X, y)



In [10]:
# Headline: the best score found and the parameter combination behind it.
print("Mejor: %f usando %s" % (grid_result.best_score_, grid_result.best_params_))

cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

# One line per evaluated combination: mean score, standard deviation, parameters.
for row in zip(means, stds, params):
    print("%f (%f) con %r" % row)

Mejor: 0.746094 usando {'batch_size': 5, 'epochs': 150, 'init': 'normal', 'optimizer': 'adam'}
0.645833 (0.030978) con {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'rmsprop'}
0.677083 (0.012075) con {'batch_size': 5, 'epochs': 50, 'init': 'glorot_uniform', 'optimizer': 'adam'}
0.703125 (0.031412) con {'batch_size': 5, 'epochs': 50, 'init': 'normal', 'optimizer': 'rmsprop'}
0.720052 (0.001841) con {'batch_size': 5, 'epochs': 50, 'init': 'normal', 'optimizer': 'adam'}
0.692708 (0.018414) con {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'rmsprop'}
0.682292 (0.038051) con {'batch_size': 5, 'epochs': 50, 'init': 'uniform', 'optimizer': 'adam'}
0.703125 (0.014616) con {'batch_size': 5, 'epochs': 100, 'init': 'glorot_uniform', 'optimizer': 'rmsprop'}
0.718750 (0.037603) con {'batch_size': 5, 'epochs': 100, 'init': 'glorot_uniform', 'optimizer': 'adam'}
0.727865 (0.027866) con {'batch_size': 5, 'epochs': 100, 'init': 'normal', 'optimizer': 'rmsprop'