In [1]:
import tensorflow as tf
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV


import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
import numpy as np

In [2]:
# Load the heart-failure records from the CSV file next to the notebook.
data = pd.read_csv('heart_failure.csv')

# Predictor columns used for modelling; the target is "death_event".
feature_names = [
    'age',
    'anaemia',
    'creatinine_phosphokinase',
    'diabetes',
    'ejection_fraction',
    'high_blood_pressure',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'sex',
    'smoking',
    'time',
]
y = data["death_event"]
x = data[feature_names]

# One-hot encode the predictors (pandas leaves numeric columns unchanged).
x = pd.get_dummies(x)

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)


In [3]:
# Standardise every feature column. The scaler is fitted on the training
# split only and then re-applied to the test split, so no test-set
# statistics leak into the preprocessing.
columns_to_scale = X_train.columns
ct = ColumnTransformer([("numeric", StandardScaler(), columns_to_scale)])

X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)

In [4]:
# Encode the target as integer class ids. The encoder is fitted on the
# training labels and the same mapping is reused for the test labels.
le = LabelEncoder()
train_labels = Y_train.astype(str)
test_labels = Y_test.astype(str)
Y_train = le.fit_transform(train_labels)
Y_test = le.transform(test_labels)

# Don't convert the labels to a 2-D (one-hot) array when they are used for
# the grid search. One-hot version, kept for reference:
# Y_train = to_categorical(Y_train)
# Y_test = to_categorical(Y_test)

In [5]:
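# NOTE: disabled baseline (2-unit softmax + categorical_crossentropy).
# Re-enabling it also requires one-hot labels, i.e.
# to_categorical(Y_train) / to_categorical(Y_test), before fitting.
# model = Sequential()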
# model.add(InputLayer(input_shape=(X_train.shape[1],)))
# model.add(Dense(12, activation='relu'))
# model.add(Dense(2, activation='softmax'))
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# model.fit(X_train, Y_train, epochs = 50, batch_size = 2, verbose=1)

# loss, acc = model.evaluate(X_test, Y_test, verbose=0)
# print("Loss", loss, "Accuracy:", acc)


# y_estimate = model.predict(X_test, verbose=0)
# y_estimate = np.argmax(y_estimate, axis=1)
# print(y_estimate)

# y_true = np.argmax(Y_test, axis=1)
# print(y_true)

# print(classification_report(y_true, y_estimate))

# GridSearchCV

In [7]:
# Function to create model, required for KerasClassifier
def create_model(meta=None):
    """Build and compile the binary classifier used by the grid search.

    Architecture: input -> Dense(12, relu) -> Dense(1, sigmoid), compiled
    with binary cross-entropy, the Adam optimizer and an accuracy metric.

    Parameters
    ----------
    meta : dict, optional
        Metadata that scikeras passes to build functions declaring a
        ``meta`` parameter. ``meta["n_features_in_"]`` is the number of
        feature columns in the data given to ``fit``. When omitted (a
        direct ``create_model()`` call), the width of the notebook-level
        ``X_train`` is used instead.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # Prefer the width of the data actually being fitted over the notebook
    # global, so the model cannot silently go out of sync with its input.
    if meta is not None:
        n_features = meta["n_features_in_"]
    else:
        n_features = X_train.shape[1]

    model = Sequential()
    model.add(InputLayer(input_shape=(n_features,)))
    model.add(Dense(12, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)

# create model
# The global seed above only affects this kernel process. The grid search
# runs with n_jobs=-1, i.e. in worker processes, so the seed is also handed
# to the wrapper via random_state to let scikeras seed every individual fit.
model = KerasClassifier(model=create_model, verbose=0, random_state=seed)

In [8]:
# Candidate hyper-parameter values explored by the grid search.
batch_size = [1, 20]
epochs = [5, 10]

# Parameter name -> list of values to try.
param_grid = {
    "batch_size": batch_size,
    "epochs": epochs,
}

In [18]:
# Inspect the standardised training matrix produced by the scaling cell.
X_train

array([[ 0.81433868, -0.01153283,  0.22299372, ...,  0.74037028,
        -1.48841682,  1.48841682],
       [-0.0490202 , -0.00552695,  2.01207961, ..., -1.35067551,
         0.67185481, -0.67185481],
       [ 0.38265924, -0.01153283, -0.67154923, ...,  0.74037028,
        -1.48841682,  1.48841682],
       ...,
       [ 2.109377  , -0.49200363,  2.01207961, ..., -1.35067551,
         0.67185481, -0.67185481],
       [-0.0490202 , -0.01153283,  0.04408513, ...,  0.74037028,
        -1.48841682,  1.48841682],
       [ 0.81433868, -0.42293595,  2.01207961, ...,  0.74037028,
        -1.48841682,  1.48841682]])

In [12]:
# Exhaustive search over param_grid: 2-fold cross-validation per candidate,
# fits spread over all available cores, progress printed while running.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=2,
    n_jobs=-1,
    verbose=1,
)

# fit() returns the fitted search object itself.
grid_result = grid.fit(X_train, Y_train)

Fitting 2 folds for each of 4 candidates, totalling 8 fits


In [13]:
# Report the best candidate, then the mean/std CV score of every candidate.
print(f"Best: {grid_result.best_score_:f} using {grid_result.best_params_}")

cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

for idx in range(min(len(means), len(stds), len(params))):
    print(f"{means[idx]:f} ({stds[idx]:f}) with: {params[idx]!r}")

Best: 0.779853 using {'batch_size': 1, 'epochs': 5}
0.779853 (0.010623) with: {'batch_size': 1, 'epochs': 5}
0.770421 (0.018040) with: {'batch_size': 1, 'epochs': 10}
0.684020 (0.039789) with: {'batch_size': 20, 'epochs': 5}
0.612225 (0.044918) with: {'batch_size': 20, 'epochs': 10}
