In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# =====Read data======
# Dataset link-> https://www.kaggle.com/sulianova/cardiovascular-disease-dataset
rootPath='DS1/'
dfDS = pd.read_csv(rootPath+"cardio.csv", delimiter=';')

In [3]:
#=====Feature Scaling======
columnsToScale = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
scaler = MinMaxScaler(feature_range=(0,1))
dfDS[columnsToScale] = scaler.fit_transform(dfDS[columnsToScale])

In [4]:
#=====Split Dataset=======
X = dfDS.iloc[:, 1:len(dfDS.columns)-1].values
Y = dfDS["cardio"].values
xTrain, xTest, yTrain, yTest = train_test_split(X, Y, test_size=0.20)

yTrain = to_categorical(yTrain)
yTest = to_categorical(yTest)

In [5]:
verbose, epochs = 1, 10

def getModel(optimizer='adam', activation='relu'):
    model = Sequential()
    model.add(Dense(100, input_dim = xTrain.shape[1], activation=activation))
    model.add(Dense(50, activation=activation))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])    
    return model

In [6]:
def showResults(test, pred):
    target_names = ['positive', 'negative']
    # print(classification_report(test, pred, target_names=target_names))
    accuracy = accuracy_score(test, pred)
    precision=precision_score(test, pred, average='weighted')
    f1Score=f1_score(test, pred, average='weighted') 
    print("Accuracy  : {}".format(accuracy))
    print("Precision : {}".format(precision))
    print("f1Score : {}".format(f1Score))
    cm=confusion_matrix(test, pred)
    print(cm)    

In [7]:
# Create the sklearn model for the network
model_init_batch_epoch_CV = KerasClassifier(build_fn=getModel, verbose=1)

# Select the hyper-parameter values
batches = [64, 512, 1024]
optimizer = ['sgd','adam','rmsprop','Adagrad']
activationFunction = ['relu','selu','elu']

# Grid search for initializer, batch size and number of epochs
param_grid = dict(optimizer=optimizer, 
                  batch_size=batches, 
                  activation=activationFunction)
grid = GridSearchCV(estimator=model_init_batch_epoch_CV, 
                    param_grid=param_grid,
                    cv=3, return_train_score=True)
grid_result = grid.fit(xTrain, yTrain)

Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples


Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples


Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples


Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 37333 samples
Train on 37333 samples


Train on 37334 samples
Train on 37333 samples
Train on 37333 samples
Train on 37334 samples
Train on 56000 samples


In [8]:
# Print Results
print("===========RESULTS============")
print(f'Best Accuracy for {grid_result.best_score_:.4} using {grid_result.best_params_}')
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean_accuracy, stdev, param in zip(means, stds, params):
    print(f'mean={mean_accuracy:.4}, std={stdev:.4} using {param}')

Best Accuracy for 0.6388 using {'activation': 'selu', 'batch_size': 64, 'optimizer': 'adam'}
mean=0.5895, std=0.003602 using {'activation': 'relu', 'batch_size': 64, 'optimizer': 'sgd'}
mean=0.6344, std=0.003056 using {'activation': 'relu', 'batch_size': 64, 'optimizer': 'adam'}
mean=0.6301, std=0.009361 using {'activation': 'relu', 'batch_size': 64, 'optimizer': 'rmsprop'}
mean=0.6018, std=0.00179 using {'activation': 'relu', 'batch_size': 64, 'optimizer': 'Adagrad'}
mean=0.5665, std=0.01677 using {'activation': 'relu', 'batch_size': 512, 'optimizer': 'sgd'}
mean=0.6117, std=0.008841 using {'activation': 'relu', 'batch_size': 512, 'optimizer': 'adam'}
mean=0.6172, std=0.007465 using {'activation': 'relu', 'batch_size': 512, 'optimizer': 'rmsprop'}
mean=0.5912, std=0.005585 using {'activation': 'relu', 'batch_size': 512, 'optimizer': 'Adagrad'}
mean=0.5306, std=0.04411 using {'activation': 'relu', 'batch_size': 1024, 'optimizer': 'sgd'}
mean=0.6025, std=0.004708 using {'activation': 'r