In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# **Data Preprocessing**

In [None]:
# Read the dataset into a pandas DataFrame
heartdata = pd.read_csv('heart.csv')

# Features: the first eleven columns (positions 0-10; iloc's end bound is exclusive)
x = heartdata.iloc[:, 0:11]

# Target: the twelfth column (position 11), kept as a one-column DataFrame
y = heartdata.iloc[:, 11:12]

# Preview the raw data. A bare expression is only rendered when it is the last
# statement of the cell; in the middle of the cell its result was discarded.
heartdata.head()

In [None]:
# Convert the categorical columns of x to numerical codes
from sklearn import preprocessing
le = preprocessing.LabelEncoder()

# Encode the target column (one-column DataFrame, so apply works column-wise).
y = y.apply(le.fit_transform)

# Label-encode ONLY the non-numeric feature columns. Running LabelEncoder over
# every column would also replace genuine numeric measurements with the rank of
# each value among the column's unique values, discarding their magnitudes.
categorical_cols = x.select_dtypes(exclude='number').columns

# One encoder per column so each mapping can be inspected or inverted later.
feature_encoders = {}
x = x.copy()  # work on a copy, not on a slice of heartdata
for col in categorical_cols:
    feature_encoders[col] = preprocessing.LabelEncoder()
    x[col] = feature_encoders[col].fit_transform(x[col])

# Recheck column values
x.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope
0,12,1,1,41,147,0,1,98,0,10,2
1,21,0,2,55,40,0,1,82,0,20,1
2,9,1,1,31,141,0,2,25,0,10,2
3,20,0,0,39,72,0,1,34,1,25,1
4,26,1,2,49,53,0,1,48,0,10,2


# **Train, Test, Split and Feature Scaling**

In [None]:
# Train, Test, Split
from sklearn.model_selection import train_test_split
# A fixed random_state makes the 80/20 split, and every metric derived from it,
# repeatable across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 42)

# Feature Scaling (Standardization)
from sklearn.preprocessing import StandardScaler
# Fit on the training split only, then apply the same transform to both splits.
scaler = StandardScaler()
scaler.fit(x_train)

# transform() returns NumPy arrays, so x_train / x_test are ndarrays from here on.
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
print(x_test)

[[-1.23159588 -1.90933748  1.28038485 ... -0.83846064 -0.88056038
  -0.57218741]
 [ 2.53010881  0.52374188 -0.81973242 ...  1.19266183 -0.88056038
   1.06838119]
 [ 1.56281332  0.52374188 -0.81973242 ... -0.83846064  2.27564322
  -0.57218741]
 ...
 [-0.15682311  0.52374188  0.23032621 ... -0.83846064 -0.88056038
   1.06838119]
 [-1.98393682  0.52374188  0.23032621 ... -0.83846064 -0.88056038
   1.06838119]
 [-0.26430039 -1.90933748  1.28038485 ... -0.83846064  0.59891006
   1.06838119]]


In [None]:
# Feature Scaling (Standardization) of the full feature matrix.
#
# This cell used to call train_test_split a second time. That replaced the
# already-scaled x_train / x_test with fresh, unscaled splits, so the
# MLPClassifier further down was trained on unscaled features. The existing
# split is now left untouched; this cell only builds X_standardized.
#
# NOTE(review): the scaler here is fitted on all rows, so when X_standardized is
# later cross-validated the validation folds have influenced the scaling.
# Wrapping scaler + model in a Pipeline would avoid that.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(x)

X_standardized = scaler.transform(x)

# Sanity check: every column should show mean ~0 and std ~1
data = pd.DataFrame(X_standardized)
data.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,-7.304735000000001e-17,-9.44778e-16,-7.178958e-16,1.079988e-16,4.573925e-16,-1.009843e-15,3.085204e-15,4.88353e-16,-7.031412e-16,1.907939e-15,-4.474755e-16
std,1.000545,1.000545,1.000545,1.000545,1.000545,1.000545,1.000545,1.000545,1.000545,1.000545,1.000545
min,-2.706015,-1.938163,-0.816995,-2.33482,-1.390554,-0.5513413,-1.56671,-2.528673,-0.8235563,-1.865299,-2.24427
25%,-0.6906294,0.5159524,-0.816995,-0.7735319,-0.8242184,-0.5513413,0.01725451,-0.6817336,-0.8235563,-0.8724628,-0.5960781
50%,0.05188098,0.5159524,-0.816995,-0.1348231,-0.0109098,-0.5513413,0.01725451,0.04098187,-0.8235563,-0.2767612,-0.5960781
75%,0.6883185,0.5159524,1.275059,0.5748534,0.7385264,-0.5513413,0.01725451,0.7636973,1.214246,0.6167913,1.052114
max,2.491558,0.5159524,2.321086,2.349045,2.37366,1.813758,1.601219,2.209128,1.214246,3.297448,1.052114


# **Artificial Neural Network (ANN)**

In [None]:
# Training and Predictions
from sklearn.neural_network import MLPClassifier
# Two hidden layers of 10 units each; random_state pins the weight initialisation
# so repeated runs give the same model.
mlp = MLPClassifier(hidden_layer_sizes = (10, 10), max_iter = 2000, random_state = 42)
# y_train is a one-column DataFrame; ravel() flattens it to the 1-D label vector.
mlp.fit(x_train, y_train.values.ravel())
# (A stray, pasted estimator repr that built and discarded a second
# MLPClassifier was removed from here.)

# Making Predictions
predictions = mlp.predict(x_test)

# Evaluating algorithm
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))


[[64 14]
 [19 87]]
              precision    recall  f1-score   support

           0       0.77      0.82      0.80        78
           1       0.86      0.82      0.84       106

    accuracy                           0.82       184
   macro avg       0.82      0.82      0.82       184
weighted avg       0.82      0.82      0.82       184



# **Deep Neural Network (DNN)**

In [None]:
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.optimizers import Adam

**Do a grid search to find the optimal batch size and number of epochs**

In [None]:
# Define a random seed
seed = 6
np.random.seed(seed)

# Seeding NumPy alone does not make the Keras runs repeatable: TensorFlow keeps
# its own global random state (used e.g. for weight initialisation), so seed
# that as well.
import tensorflow as tf
tf.random.set_seed(seed)

# Model factory used by the batch-size / epochs grid search
def create_model():
    """Build and compile the fixed-architecture binary classifier.

    Architecture: 11 input features -> Dense(30, tanh) -> Dense(20, tanh)
    -> Dense(1, sigmoid). The first layer uses the 'normal' kernel initializer.

    Returns:
        A compiled Keras Sequential model using binary cross-entropy loss,
        the Adam optimizer (learning rate 0.01) and accuracy as its metric.
    """
    # Declare the whole layer stack up front instead of adding layers one by one.
    network = Sequential([
        Dense(30, input_dim = 11, kernel_initializer='normal', activation='tanh'),
        Dense(20, activation='tanh'),
        Dense(1, activation='sigmoid'),
    ])

    network.compile(
        loss = 'binary_crossentropy',
        optimizer = Adam(learning_rate = 0.01),
        metrics = ['accuracy'],
    )
    return network

# Wrap the Keras builder as a scikit-learn estimator.
# verbose = 0: with 9 candidates x 5 folds, per-epoch Keras progress lines
# would bury the cross-validation summary printed by GridSearchCV.
model = KerasClassifier(build_fn = create_model, verbose = 0)

# define the grid search parameters
batch_size = [10, 20, 40]
epochs = [10, 50, 100]

# make a dictionary of the grid search parameters
param_grid = dict(batch_size=batch_size, epochs=epochs)

# Shuffle the rows before folding and seed the shuffle. Without shuffle=True,
# KFold cuts the data into contiguous blocks in file order (and random_state
# has no effect), so fold composition depends on how the CSV is sorted.
cv = KFold(n_splits = 5, shuffle = True, random_state = seed)

# build and fit the GridSearchCV
grid = GridSearchCV(estimator = model, param_grid = param_grid, cv = cv, verbose = 10)
grid_results = grid.fit(X_standardized, y)

# summarize the results: best configuration first, then every candidate
print("Best: {0}, using {1}".format(grid_results.best_score_, grid_results.best_params_))
cv_results = grid_results.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print('{0} ({1}) with: {2}'.format(mean, stdev, param))



Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START batch_size=10, epochs=10....................................
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV 1/5; 1/9] END .....batch_size=10, epochs=10;, score=0.880 total time=   2.3s
[CV 2/5; 1/9] START batch_size=10, epochs=10....................................
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV 2/5; 1/9] END .....batch_size=10, epochs=10;, score=0.837 total time=   1.9s
[CV 3/5; 1/9] START batch_size=10, epochs=10....................................
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[CV 3/5; 1/9] END .....batch_size=10, epochs=10;, score=0.913 total time=   1.9s
[CV 4/5; 1/9] START batch_size=10, epochs=10....................................
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4

**Do a grid search to find the optimal number of neurons in each hidden layer**

In [None]:
# Define a random seed (re-seeded so this search does not depend on how many
# random draws earlier cells consumed)
seed = 6
np.random.seed(seed)

# TensorFlow keeps its own global random state (used e.g. for weight
# initialisation); seeding NumPy alone leaves the Keras runs non-repeatable.
import tensorflow as tf
tf.random.set_seed(seed)

# Model factory used by the hidden-layer-size grid search
def create_model(neuron1, neuron2):
    """Build and compile a binary classifier with configurable hidden widths.

    Args:
        neuron1: Number of units in the first hidden layer.
        neuron2: Number of units in the second hidden layer.

    Returns:
        A compiled Keras Sequential model: 11 inputs -> Dense(neuron1, linear)
        -> Dense(neuron2, linear) -> Dense(1, sigmoid), trained with binary
        cross-entropy, Adam (learning rate 0.001) and an accuracy metric.

    NOTE(review): both hidden layers use a linear activation, so the stack up
    to the output is a composition of linear maps — confirm this is intended.
    """
    # create model
    model = Sequential()
    model.add(Dense(neuron1, input_dim = 11, kernel_initializer= 'uniform', activation= 'linear'))
    # Only the first layer of a Sequential model declares the input size; later
    # layers take it from the previous layer's output, so the redundant
    # input_dim = neuron1 argument was dropped here.
    model.add(Dense(neuron2, kernel_initializer= 'uniform', activation= 'linear'))
    model.add(Dense(1, activation='sigmoid'))

    # compile the model
    adam = Adam(learning_rate = 0.001)
    model.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy'])
    return model

# Wrap the Keras builder as a scikit-learn estimator with fixed training settings.
# NOTE(review): epochs = 100 / batch_size = 20 are presumably the winners of the
# batch-size / epochs search — confirm against that search's output.
model = KerasClassifier(build_fn = create_model, epochs = 100, batch_size = 20, verbose = 0)

# define the grid search parameters
neuron1 = [4, 8, 16]
neuron2 = [2, 4, 8]

# make a dictionary of the grid search parameters; the keys must match the
# argument names of create_model
param_grid = dict(neuron1 = neuron1, neuron2 = neuron2)

# Shuffle the rows before folding and seed the shuffle. Without shuffle=True,
# KFold cuts the data into contiguous blocks in file order (and random_state
# has no effect), so fold composition depends on how the CSV is sorted.
cv = KFold(n_splits = 5, shuffle = True, random_state = seed)

# build and fit the GridSearchCV; refit = True retrains the best configuration
# on all of X_standardized so that grid.predict() can be used afterwards
grid = GridSearchCV(estimator = model, param_grid = param_grid, cv = cv, refit = True, verbose = 10)
grid_results = grid.fit(X_standardized, y)

# summarize the results: best configuration first, then every candidate
print("Best: {0}, using {1}".format(grid_results.best_score_, grid_results.best_params_))
cv_results = grid_results.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print('{0} ({1}) with: {2}'.format(mean, stdev, param))



Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 1/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.837 total time=   4.4s
[CV 2/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 2/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.848 total time=   5.7s
[CV 3/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 3/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.870 total time=   5.7s
[CV 4/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 4/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.809 total time=   6.0s
[CV 5/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 5/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.770 total time=   5.7s
[CV 1/5; 2/9] START neuron1=4, neuron2=4........................................
[CV 1/5; 2/9] END .........neuron1=4, neuron2=4;,

**Generate predictions with optimal hyperparameters**

In [None]:
# Predict with the fitted grid search object (built with refit = True).
# NOTE(review): X_standardized is the same data the grid search was fitted on,
# so these are in-sample predictions, not predictions on held-out rows.
y_pred = grid.predict(X_standardized)

In [None]:
# Inspect the shape of the prediction array
print(y_pred.shape)

(918, 1)


In [None]:
# Peek at the first five predictions
print(y_pred[:5])

[[0]
 [0]
 [0]
 [1]
 [0]]


**Generate a classification report**

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# NOTE: y_pred was produced on the same rows the grid search was fitted on, so
# the accuracy and report below are in-sample (training) figures and will tend
# to overstate how well the model generalises.
print(accuracy_score(y, y_pred))
print(classification_report(y, y_pred))

# The mean cross-validated accuracy of the selected configuration is computed
# on rows held out from each fit, so report it alongside as the fairer estimate.
print("Cross-validated accuracy of best model: {0}".format(grid.best_score_))

0.8496732026143791
              precision    recall  f1-score   support

           0       0.84      0.82      0.83       410
           1       0.86      0.87      0.87       508

    accuracy                           0.85       918
   macro avg       0.85      0.85      0.85       918
weighted avg       0.85      0.85      0.85       918

