In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [23]:
dataset = pd.read_csv("processed.cleveland.data", header=None)
dataset.columns = [
    "age",
    "sex",
    "cp",
    "restbp",
    "chol",
    "fbs",
    "restecg",
    "thalach",
    "exang",
    "oldpeak",
    "slope",
    "ca",
    "thal",
    "hd",
]
dataset.head()

Unnamed: 0,age,sex,cp,restbp,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,hd
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [24]:
dataset.dtypes


age        float64
sex        float64
cp         float64
restbp     float64
chol       float64
fbs        float64
restecg    float64
thalach    float64
exang      float64
oldpeak    float64
slope      float64
ca          object
thal        object
hd           int64
dtype: object

In [25]:
dataset['ca'].unique()

array(['0.0', '3.0', '2.0', '1.0', '?'], dtype=object)

In [26]:
dataset['thal'].unique()

array(['6.0', '3.0', '7.0', '?'], dtype=object)

In [27]:
df = dataset.loc[(dataset["ca"] != "?") & (dataset["thal"] != "?")]

In [28]:
X = df.iloc[:, :-1]
X.head()

Unnamed: 0,age,sex,cp,restbp,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0


In [29]:
y = df.iloc[:, [-1]]
y.head()

Unnamed: 0,hd
0,0
1,2
2,1
3,0
4,0


In [30]:
y['hd'].unique()

array([0, 2, 1, 3, 4], dtype=int64)

In [31]:
X_ohe = pd.get_dummies(X, columns=["cp", "restecg", "slope", "thal"])


In [32]:
X_ohe.head()

Unnamed: 0,age,sex,restbp,chol,fbs,thalach,exang,oldpeak,ca,cp_1.0,...,cp_4.0,restecg_0.0,restecg_1.0,restecg_2.0,slope_1.0,slope_2.0,slope_3.0,thal_3.0,thal_6.0,thal_7.0
0,63.0,1.0,145.0,233.0,1.0,150.0,0.0,2.3,0.0,1,...,0,0,0,1,0,0,1,0,1,0
1,67.0,1.0,160.0,286.0,0.0,108.0,1.0,1.5,3.0,0,...,1,0,0,1,0,1,0,1,0,0
2,67.0,1.0,120.0,229.0,0.0,129.0,1.0,2.6,2.0,0,...,1,0,0,1,0,1,0,0,0,1
3,37.0,1.0,130.0,250.0,0.0,187.0,0.0,3.5,0.0,0,...,0,1,0,0,0,0,1,1,0,0
4,41.0,0.0,130.0,204.0,0.0,172.0,0.0,1.4,0.0,0,...,0,0,0,1,1,0,0,1,0,0


In [33]:
X_ohe.columns


Index(['age', 'sex', 'restbp', 'chol', 'fbs', 'thalach', 'exang', 'oldpeak',
       'ca', 'cp_1.0', 'cp_2.0', 'cp_3.0', 'cp_4.0', 'restecg_0.0',
       'restecg_1.0', 'restecg_2.0', 'slope_1.0', 'slope_2.0', 'slope_3.0',
       'thal_3.0', 'thal_6.0', 'thal_7.0'],
      dtype='object')

In [34]:
X_ohe = X_ohe.drop(['cp_1.0', 'restecg_0.0', 'slope_1.0', 'thal_3.0'], axis = 1)

In [35]:
y_ohe = pd.get_dummies(y, columns=['hd'])

In [36]:
y_ohe.head()

Unnamed: 0,hd_0,hd_1,hd_2,hd_3,hd_4
0,1,0,0,0,0
1,0,0,1,0,0
2,0,1,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0


In [37]:
y_ohe.columns

Index(['hd_0', 'hd_1', 'hd_2', 'hd_3', 'hd_4'], dtype='object')

In [38]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_ohe, y_ohe, test_size=0.3, random_state=0
)

In [39]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [40]:
X_train.shape

(207, 18)

In [41]:
y_train.shape

(207, 5)

In [42]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score

In [43]:
def build_classifier():
    classifier = Sequential()
    classifier.add(keras.Input(shape=(18,)))
    classifier.add(Dense(1024, activation="relu"))
    classifier.add(Dropout(0.1))
    classifier.add(Dense(512, activation="relu"))
    classifier.add(Dropout(0.1))
    classifier.add(Dense(512, activation="relu"))
    classifier.add(Dropout(0.1))
    classifier.add(Dense(5, activation="softmax"))
    classifier.compile(
        optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
    )
    return classifier

In [44]:
classifier = KerasClassifier(build_fn=build_classifier, epochs=70, batch_size=10)
accuracies = cross_val_score(
    estimator=classifier, X=X_train, y=y_train, cv=5, n_jobs=-1
)

In [45]:
mean = accuracies.mean()
variance = accuracies.std()
print(mean)
print(variance)

0.5319396051103368
0.053422461108998336


In [46]:
classifier.fit(X_train, y_train)



Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


KerasClassifier(
	model=None
	build_fn=<function build_classifier at 0x00000206C18FAC20>
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=10
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=70
	class_weight=None
)

In [51]:
y_pred = classifier.predict(X_test)



In [52]:
from sklearn.metrics import accuracy_score


In [53]:
acc = accuracy_score(y_true=y_test, y_pred=y_pred) * 100
print('Accuracy score:', acc)

Accuracy score: 50.0
