In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing
import warnings
# Ignore all warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt 
import seaborn as sns

from sklearn.model_selection import train_test_split

import tensorflow as tf

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from tensorflow.keras.layers import BatchNormalization
from sklearn.metrics import classification_report


In [2]:
# Load heart.csv into a DataFrame; the path is relative to the current working directory.
df = pd.read_csv('heart.csv')

In [3]:
# Preview the first rows to sanity-check column names and value ranges.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


### Data Verification

In [4]:
df.isna().sum()  # number of missing (NaN) values in each column

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [5]:
# Count rows that exactly repeat an earlier row.
# NOTE(review): the count is large (see the output below), and nothing later in
# the notebook de-duplicates before the random train/test split. Identical
# records can therefore land on both sides of the split, which likely inflates
# the reported test metrics. Confirm whether duplicates should be dropped first.
df.duplicated().sum()

723

In [6]:
# Share of each target class, rounded to two decimals, to check class balance.
df['target'].value_counts(normalize=True).round(2)

1    0.51
0    0.49
Name: target, dtype: float64

### Data Preprocessing (scaling & encoding)

In [7]:
# Split the feature columns by the preprocessing each group receives below.
df_us = df.loc[:, ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']]  # passed to the scaler
df_ue = df.loc[:, ['cp', 'slope', 'ca', 'thal']]  # passed to the one-hot encoder
df_bi = df.loc[:, ['sex', 'fbs', 'exang']]  # concatenated into X unchanged

In [8]:
from sklearn.preprocessing import StandardScaler

# Standardise the continuous features column by column.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split, so statistics of the held-out rows influence the training
# features. Consider fitting on the training rows only.
norm = StandardScaler()

scaled_array = norm.fit_transform(df_us)

# fit_transform returns a bare array, so re-attach the original row index as
# well as the column names. Without it the frame silently gets a fresh
# RangeIndex and the axis=1 concat that builds X would misalign rows as soon
# as df carries a non-default index (e.g. after filtering or dropping rows).
df_s = pd.DataFrame(scaled_array, columns=df_us.columns, index=df_us.index)

In [9]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the multi-level categorical features. drop='first' omits the
# first level of every feature, and sparse_output=False returns a dense array.
encoder = OneHotEncoder(sparse_output=False, drop='first')
encoded_data = encoder.fit_transform(df_ue)

# Re-attach the original row index together with the generated column names.
# Without it the frame gets a fresh RangeIndex and the axis=1 concat that
# builds X would misalign rows whenever df carries a non-default index.
df_encoded = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(df_ue.columns),
    index=df_ue.index,
)

In [10]:
# Inspect the one-hot encoded columns produced by the encoder.
df_encoded

Unnamed: 0,cp_1,cp_2,cp_3,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3
0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1020,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1021,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
1022,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1023,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [11]:
# Inspect the scaled continuous features.
df_s

Unnamed: 0,age,trestbps,chol,thalach,oldpeak
0,-0.268437,-0.377636,-0.659332,0.821321,-0.060888
1,-0.158157,0.479107,-0.833861,0.255968,1.727137
2,1.716595,0.764688,-1.396233,-1.048692,1.301417
3,0.724079,0.936037,-0.833861,0.516900,-0.912329
4,0.834359,0.364875,0.930822,-1.874977,0.705408
...,...,...,...,...,...
1020,0.503520,0.479107,-0.484803,0.647366,-0.912329
1021,0.613800,-0.377636,0.232705,-0.352873,1.471705
1022,-0.819834,-1.234378,0.562371,-1.353113,-0.060888
1023,-0.488996,-1.234378,0.155137,0.429923,-0.912329


In [12]:
# Assemble the model input: scaled, one-hot encoded and binary columns side by side.
X = pd.concat(objs=[df_s, df_encoded, df_bi], axis="columns")

In [13]:
# Label vector: an independent copy of the target column, detached from df.
y = df['target'].copy()

### Model Building

In [14]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Convert all four splits to TensorFlow tensors in one pass.
X_train, X_test, y_train, y_test = map(
    tf.convert_to_tensor,
    (X_train, X_test, y_train, y_test),
)

In [16]:
# Seed Python, NumPy and the backend so that weight initialisation, dropout
# and batch shuffling are repeatable under "Restart Kernel -> Run All".
keras.utils.set_random_seed(42)

# Width of the feature matrix (5 scaled + 12 one-hot + 3 binary columns = 20).
# Derived from the data so the model follows any change in preprocessing.
n_features = X_train.shape[1]

# Small fully connected binary classifier. All layers come from the same
# `keras` namespace to avoid mixing `keras` and `tensorflow.keras` objects.
model = keras.models.Sequential()
model.add(keras.Input(shape=(n_features,)))
model.add(keras.layers.Dense(50, activation="relu"))  # first hidden layer: 50 units, ReLU
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(75, activation="relu"))  # second hidden layer: 75 units, ReLU
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.2))  # dropout with rate 0.2 before the output layer
model.add(keras.layers.Dense(1, activation="sigmoid"))  # single sigmoid output for the binary target

In [17]:
# Print a summary of the network that was just assembled.
model.summary()

In [18]:
# Adam with a small step size; binary cross-entropy matches the sigmoid output,
# and accuracy is tracked as the reporting metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

model.compile(
    loss="binary_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [19]:
# Train for 150 epochs, scoring the held-out split after every epoch.
# The call is the cell's last expression, so the returned History object is echoed.
model.fit(
    X_train,
    y_train,
    epochs=150,
    validation_data=(X_test, y_test),
)

Epoch 1/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5819 - loss: 0.8405 - val_accuracy: 0.6585 - val_loss: 0.6427
Epoch 2/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6083 - loss: 0.7712 - val_accuracy: 0.6585 - val_loss: 0.6213
Epoch 3/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6832 - loss: 0.6488 - val_accuracy: 0.7268 - val_loss: 0.5978
Epoch 4/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7222 - loss: 0.5942 - val_accuracy: 0.7415 - val_loss: 0.5755
Epoch 5/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7379 - loss: 0.5514 - val_accuracy: 0.7415 - val_loss: 0.5538
Epoch 6/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7583 - loss: 0.5132 - val_accuracy: 0.7415 - val_loss: 0.5344
Epoch 7/150
[1m26/26[0m [32m━━━

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9283 - loss: 0.2053 - val_accuracy: 0.8488 - val_loss: 0.2923
Epoch 52/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9304 - loss: 0.2070 - val_accuracy: 0.8537 - val_loss: 0.2880
Epoch 53/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9168 - loss: 0.2133 - val_accuracy: 0.8537 - val_loss: 0.2847
Epoch 54/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9210 - loss: 0.2061 - val_accuracy: 0.8537 - val_loss: 0.2813
Epoch 55/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9190 - loss: 0.2123 - val_accuracy: 0.8537 - val_loss: 0.2790
Epoch 56/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9232 - loss: 0.1946 - val_accuracy: 0.8585 - val_loss: 0.2767
Epoch 57/150
[1m26/26[0m [32m━━━━━━━━━

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9602 - loss: 0.1160 - val_accuracy: 0.9171 - val_loss: 0.1679
Epoch 102/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9657 - loss: 0.1291 - val_accuracy: 0.9268 - val_loss: 0.1667
Epoch 103/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9660 - loss: 0.1035 - val_accuracy: 0.9268 - val_loss: 0.1655
Epoch 104/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9698 - loss: 0.1182 - val_accuracy: 0.9268 - val_loss: 0.1638
Epoch 105/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9449 - loss: 0.1362 - val_accuracy: 0.9171 - val_loss: 0.1622
Epoch 106/150
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9683 - loss: 0.1058 - val_accuracy: 0.9268 - val_loss: 0.1613
Epoch 107/150
[1m26/26[0m [32m━━━

<keras.src.callbacks.history.History at 0x14fbf524fa0>

In [20]:
# Predicted probabilities for the held-out rows, rounded to hard 0/1 labels.
y_prob = model.predict(X_test)
y_pred = y_prob.round()

# Per-class precision / recall / F1 on the held-out rows.
print("Validation Report:")
print(classification_report(y_test, y_pred))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
Validation Report:
              precision    recall  f1-score   support

           0       0.99      0.92      0.95       102
           1       0.93      0.99      0.96       103

    accuracy                           0.96       205
   macro avg       0.96      0.96      0.96       205
weighted avg       0.96      0.96      0.96       205

