In [50]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [3]:
df = pd.read_csv('heart.csv')

In [5]:
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,Female,55-59,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,Female,80 or older,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,Male,65-69,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,Female,75-79,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Female,40-44,No,Yes,Very good,8.0,No,No,No


In [4]:
df = df.iloc[: , [0,1,2,3,4,5,6,8,9,11,12,13,14,15,16,17]]

In [6]:
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,Female,55-59,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,Female,80 or older,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,Male,65-69,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,Female,75-79,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Female,40-44,No,Yes,Very good,8.0,No,No,No


In [7]:
columns = ['HeartDisease', 'Smoking', 'AlcoholDrinking', 'Stroke', 'Diabetic', 'PhysicalActivity', 'Asthma', 'KidneyDisease', 'SkinCancer']
df[columns] = df[columns].apply(lambda x: x.map({'Yes': 1, 'No': 0}))

In [8]:
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0,16.6,1,0,0,3.0,30.0,Female,55-59,1.0,1,Very good,5.0,1,0,1
1,0,20.34,0,0,1,0.0,0.0,Female,80 or older,0.0,1,Very good,7.0,0,0,0
2,0,26.58,1,0,0,20.0,30.0,Male,65-69,1.0,1,Fair,8.0,1,0,0
3,0,24.21,0,0,0,0.0,0.0,Female,75-79,0.0,0,Good,6.0,0,0,1
4,0,23.71,0,0,0,28.0,0.0,Female,40-44,0.0,1,Very good,8.0,0,0,0


In [21]:
df['GenHealth'].value_counts()

GenHealth
Very good    113858
Good          93129
Excellent     66842
Fair          34677
Poor          11289
Name: count, dtype: int64

In [13]:
df['Sex'] = df['Sex'].map({'Male': 1, 'Female': 0})

In [20]:
df['AgeCategory'].dtype

dtype('O')

In [30]:
health_map = {
    'Very good': 4,   
    'Good': 3, 
    'Excellent': 2, 
    'Fair': 1,
    'Poor': 0
}

df['GenHealth'] = df['GenHealth'].map(health_map)


In [31]:
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer,Genealth
0,0,16.6,1,0,0,3.0,30.0,0,55-59,1.0,1,4,5.0,1,0,1,4
1,0,20.34,0,0,1,0.0,0.0,0,80 or older,0.0,1,4,7.0,0,0,0,4
2,0,26.58,1,0,0,20.0,30.0,1,65-69,1.0,1,1,8.0,1,0,0,1
3,0,24.21,0,0,0,0.0,0.0,0,75-79,0.0,0,3,6.0,0,0,1,3
4,0,23.71,0,0,0,28.0,0.0,0,40-44,0.0,1,4,8.0,0,0,0,4


In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 319795 entries, 0 to 319794
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   HeartDisease      319795 non-null  int64  
 1   BMI               319795 non-null  float64
 2   Smoking           319795 non-null  int64  
 3   AlcoholDrinking   319795 non-null  int64  
 4   Stroke            319795 non-null  int64  
 5   PhysicalHealth    319795 non-null  float64
 6   MentalHealth      319795 non-null  float64
 7   Sex               319795 non-null  int64  
 8   AgeCategory       319795 non-null  object 
 9   Diabetic          310455 non-null  float64
 10  PhysicalActivity  319795 non-null  int64  
 11  GenHealth         319795 non-null  int64  
 12  SleepTime         319795 non-null  float64
 13  Asthma            319795 non-null  int64  
 14  KidneyDisease     319795 non-null  int64  
 15  SkinCancer        319795 non-null  int64  
 16  Genealth          31

In [33]:
df['AgeCategory'].value_counts()


AgeCategory
65-69          34151
60-64          33686
70-74          31065
55-59          29757
50-54          25382
80 or older    24153
45-49          21791
75-79          21482
18-24          21064
40-44          21006
35-39          20550
30-34          18753
25-29          16955
Name: count, dtype: int64

In [35]:
age_map = {
    '18-24': 21,
    '25-29': 27,
    '30-34': 32,
    '35-39': 37,
    '40-44': 42,
    '45-49': 47,
    '50-54': 52,
    '55-59': 57,
    '60-64': 62,
    '65-69': 67,
    '70-74': 72,
    '75-79': 77,
    '80 or older': 85 
}

df['Age'] = df['AgeCategory'].map(age_map)


In [36]:
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,Sex,AgeCategory,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer,Genealth,Age
0,0,16.6,1,0,0,3.0,30.0,0,55-59,1.0,1,4,5.0,1,0,1,4,57
1,0,20.34,0,0,1,0.0,0.0,0,80 or older,0.0,1,4,7.0,0,0,0,4,85
2,0,26.58,1,0,0,20.0,30.0,1,65-69,1.0,1,1,8.0,1,0,0,1,67
3,0,24.21,0,0,0,0.0,0.0,0,75-79,0.0,0,3,6.0,0,0,1,3,77
4,0,23.71,0,0,0,28.0,0.0,0,40-44,0.0,1,4,8.0,0,0,0,4,42


In [37]:
dataframe = df.drop('AgeCategory', axis=1)


In [56]:
dataframe.shape

(319795, 17)

In [42]:
X = dataframe.drop('HeartDisease' , axis = 1)
y = dataframe['HeartDisease']

In [44]:
X_train, X_test , y_train , y_test = train_test_split(X,y,test_size = 0.2, random_state = 42 , stratify = y)

In [46]:
scaler = StandardScaler()

In [47]:
X_train_Scaled = scaler.fit_transform(X_train)
X_Test_Scaled = scaler.transform(X_test)

In [52]:
model = Sequential([
    Dense(32 , activation = 'relu' , input_dim=16),
    Dense(16 , activation = 'relu'),
    Dense(8 , activation = 'relu'),
    Dense(1 , activation = 'sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [53]:
model.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])

In [55]:
model.fit(X_train_Scaled , y_train , epochs = 100)

Epoch 1/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 4ms/step - accuracy: 0.9139 - loss: 0.3218
Epoch 2/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.9137 - loss: 0.2942
Epoch 3/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.9130 - loss: 0.2957
Epoch 4/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.9141 - loss: 0.2931
Epoch 5/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.9136 - loss: 0.2942
Epoch 6/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 4ms/step - accuracy: 0.9152 - loss: 0.2904
Epoch 7/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.9150 - loss: 0.2910
Epoch 8/100
[1m7995/7995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 4ms/step - accuracy: 0.9140 - loss: 0.2933
Epoch 9/

<keras.src.callbacks.history.History at 0x29662a096f0>

In [59]:
loss, accuracy = model.evaluate(X_Test_Scaled, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


[1m1999/1999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9143 - loss: 0.2926
Test Accuracy: 0.9144
