In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv('/content/heart.csv')
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [4]:
df.columns

Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

In [5]:
df.dtypes

Unnamed: 0,0
Age,int64
Sex,object
ChestPainType,object
RestingBP,int64
Cholesterol,int64
FastingBS,int64
RestingECG,object
MaxHR,int64
ExerciseAngina,object
Oldpeak,float64


In [6]:
df.isnull().sum()

Unnamed: 0,0
Age,0
Sex,0
ChestPainType,0
RestingBP,0
Cholesterol,0
FastingBS,0
RestingECG,0
MaxHR,0
ExerciseAngina,0
Oldpeak,0


In [7]:
df.duplicated().sum()

np.int64(0)

In [8]:
df.shape

(918, 12)

In [9]:
numeric_cols = ['Age',  'RestingBP', 'Cholesterol', 'FastingBS',
        'MaxHR',  'Oldpeak', 'HeartDisease']

outlier_counts = {}

for col in numeric_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)]
    outlier_counts[col] = len(outliers)

for col, count in outlier_counts.items():
    print(f"{col}: {count} outliers")

Age: 0 outliers
RestingBP: 28 outliers
Cholesterol: 183 outliers
FastingBS: 214 outliers
MaxHR: 2 outliers
Oldpeak: 16 outliers
HeartDisease: 0 outliers


In [10]:

def remove_outliers_zscore(df, column, threshold=3):
    mean = df[column].mean()
    std = df[column].std()
    z_scores = (df[column] - mean) / std
    df_cleaned = df[(z_scores < threshold) & (z_scores > -threshold)].copy()
    return df_cleaned

for col in numeric_cols:
    df = remove_outliers_zscore(df, col)

df.shape

(899, 12)

In [11]:
df['HeartDisease'].unique()

array([0, 1])

In [12]:
df.dtypes

Unnamed: 0,0
Age,int64
Sex,object
ChestPainType,object
RestingBP,int64
Cholesterol,int64
FastingBS,int64
RestingECG,object
MaxHR,int64
ExerciseAngina,object
Oldpeak,float64


In [13]:
cols=['Sex','ChestPainType','RestingECG','ExerciseAngina','ST_Slope']
for col in cols:
  print(df[col].unique())


['M' 'F']
['ATA' 'NAP' 'ASY' 'TA']
['Normal' 'ST' 'LVH']
['N' 'Y']
['Up' 'Flat' 'Down']


In [14]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
cat_cols=['Sex','ChestPainType','RestingECG','ExerciseAngina','ST_Slope']
for col in cat_cols:
  df[col]=le.fit_transform(df[col])


In [15]:
df.dtypes

Unnamed: 0,0
Age,int64
Sex,int64
ChestPainType,int64
RestingBP,int64
Cholesterol,int64
FastingBS,int64
RestingECG,int64
MaxHR,int64
ExerciseAngina,int64
Oldpeak,float64


In [16]:
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,1,1,140,289,0,1,172,0,0.0,2,0
1,49,0,2,160,180,0,1,156,0,1.0,1,1
2,37,1,1,130,283,0,2,98,0,0.0,2,0
3,48,0,0,138,214,0,1,108,1,1.5,1,1
4,54,1,2,150,195,0,1,122,0,0.0,2,0


In [17]:
df.columns

Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

In [18]:
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
cols=['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']
df[cols]=scaler.fit_transform(df[cols])


In [19]:
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,0.244898,1,1,0.571429,0.557915,0.0,1,0.784173,0,0.333333,2,0
1,0.428571,0,2,0.761905,0.34749,0.0,1,0.669065,0,0.5,1,1
2,0.183673,1,1,0.47619,0.546332,0.0,2,0.251799,0,0.333333,2,0
3,0.408163,0,0,0.552381,0.413127,0.0,1,0.323741,1,0.583333,1,1
4,0.530612,1,2,0.666667,0.376448,0.0,1,0.42446,0,0.333333,2,0


In [20]:
df.dtypes

Unnamed: 0,0
Age,float64
Sex,int64
ChestPainType,int64
RestingBP,float64
Cholesterol,float64
FastingBS,float64
RestingECG,int64
MaxHR,float64
ExerciseAngina,int64
Oldpeak,float64


In [21]:
X=df.drop('HeartDisease',axis=1)
y=df['HeartDisease']

In [22]:
X.shape

(899, 11)

In [23]:
y.shape

(899,)

In [24]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [25]:
X_train.shape

(719, 11)

In [26]:
X_test.shape

(180, 11)

In [27]:
y_train.shape

(719,)

In [28]:
y_test.shape

(180,)

In [29]:
y_train_reshaped = y_train.values.reshape(-1, 1)
y_test_reshaped = y_test.values.reshape(-1, 1)

In [30]:
y_train_reshaped.shape

(719, 1)

In [31]:
y_test_reshaped.shape

(180, 1)

In [32]:
from keras import Sequential
from keras.layers import Dense,Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint

model = Sequential()
model.add(Dense(units=35, input_dim=11, kernel_initializer='random_uniform', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=50, kernel_initializer='he_uniform', activation='relu', name='H3'))
model.add(Dropout(0.2))
model.add(Dense(units=30,kernel_initializer='he_uniform',activation='relu',name='H5'))
model.add(Dropout(0.3))
model.add(Dense(units=20,kernel_initializer='he_uniform',activation='relu',name='H7'))
model.add(Dense(units=1, kernel_initializer='he_uniform',activation='sigmoid'))





  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [33]:
early_stop=EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)

In [34]:
checkpoint=ModelCheckpoint(filepath='/content/Myfiles/heart_model_cp.keras',monitor='val_loss',verbose=1,save_best_only=True)

In [35]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['recall'])

In [36]:
hist=model.fit(X_train,y_train_reshaped,epochs=100,batch_size=40,verbose=1,validation_split=0.2,callbacks=[early_stop,checkpoint])

Epoch 1/100
[1m 9/15[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 6ms/step - loss: 0.6869 - recall: 0.6729
Epoch 1: val_loss improved from inf to 0.65953, saving model to /content/Myfiles/heart_model_cp.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 75ms/step - loss: 0.6848 - recall: 0.7289 - val_loss: 0.6595 - val_recall: 0.9744
Epoch 2/100
[1m12/15[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 5ms/step - loss: 0.6593 - recall: 0.9191  
Epoch 2: val_loss improved from 0.65953 to 0.59743, saving model to /content/Myfiles/heart_model_cp.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.6566 - recall: 0.9060 - val_loss: 0.5974 - val_recall: 0.8846
Epoch 3/100
[1m14/15[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 4ms/step - loss: 0.6102 - recall: 0.8800 
Epoch 3: val_loss improved from 0.59743 to 0.49325, saving model to /content/Myfiles/heart_model_cp.keras
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━

epoch=31
test=0.9231
train=0.9165
diff=0.07     

accuracy=
diff=0.06

In [37]:
model.summary()

In [38]:
loss,accuracy=model.evaluate(X_test,y_test_reshaped)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.3781 - recall: 0.9587 


In [39]:
loss

0.3554600775241852

In [40]:
accuracy

0.9680851101875305

In [41]:
model.predict(X_test)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


array([[0.91157657],
       [0.9448026 ],
       [0.29440656],
       [0.91865224],
       [0.78655285],
       [0.8318453 ],
       [0.8638722 ],
       [0.95882493],
       [0.02658019],
       [0.91694665],
       [0.9356959 ],
       [0.07412817],
       [0.713557  ],
       [0.9529659 ],
       [0.9473894 ],
       [0.09551086],
       [0.85954   ],
       [0.18602471],
       [0.6267243 ],
       [0.9161516 ],
       [0.12878996],
       [0.9365729 ],
       [0.8307832 ],
       [0.37209752],
       [0.09242855],
       [0.92132324],
       [0.31502417],
       [0.06462649],
       [0.9191971 ],
       [0.71741515],
       [0.72012454],
       [0.18810494],
       [0.03850037],
       [0.82458985],
       [0.7830982 ],
       [0.81333613],
       [0.8400282 ],
       [0.07109594],
       [0.05388509],
       [0.7467787 ],
       [0.08253929],
       [0.9316878 ],
       [0.8010619 ],
       [0.6060523 ],
       [0.8710936 ],
       [0.666173  ],
       [0.05655957],
       [0.735