In [37]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#https://www.kaggle.com/datasets/oktayrdeki/heart-disease
df = pd.read_csv('Data/heart_disease.csv')


In [38]:
print(f"Dataset Shape {df.shape}")
print(f"Total Records: {df.shape[0]:,}")
print(f"Total Features: {df.shape[1]}")
print(f"Dataset Info: {df.info()}")
print(f"First five rows: {df.head()}")
print(f"Missing values: ${df.isnull().sum()}")
print(f"Basic Stats: {df.describe()}")



Dataset Shape (10000, 21)
Total Records: 10,000
Total Features: 21
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Age                   9971 non-null   float64
 1   Gender                9981 non-null   object 
 2   Blood Pressure        9981 non-null   float64
 3   Cholesterol Level     9970 non-null   float64
 4   Exercise Habits       9975 non-null   object 
 5   Smoking               9975 non-null   object 
 6   Family Heart Disease  9979 non-null   object 
 7   Diabetes              9970 non-null   object 
 8   BMI                   9978 non-null   float64
 9   High Blood Pressure   9974 non-null   object 
 10  Low HDL Cholesterol   9975 non-null   object 
 11  High LDL Cholesterol  9974 non-null   object 
 12  Alcohol Consumption   7414 non-null   object 
 13  Stress Level          9978 non-null   object 
 14  Slee

In [39]:
#You can use obj_df = df.select_dtypes(include=['object']).copy()
# Guide to refrence for encoding valueshttps://pbpython.com/categorical-encoding.html 

print(df.columns)
categroical_values = []
for colum in df.columns:
    if not(pd.api.types.is_numeric_dtype(df[colum])):
        categroical_values.append(colum)
        
print(categroical_values)


Index(['Age', 'Gender', 'Blood Pressure', 'Cholesterol Level',
       'Exercise Habits', 'Smoking', 'Family Heart Disease', 'Diabetes', 'BMI',
       'High Blood Pressure', 'Low HDL Cholesterol', 'High LDL Cholesterol',
       'Alcohol Consumption', 'Stress Level', 'Sleep Hours',
       'Sugar Consumption', 'Triglyceride Level', 'Fasting Blood Sugar',
       'CRP Level', 'Homocysteine Level', 'Heart Disease Status'],
      dtype='object')
['Gender', 'Exercise Habits', 'Smoking', 'Family Heart Disease', 'Diabetes', 'High Blood Pressure', 'Low HDL Cholesterol', 'High LDL Cholesterol', 'Alcohol Consumption', 'Stress Level', 'Sugar Consumption', 'Heart Disease Status']


In [40]:
for feature in categroical_values:
    print(f"\n{feature.upper()} Distribution:")
    distribution = df[feature].value_counts()
    percentage = df[feature].value_counts(normalize=True) * 100
    for value, count in distribution.items():
        print(f"  {value}: {count:,} ({percentage[value]:.1f}%)")



GENDER Distribution:
  Male: 5,003 (50.1%)
  Female: 4,978 (49.9%)

EXERCISE HABITS Distribution:
  High: 3,372 (33.8%)
  Medium: 3,332 (33.4%)
  Low: 3,271 (32.8%)

SMOKING Distribution:
  Yes: 5,123 (51.4%)
  No: 4,852 (48.6%)

FAMILY HEART DISEASE Distribution:
  No: 5,004 (50.1%)
  Yes: 4,975 (49.9%)

DIABETES Distribution:
  No: 5,018 (50.3%)
  Yes: 4,952 (49.7%)

HIGH BLOOD PRESSURE Distribution:
  Yes: 5,022 (50.4%)
  No: 4,952 (49.6%)

LOW HDL CHOLESTEROL Distribution:
  Yes: 5,000 (50.1%)
  No: 4,975 (49.9%)

HIGH LDL CHOLESTEROL Distribution:
  No: 5,036 (50.5%)
  Yes: 4,938 (49.5%)

ALCOHOL CONSUMPTION Distribution:
  Medium: 2,500 (33.7%)
  Low: 2,488 (33.6%)
  High: 2,426 (32.7%)

STRESS LEVEL Distribution:
  Medium: 3,387 (33.9%)
  Low: 3,320 (33.3%)
  High: 3,271 (32.8%)

SUGAR CONSUMPTION Distribution:
  Low: 3,390 (34.0%)
  High: 3,330 (33.4%)
  Medium: 3,250 (32.6%)

HEART DISEASE STATUS Distribution:
  No: 8,000 (80.0%)
  Yes: 2,000 (20.0%)


In [41]:
encoded_df = df.copy()
encoded_df=encoded_df.dropna()

mapping = {'Low': 0, 'Medium': 1, 'High': 2}

encoded_df["Sugar Consumption"] = encoded_df['Sugar Consumption'].map(mapping)
encoded_df["Exercise Habits"] = encoded_df['Exercise Habits'].map(mapping)
encoded_df["Stress Level"] = encoded_df["Stress Level"].map(mapping)
encoded_df["Alcohol Consumption"] = encoded_df["Alcohol Consumption"].map(mapping)


encoded_df["Gender"] = (df['Gender']=="Male").astype(int)
encoded_df["Smoking"] = (df['Smoking'] == 'Yes').astype(int)
encoded_df["Family Heart Disease"] = (df["Family Heart Disease"] == 'Yes').astype(int)
encoded_df["Diabetes"] = (df["Diabetes"]=="Yes").astype(int)
encoded_df["High Blood Pressure"] = (df["High Blood Pressure"]=="Yes").astype(int)
encoded_df["Low HDL Cholesterol"] = (df["Low HDL Cholesterol"]=="Yes").astype(int)
encoded_df["Heart Disease Status"] = (df["Heart Disease Status"]=="Yes").astype(int)
encoded_df["High LDL Cholesterol"] = (df["High LDL Cholesterol"]=="Yes").astype(int)

print(encoded_df)


       Age  Gender  Blood Pressure  Cholesterol Level  Exercise Habits  \
1     69.0       0           146.0              286.0                2   
2     46.0       1           126.0              216.0                0   
3     32.0       0           122.0              293.0                2   
4     60.0       1           166.0              242.0                0   
5     25.0       1           152.0              257.0                0   
...    ...     ...             ...                ...              ...   
9992  68.0       0           169.0              291.0                1   
9994  73.0       0           144.0              191.0                1   
9995  25.0       0           136.0              243.0                1   
9998  23.0       1           142.0              299.0                0   
9999  38.0       0           128.0              193.0                1   

      Smoking  Family Heart Disease  Diabetes        BMI  High Blood Pressure  \
1           0                 

In [50]:
#Normalize features
encoded_df.describe()

scaler = StandardScaler()
features = ['Age', 'Gender', 'Blood Pressure', 'Cholesterol Level',
       'Exercise Habits', 'Smoking', 'Family Heart Disease', 'Diabetes', 'BMI',
       'High Blood Pressure', 'Low HDL Cholesterol', 'High LDL Cholesterol',
       'Alcohol Consumption', 'Stress Level', 'Sleep Hours',
       'Sugar Consumption', 'Triglyceride Level', 'Fasting Blood Sugar',
       'CRP Level', 'Homocysteine Level']
X=encoded_df[features]
y=encoded_df["Heart Disease Status"].copy()

X_standardized = scaler.fit_transform(X)
X_standardized = pd.DataFrame(X_standardized,columns=X.columns)
print(X_standardized)

           Age    Gender  Blood Pressure  Cholesterol Level  Exercise Habits  \
0     1.078260 -1.008669       -0.219036           1.407813         1.205930   
1    -0.180721  0.991405       -1.355089          -0.200406        -1.238794   
2    -0.947057 -1.008669       -1.582299           1.568635         1.205930   
3     0.585616  0.991405        0.917016           0.396933        -1.238794   
4    -1.330226  0.991405        0.121780           0.741551        -1.238794   
...        ...       ...             ...                ...              ...   
7062  1.023522 -1.008669        1.087424           1.522686        -0.016432   
7063  1.297214 -1.008669       -0.332641          -0.774769        -0.016432   
7064 -1.330226 -1.008669       -0.787062           0.419907        -0.016432   
7065 -1.439702  0.991405       -0.446247           1.706482        -1.238794   
7066 -0.618627 -1.008669       -1.241484          -0.728820        -0.016432   

       Smoking  Family Heart Disease  D

In [51]:
X_train, X_test, y_train, y_test = train_test_split(X_standardized,y,test_size=0.20,random_state=42)


In [57]:
#define Neural Network
model = Sequential([
    tf.keras.Input(shape=(20,)),
    Dense(40,activation="relu",name="layer_1"),
    Dense(20,activation="relu",name="layer_2"),
    Dense(10,activation="relu",name="layer_3"),
    Dense(1,activation="sigmoid",name="layer_4"),

])

model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(0.001),

)
model.fit(X_train,y_train, epochs=32)

Epoch 1/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.6038
Epoch 2/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4913
Epoch 3/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.5149
Epoch 4/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4984
Epoch 5/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4789
Epoch 6/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4885
Epoch 7/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4733
Epoch 8/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4751
Epoch 9/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4778
Epoch 10/32
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - lo

<keras.src.callbacks.history.History at 0x2552d218750>

In [None]:
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert to binary



[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1]
0
