In [2]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, LabelEncoder
import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.metrics import accuracy_score

In [3]:
# data from https://www.kaggle.com/datasets/asgharalikhan/mortality-rate-heart-patient-pakistan-hospital/data
pakistan_data = pd.read_csv('Resources/FIC.Full CSV.csv')
pakistan_data.head()

Unnamed: 0,Age,Age.Group,Gender,Locality,Marital status,Life.Style,Sleep,Category,Depression,Hyperlipi,...,oldpeak,slope,ca,thal,num,SK,SK.React,Reaction,Mortality,Follow.Up
0,45,41-50,Female,RURAL,MARRIED,NO,NO,FREE,YES,YES,...,3.0,2,0,7,2,1,NO,0,0,60
1,51,51-60,Female,URBAN,MARRIED,NO,NO,FREE,YES,YES,...,1.2,2,0,7,2,1,NO,0,0,15
2,55,51-60,Female,RURAL,MARRIED,YES,YES,FREE,YES,YES,...,3.4,2,0,3,2,1,NO,0,0,6
3,55,51-60,Female,RURAL,MARRIED,YES,YES,FREE,YES,YES,...,2.0,2,1,7,3,1,NO,0,0,52
4,56,51-60,Female,RURAL,MARRIED,YES,NO,FREE,YES,YES,...,4.0,3,2,7,3,1,NO,0,0,34


In [4]:
pakistan_data.dtypes

Age                                                                                           int64
Age.Group                                                                                    object
Gender                                                                                       object
Locality                                                                                     object
Marital status                                                                               object
Life.Style                                                                                   object
Sleep                                                                                        object
Category                                                                                     object
Depression                                                                                   object
Hyperlipi                                                                                    object


In [5]:
pakistan_data.columns

Index(['Age', 'Age.Group', 'Gender', 'Locality  ',
       'Marital status                       ',
       'Life.Style                                                                              ',
       'Sleep', 'Category', 'Depression', 'Hyperlipi', 'Smoking',
       'Family.History', 'F.History', 'Diabetes', 'HTN', 'Allergies', 'BP',
       'Thrombolysis', 'BGR', 'B.Urea', 'S.Cr', 'S.Sodium', 'S.Potassium',
       'S.Chloride', 'C.P.K', 'CK.MB', 'ESR', 'WBC', 'RBC', 'Hemoglobin',
       'P.C.V', 'M.C.V', 'M.C.H', 'M.C.H.C', 'PLATELET_COUNT', 'NEUTROPHIL',
       'LYMPHO', 'MONOCYTE', 'EOSINO', 'Others ', 'CO', 'Diagnosis',
       'Hypersensitivity', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
       'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num', 'SK',
       'SK.React', 'Reaction', 'Mortality', 'Follow.Up'],
      dtype='object')

In [6]:
pakistan_data['Category'].unique()

array(['FREE', 'PAID'], dtype=object)

In [7]:
Binary_Data = ['Gender', 'Locality  ', 'Marital status                       ','Life.Style                                                                              ',
                'Sleep', 'Depression', 'Hyperlipi','Smoking', 'HTN', 'Allergies', 'Hypersensitivity' ]
Ordinal_Data = ['Age.Group']
Nominal_Data = ['Others ', 'CO', 'Diagnosis', 'SK.React']

In [8]:
y = pakistan_data['Mortality']

In [9]:
# Scale numerical columns
# List of numerical columns 
numerical_columns = pakistan_data.select_dtypes(include=[np.number]).columns.tolist()

# Apply scaling
scaler = StandardScaler()
pakistan_data[numerical_columns] = scaler.fit_transform(pakistan_data[numerical_columns])

In [10]:
#Create Ordinal Encoders:
age_group_encoder = OrdinalEncoder(categories=[['21-30', '31-40', '41-50', '51-60', '61-70']])
# Apply Ordinal Encoding to Ordinal Data
pakistan_data['Age.Group'] = age_group_encoder.fit_transform(pakistan_data[['Age.Group']])

In [11]:
# Label encode Binary_Data:
label_encoder = LabelEncoder()

for col in Binary_Data:
    pakistan_data[col] = label_encoder.fit_transform(pakistan_data[col])

In [12]:
#One-hot encode the Nominal_data:
pakistan_data = pd.get_dummies(pakistan_data, columns= Nominal_Data, drop_first=True)

In [23]:
pakistan_data = pakistan_data.drop(columns=['Category', 'Family.History'])

In [24]:
pd.set_option('display.max_rows', None) 
pd.set_option('display.max_columns', None)

In [25]:
print(pakistan_data.dtypes)

Age                                                                                         float64
Age.Group                                                                                   float64
Gender                                                                                        int64
Locality                                                                                      int64
Marital status                                                                                int64
Life.Style                                                                                    int64
Sleep                                                                                         int64
Depression                                                                                    int64
Hyperlipi                                                                                     int64
Smoking                                                                                       int64


In [26]:
X = pakistan_data.drop(columns=['Mortality'])
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [27]:
import tensorflow as tf
nn_model = tf.keras.models.Sequential()

# Set the input nodes to the number of features
input_nodes = len(X.columns)

nn_model.add(tf.keras.layers.Dense(units=10, activation="relu", input_dim=input_nodes))

nn_model.add(tf.keras.layers.Dense(units=10, activation="relu"))
nn_model.add(tf.keras.layers.Dense(units=10, activation="relu"))

nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the Sequential model
nn_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [28]:
set(y)

{0, 1}

In [30]:
# Compile the model and train over more than ### epochs
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

fit_model = nn_model.fit(X_train, y_train, epochs=50)

Epoch 1/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.7787 - loss: 0.6208
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.7623 - loss: 0.5893
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7695 - loss: 0.5559
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.7895 - loss: 0.5215
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7759 - loss: 0.5055
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.8167 - loss: 0.4469
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.7750 - loss: 0.4443
Epoch 8/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.8043 - loss: 0.4028
Epoch 9/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [31]:
y_test.dtype

dtype('int64')

In [32]:
loss, accuracy = nn_model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - accuracy: 0.9509 - loss: 0.1638 
Test Accuracy: 0.9565
