In [15]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import OrdinalEncoder


import os
# Print the full path of every file found beneath the Kaggle input directory,
# so the available datasets are visible at the top of the notebook.
input_root = '/kaggle/input'
for folder, _subdirs, file_names in os.walk(input_root):
    full_paths = [os.path.join(folder, name) for name in file_names]
    for path in full_paths:
        print(path)



In [3]:
# Read the heart-failure CSV into a DataFrame; every later cell works from `heart_data`.
csv_path = '../input/heart-failure-prediction/heart.csv'
heart_data = pd.read_csv(csv_path)

In [4]:
# Inspect the dtype pandas inferred for each column. Object-typed columns are the
# categoricals that must be encoded before they can be fed to the network.
heart_data.dtypes

In [5]:
# Preview the first rows (DataFrame.head() defaults to five) as a sanity check
# that the file loaded with the expected columns and values.
heart_data.head()

In [6]:
# One histogram per numeric column, for a first look at each distribution.
heart_data.hist(figsize = (10,15))
# End on plt.show() so the notebook renders the figure without also echoing the
# array of Axes objects that hist() returns.
plt.show()

In [19]:
# Partition the columns by cardinality: fewer than DISCRETE_MAX_UNIQUE distinct
# values counts as discrete, everything else as continuous. The two conditions
# are exact complements so that a column with exactly DISCRETE_MAX_UNIQUE
# distinct values is not silently dropped from both lists.
DISCRETE_MAX_UNIQUE = 10
unique_counts = heart_data.nunique()  # computed once instead of twice per column
discrette_feature = [col for col in heart_data.columns if unique_counts[col] < DISCRETE_MAX_UNIQUE]
continuous_feature = [col for col in heart_data.columns if unique_counts[col] >= DISCRETE_MAX_UNIQUE]
print(f'discrette feature: {discrette_feature}')
print(f'continuous feature: {continuous_feature}')

In [22]:
# Distribution of every continuous feature, split by the HeartDisease target.
plt.style.use('fivethirtyeight')

# Size the subplot grid from the number of features instead of hard-coding 3x2,
# so the cell keeps working if the feature list grows past six entries.
n_cols = 2
n_rows = -(-len(continuous_feature) // n_cols)  # ceiling division without importing math

plt.figure(figsize = (18,16))
for position, feature in enumerate(continuous_feature, start=1):
    plt.subplot(n_rows, n_cols, position)
    sns.histplot(x=heart_data[feature], kde=True, bins=50, hue=heart_data.HeartDisease)
    plt.xlabel(feature, size=12)
    # histplot's default statistic is a count per bin, so label the axis accordingly.
    plt.ylabel("Count", size=12)

plt.show()

In [7]:
# Separate the prediction target from the features used to train the network.
y = heart_data['HeartDisease']
X = heart_data.drop(['HeartDisease'], axis=1)

# Fix the shuffle seed so the 80/20 split (and every number derived from it)
# is the same on each Restart & Run All.
SPLIT_SEED = 42
train_X, test_X, train_y, test_y = train_test_split(
    X, y, train_size=0.8, test_size=0.2, random_state=SPLIT_SEED
)

In [8]:
# Convert the categorical (object-typed) feature columns to numeric codes for the network.
# The column list is taken from the training features rather than the full frame,
# so the target column can never end up in it.
object_cols = train_X.select_dtypes(include='object').columns.tolist()

# Work on copies so the raw train/test frames stay untouched.
label_train_X = train_X.copy()
label_test_X = test_X.copy()

# The encoder is fitted on the training split only. A category that appears only in
# the test split is mapped to -1 instead of raising, since a random split of a small
# dataset can leave a rare category out of the training rows.
ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
label_train_X[object_cols] = ordinal_encoder.fit_transform(train_X[object_cols])
label_test_X[object_cols] = ordinal_encoder.transform(test_X[object_cols])

In [9]:
# Check that the encoding worked: info() lists each column's dtype and non-null count,
# so the previously object-typed columns should now report a numeric dtype.
label_train_X.info()

In [10]:
# Standardise the features for the network. The scaler learns its statistics from
# the training split only and then applies the same transformation to both splits.
scaler = StandardScaler()
scaler.fit(label_train_X)

label_train_X, label_test_X = (
    scaler.transform(split) for split in (label_train_X, label_test_X)
)

In [11]:
# Build the network: three 32-unit ReLU layers, each followed by 30% dropout,
# and a single sigmoid unit that outputs the predicted probability.
# The input width is read from the training matrix instead of being hard-coded,
# so the model still builds if the feature set changes.
n_features = label_train_X.shape[1]

model = keras.Sequential([
    layers.Dense(32, activation = 'relu', input_shape = [n_features]),
    layers.Dropout(.3),
    layers.Dense(32, activation = 'relu'),
    layers.Dropout(.3),
    layers.Dense(32, activation = 'relu'),
    layers.Dropout(.3),
    layers.Dense(1, activation = 'sigmoid'),
])

In [12]:
# Configure training: Adam as the optimizer, binary cross-entropy as the loss,
# and binary accuracy as the metric reported each epoch.
tracked_metrics = ['binary_accuracy']
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=tracked_metrics)

In [13]:
# Stop training once the validation loss has not improved by at least min_delta
# for `patience` consecutive epochs, and roll back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)

# Train the model.
# Validation data is carved out of the training split (the last 20% of rows, which
# train_test_split has already shuffled) rather than taken from the test set. Using
# the test set here would let it drive early stopping and weight restoration, so the
# reported test performance would no longer be an unbiased estimate.
history = model.fit(
    label_train_X, np.asarray(train_y),
    validation_split=0.2,
    batch_size=512,
    epochs=200,
    callbacks=[early_stopping],
)

# Plot the learning curves.
history_df = pd.DataFrame(history.history)
history_df.loc[:, ['loss', 'val_loss']].plot(title="Cross-entropy")
history_df.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy")
plt.show()

# Evaluate once on the held-out test set, which played no part in training
# or in choosing when to stop.
test_loss, test_accuracy = model.evaluate(label_test_X, np.asarray(test_y), verbose=0)
print(f'test loss: {test_loss:.4f}, test accuracy: {test_accuracy:.4f}')