In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import csv
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import IntegerLookup
from tensorflow.keras.layers import Normalization
from tensorflow.keras.layers import StringLookup

In [11]:
# Bind only the CSV location. pandas opens and closes the file itself when it
# is given a path, so no file handle stays open for the lifetime of the kernel
# (the previous bare open() was never closed).
file = 'healthcare-dataset-stroke-data.csv'

In [12]:
# Parse the CSV bound to `file` by the previous cell into a DataFrame.
dataframe = pd.read_csv(filepath_or_buffer=file)

In [13]:
# Hold out a reproducible 20% of the rows for validation; everything that was
# not sampled becomes the training split.
val_df = dataframe.sample(frac=0.2, random_state=1337)
train_df = dataframe.drop(val_df.index)

# Normalization.adapt() averages each numeric column, so a single missing value
# would turn the learned mean/variance -- and then the loss -- into NaN.
# Fill gaps in float columns with medians computed on the training split only,
# so nothing about the validation rows leaks into preprocessing. This is a
# no-op when no values are missing, and row counts are unchanged.
# NOTE(review): presumably `bmi` is the column with gaps -- confirm with
# dataframe.isna().sum().
float_cols = train_df.select_dtypes(include="float").columns
train_medians = train_df[float_cols].median()
train_df = train_df.fillna(train_medians)
val_df = val_df.fillna(train_medians)

print(f"Using {len(train_df)} for train, {len(val_df)} for val")

Using 4088 for train, 1022 for val


In [54]:
def df_to_ds(dataframe):
    """Turn a DataFrame into a shuffled tf.data.Dataset of (features, label) pairs.

    Args:
        dataframe: Frame containing a "stroke" column plus the feature columns.
            It is copied first, so the caller's frame is not modified.

    Returns:
        A dataset whose elements are (dict of column name -> value, label),
        shuffled with a buffer as large as the number of rows.
    """
    features = dataframe.copy()
    # pop() removes the label column from the copy and hands it back.
    target = features.pop("stroke")
    n_rows = len(features)
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), target))
    return dataset.shuffle(buffer_size=n_rows)

# Wrap each split as its own shuffled dataset of (feature-dict, label) pairs.
train_ds, val_ds = (df_to_ds(split) for split in (train_df, val_df))

In [55]:
# Group both datasets into mini-batches of 32 examples.
# The names are rebound in place, so re-running this cell on its own would
# batch the already-batched datasets again; re-run the cell that builds them first.
train_ds, val_ds = (split.batch(32) for split in (train_ds, val_ds))

In [56]:
def encode_num_feature(feature, name, dataset):
    """Normalize one numeric model input using statistics learned from `dataset`.

    Args:
        feature: Symbolic Keras input that the adapted layer is applied to.
        name: Key of the column inside each feature dict yielded by `dataset`.
        dataset: Dataset yielding (feature-dict, label) pairs; used only to
            adapt the layer.

    Returns:
        The output of the adapted Normalization layer applied to `feature`.
    """
    # Keep only this column, then give every value a trailing axis.
    column_ds = dataset.map(lambda features, _label: features[name]).map(
        lambda values: tf.expand_dims(values, -1)
    )

    scaler = Normalization()
    scaler.adapt(column_ds)
    return scaler(feature)

In [57]:
def encode_cat_feature(feature, name, dataset, is_string):
    """Multi-hot encode one categorical model input with a vocabulary learned from `dataset`.

    Args:
        feature: Symbolic Keras input that the adapted lookup layer is applied to.
        name: Key of the column inside each feature dict yielded by `dataset`.
        dataset: Dataset yielding (feature-dict, label) pairs; used only to
            adapt the lookup layer.
        is_string: True to use StringLookup, False to use IntegerLookup.

    Returns:
        The output of the adapted lookup layer applied to `feature`.
    """
    lookup_class = StringLookup if is_string else IntegerLookup
    # "multi_hot" is the documented name for this encoding. The previous
    # "binary" spelling is not among the documented output modes and is only
    # accepted as a legacy alias, so spell the mode explicitly.
    lookup = lookup_class(output_mode="multi_hot")

    # Keep only this column, then give every value a trailing axis.
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the vocabulary from the values seen in `dataset`.
    lookup.adapt(feature_ds)

    return lookup(feature)

In [58]:
# Categorical features that arrive as strings: one single-value input each.
# The `name` is the key the model expects in its input dict, so the variable
# `res` is fed under "Residence_type" and `smoke` under "smoking_status".
gender, ever_married, res, work_type, smoke = (
    keras.Input(shape=(1,), name=column, dtype="string")
    for column in (
        "gender",
        "ever_married",
        "Residence_type",
        "work_type",
        "smoking_status",
    )
)

In [59]:
# Categorical features that arrive as integers: one single-value int64 input each.
heart_disease, hypertension = (
    keras.Input(shape=(1,), name=column, dtype="int64")
    for column in ("heart_disease", "hypertension")
)

In [60]:
# Numeric features: one single-value input each, using the default dtype.
# NOTE(review): `id` looks like a record identifier rather than a measurement --
# confirm it is really meant to be a model input.
age, avg_glucose_level, bmi, id_num = (
    keras.Input(shape=(1,), name=column)
    for column in ("age", "avg_glucose_level", "bmi", "id")
)

In [61]:
# Every symbolic input, collected in one list (order preserved).
all_inputs = [
    id_num, gender, age,
    hypertension, heart_disease,
    ever_married, work_type, res,
    avg_glucose_level, bmi, smoke,
]

In [62]:
# Learn a string vocabulary per column from the training data and encode each input.
gender_encoded = encode_cat_feature(
    gender, name="gender", dataset=train_ds, is_string=True
)
ever_married_encoded = encode_cat_feature(
    ever_married, name="ever_married", dataset=train_ds, is_string=True
)
res_encoded = encode_cat_feature(
    res, name="Residence_type", dataset=train_ds, is_string=True
)
work_type_encoded = encode_cat_feature(
    work_type, name="work_type", dataset=train_ds, is_string=True
)
smoke_encoded = encode_cat_feature(
    smoke, name="smoking_status", dataset=train_ds, is_string=True
)

In [63]:
# Learn an integer vocabulary per column from the training data and encode each input.
heart_encoded = encode_cat_feature(
    heart_disease, name="heart_disease", dataset=train_ds, is_string=False
)
hypertension_encoded = encode_cat_feature(
    hypertension, name="hypertension", dataset=train_ds, is_string=False
)

In [64]:
# Adapt a normalization layer per numeric column on the training data and apply it.
age_encoded = encode_num_feature(age, name="age", dataset=train_ds)
avg_glucose_encoded = encode_num_feature(
    avg_glucose_level, name="avg_glucose_level", dataset=train_ds
)
bmi_encoded = encode_num_feature(bmi, name="bmi", dataset=train_ds)
id_encoded = encode_num_feature(id_num, name="id", dataset=train_ds)

In [65]:
# Join every encoded feature into a single tensor.
all_features = layers.Concatenate()(
    [
        id_encoded, gender_encoded, age_encoded,
        hypertension_encoded, heart_encoded,
        ever_married_encoded, work_type_encoded, res_encoded,
        avg_glucose_encoded, bmi_encoded, smoke_encoded,
    ]
)

# One hidden ReLU layer of 32 units with 50% dropout, then a single sigmoid output unit.
x = layers.Dense(units=32, activation="relu")(all_features)
x = layers.Dropout(rate=0.5)(x)
output = layers.Dense(units=1, activation="sigmoid")(x)

model = keras.Model(inputs=all_inputs, outputs=output)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [66]:
# Train for 50 epochs on the training batches, validating on the held-out batches.
model.fit(x=train_ds, validation_data=val_ds, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fed34391040>

ValueError: in user code:

    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 1621, in predict_function  *
        return step_function(self, iterator)
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 1611, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 1604, in run_step  **
        outputs = model.predict_step(data)
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/training.py", line 1572, in predict_step
        return self(x, training=False)
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/keras/engine/input_spec.py", line 182, in assert_input_compatibility
        raise ValueError(f'Missing data for input "{name}". '

    ValueError: Missing data for input "Residence_type". You passed a data dictionary with keys ['gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'work_type', 'res', 'avg_glucose_level', 'bmi', 'smoking_status', 'id']. Expected the following keys: ['gender', 'ever_married', 'Residence_type', 'work_type', 'smoking_status', 'heart_disease', 'hypertension', 'age', 'avg_glucose_level', 'bmi', 'id']
