In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the complete dataset from heart.csv
data = pd.read_csv("heart.csv")

# Hold out 15% of the rows for testing; the fixed random_state keeps the
# partition identical between runs
train, test = train_test_split(
    data,
    test_size=0.15,
    random_state=42,
)

# Write each partition to its own csv file, leaving out the index column
for split_frame, split_path in ((train, "heart_train.csv"), (test, "heart_test.csv")):
    split_frame.to_csv(split_path, index=False)

In [17]:
import tensorflow as tf
import functools

# Mini-batch size; passed as `batch_size` to `model.fit` when training.
BATCH_SIZE = 128

# The data preprocessing and normalization code below is adapted from the
# TensorFlow tutorial on loading pandas DataFrames:
# https://www.tensorflow.org/tutorials/load_data/pandas_dataframe

def stack_dict(inputs, fun=tf.stack):
    """Combine the values of a dict into one float32 tensor.

    The values are visited in sorted-key order, each is cast to
    ``tf.float32``, and the resulting list is handed to ``fun``.

    Args:
        inputs: Mapping from name to a value accepted by ``tf.cast``.
        fun: Callable invoked as ``fun(values, axis=-1)``; defaults to
            ``tf.stack``.

    Returns:
        The result of ``fun`` applied to the list of cast values.
    """
    ordered_keys = sorted(inputs.keys())
    as_float = [tf.cast(inputs[k], tf.float32) for k in ordered_keys]
    return fun(as_float, axis=-1)

print("--Get data--")
# Locations of the csv files holding the two splits
TRAIN_DATA_PATH = "heart_train.csv"
TEST_DATA_PATH = "heart_test.csv"

# Load each split into its own DataFrame
raw_train_dataset, raw_test_dataset = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_DATA_PATH, TEST_DATA_PATH)
)

# Detach the 'chd' label column; pop() also removes it from the feature frames
train_label, test_label = raw_train_dataset.pop('chd'), raw_test_dataset.pop('chd')

print("--Process data--")
# Column groups: binary features are handled by a lookup layer later on,
# numeric features are normalized.
binary_feature_names = ["famhist"]
numeric_feature_names = ["sbp", "tobacco", "ldl", "adiposity", "typea", "obesity", "alcohol", "age"]
numeric_features = raw_train_dataset[numeric_feature_names]

# Create one symbolic scalar-per-row Keras input for every feature column.
# The dtype is tf.string when the column's first value is a str, tf.int64 for
# other binary features, and tf.float32 for everything else.
inputs = {}
for name, column in raw_train_dataset.items():
  first_value = column[0]
  if type(first_value) == str:
    dtype = tf.string
  else:
    dtype = tf.int64 if name in binary_feature_names else tf.float32

  inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)

# Collects the preprocessed tensor produced for each feature group.
preprocessed = []

# One-hot encode every binary feature with a lookup layer whose vocabulary is
# the sorted set of values observed in the training data.
for name in binary_feature_names:
  vocab = sorted(set(raw_train_dataset[name]))
  print(f'name: {name}')
  print(f'vocab: {vocab}\n')

  # Choose the lookup layer that matches the vocabulary type. Without the
  # integer branch, a non-string feature would leave `lookup` undefined (or
  # silently reuse the layer built for the previous feature).
  if isinstance(vocab[0], str):
    lookup = tf.keras.layers.StringLookup(vocabulary=vocab, output_mode='one_hot')
  else:
    lookup = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode='one_hot')

  # Add a trailing axis so the lookup layer receives one value per row.
  x = inputs[name][:, tf.newaxis]
  x = lookup(x)
  preprocessed.append(x)


--Get data--
--Process data--
name: famhist
vocab: ['Absent', 'Present']



In [18]:
# Stack the numeric training columns into a single float32 tensor and let the
# Normalization layer adapt to it.
numeric_training_values = stack_dict(dict(numeric_features))
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(numeric_training_values)

In [19]:
# Pick out the symbolic inputs of the numeric features, stack them into one
# tensor (stack_dict orders them by sorted key, the same order used when the
# normalizer was adapted) and normalize the result.
numeric_inputs = stack_dict({
    feature_name: inputs[feature_name] for feature_name in numeric_feature_names
})
numeric_normalized = normalizer(numeric_inputs)

# Queue the normalized numeric block alongside the other preprocessed tensors.
preprocessed.append(numeric_normalized)

In [20]:
# Join every preprocessed tensor along the last axis and wrap the whole
# raw-inputs-to-features mapping in a Keras model.
preprocesssed_result = tf.concat(values=preprocessed, axis=-1)
preprocessor = tf.keras.Model(inputs=inputs, outputs=preprocesssed_result)

In [21]:
print("--Make model--")
# Fix TensorFlow's global random seed before any layer is created so that
# operations relying on it (e.g. weight initialization and dropout) are
# repeatable from one run of the notebook to the next. The value matches the
# random_state used for the train/test split.
tf.random.set_seed(42)

# Build the Keras model
# Model based off TensorFlow 2 tutorial on overfitting and underfitting
# https://www.tensorflow.org/tutorials/keras/overfit_and_underfit
# Two L2-regularized ReLU layers with dropout after the first, followed by a
# single sigmoid unit for the binary label.
body = tf.keras.Sequential([
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, kernel_regularizer=tf.keras.regularizers.l2(0.01), activation='relu'),
  tf.keras.layers.Dropout(rate=0.2),
  tf.keras.layers.Dense(128, kernel_regularizer=tf.keras.regularizers.l2(0.01), activation='relu'),
  tf.keras.layers.Dense(1, activation = 'sigmoid')
])

# Chain preprocessing and the classifier body into one end-to-end model that
# accepts the raw feature dict.
x = preprocessor(inputs)
result = body(x)

model = tf.keras.Model(inputs, result)

model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

print("--Fit model--")
# Keep the History object so the per-epoch metrics stay available afterwards
# and the cell does not end by echoing the object's repr.
history = model.fit(dict(raw_train_dataset), train_label, epochs=15, batch_size=BATCH_SIZE, verbose=2)


--Make model--
--Fit model--
Epoch 1/15
4/4 - 1s - loss: 2.8877 - accuracy: 0.5026 - 810ms/epoch - 203ms/step
Epoch 2/15
4/4 - 0s - loss: 2.5987 - accuracy: 0.6913 - 32ms/epoch - 8ms/step
Epoch 3/15
4/4 - 0s - loss: 2.3730 - accuracy: 0.7194 - 32ms/epoch - 8ms/step
Epoch 4/15
4/4 - 0s - loss: 2.1873 - accuracy: 0.7041 - 31ms/epoch - 8ms/step
Epoch 5/15
4/4 - 0s - loss: 2.0191 - accuracy: 0.7219 - 30ms/epoch - 8ms/step
Epoch 6/15
4/4 - 0s - loss: 1.8616 - accuracy: 0.7449 - 32ms/epoch - 8ms/step
Epoch 7/15
4/4 - 0s - loss: 1.7183 - accuracy: 0.7347 - 33ms/epoch - 8ms/step
Epoch 8/15
4/4 - 0s - loss: 1.6005 - accuracy: 0.7372 - 30ms/epoch - 8ms/step
Epoch 9/15
4/4 - 0s - loss: 1.4860 - accuracy: 0.7321 - 28ms/epoch - 7ms/step
Epoch 10/15
4/4 - 0s - loss: 1.3841 - accuracy: 0.7296 - 32ms/epoch - 8ms/step
Epoch 11/15
4/4 - 0s - loss: 1.2897 - accuracy: 0.7423 - 33ms/epoch - 8ms/step
Epoch 12/15
4/4 - 0s - loss: 1.2190 - accuracy: 0.7321 - 32ms/epoch - 8ms/step
Epoch 13/15
4/4 - 0s - loss: 

<keras.callbacks.History at 0x7fd8e437e750>

In [22]:
print("--Evaluate model--")
# Convert each feature frame to the dict-of-columns form the model expects.
train_features = dict(raw_train_dataset)
test_features = dict(raw_test_dataset)

# evaluate() yields the loss followed by the compiled accuracy metric; score
# the training split first, then the held-out test split.
model_loss1, model_acc1 = model.evaluate(x=train_features, y=train_label, verbose=2)
model_loss2, model_acc2 = model.evaluate(x=test_features, y=test_label, verbose=2)
print(f"Train / Test Accuracy: {model_acc1*100:.1f}% / {model_acc2*100:.1f}%")

--Evaluate model--
13/13 - 0s - loss: 0.9770 - accuracy: 0.7398 - 276ms/epoch - 21ms/step
3/3 - 0s - loss: 1.0151 - accuracy: 0.7714 - 32ms/epoch - 11ms/step
Train / Test Accuracy: 74.0% / 77.1%
