# Import libraries

In [1]:
import pandas as pd
import tensorflow as tf

# Read the input data

In [2]:
df = pd.read_csv('heart.csv')

# Take a look at the data

In [3]:
print(df.head())

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


# Normalize the numerical inputs

In [4]:
def normalize_column(df, column):
    max_value = df[column].max()
    min_value = df[column].min()
    df[column] = (df[column] - min_value)/(max_value - min_value)
    return df

In [5]:
df = normalize_column(df, "age")
df = normalize_column(df, "trestbps")
df = normalize_column(df, "chol")
df = normalize_column(df, "thalach")
df = normalize_column(df, "oldpeak")
df = normalize_column(df, "ca")

# Convert categorical input to one hot encoding

In [6]:
def make_one_hot_encoding(df, column):
    values = df.pop(column)
    unique_values = values.unique()
    unique_values = sorted(unique_values)
    for unique_value in unique_values:
        df[column + str(unique_value)] = (values == unique_value)*1.0
    return df

In [7]:
df = make_one_hot_encoding(df, "cp")
df = make_one_hot_encoding(df, "thal")

# Take a look at the final transformed data

In [8]:
print(df.head())

        age  sex  trestbps      chol  fbs  restecg   thalach  exang   oldpeak  \
0  0.708333    1  0.481132  0.244292    1        0  0.603053      0  0.370968   
1  0.166667    1  0.339623  0.283105    0        1  0.885496      0  0.564516   
2  0.250000    0  0.339623  0.178082    0        0  0.770992      0  0.225806   
3  0.562500    1  0.245283  0.251142    0        1  0.816794      0  0.129032   
4  0.583333    0  0.245283  0.520548    0        1  0.702290      1  0.096774   

   slope   ca  target  cp0  cp1  cp2  cp3  thal0  thal1  thal2  thal3  
0      0  0.0       1  0.0  0.0  0.0  1.0    0.0    1.0    0.0    0.0  
1      0  0.0       1  0.0  0.0  1.0  0.0    0.0    0.0    1.0    0.0  
2      2  0.0       1  0.0  1.0  0.0  0.0    0.0    0.0    1.0    0.0  
3      2  0.0       1  0.0  1.0  0.0  0.0    0.0    0.0    1.0    0.0  
4      2  0.0       1  1.0  0.0  0.0  0.0    0.0    0.0    1.0    0.0  


# Shuffle the data

In [9]:
df = df.sample(frac=1)

# Split the dataset into training set and test set

In [10]:
x_train = df.iloc[:273, :-1].to_numpy()
y_train = df.iloc[:273, -1:].to_numpy()

x_test = df.iloc[273:, :-1].to_numpy()
y_test = df.iloc[273:, -1:].to_numpy()

In [11]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(273, 19)
(273, 1)
(30, 19)
(30, 1)


# Build the model

In [12]:
model = tf.keras.Sequential()

model.add(tf.keras.layers.Input(shape=[19]))
model.add(tf.keras.layers.Dense(units=64, activation='relu'))
model.add(tf.keras.layers.Dense(units=64, activation='relu'))
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                1280      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 5,505
Trainable params: 5,505
Non-trainable params: 0
_________________________________________________________________


# Choose Loss function and Optimizer

In [13]:
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

# Start the training process

In [14]:
model.fit(x_train, y_train, epochs=10)

W0728 11:34:56.428033 140367398168384 deprecation.py:323] From /home/rishi/Programs/ML/mlvenv/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 273 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fa98c2d3908>

# Evaluate with Test set

In [15]:
test_loss, test_acc = model.evaluate(x_test, y_test)

print(test_acc)

1.0
