## Import packages

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib inline

  from ._conv import register_converters as _register_converters


## Load and explore the dataset
There are 10 variables:

* sbp: Systolic blood pressure
* tobacco: Cumulative tobacco consumption, in kg
* ldl: Low-density lipoprotein cholesterol
* adiposity: Adipose tissue concentration
* famhist: Family history of heart disease (1=Present, 0=Absent)
* typea: Score on test designed to measure type-A behavior
* obesity: Obesity
* alcohol: Current consumption of alcohol
* age: Age of subject
* chd: Coronary heart disease at baseline; 1=Yes 0=No
    
Each following row contains the information of one patient. There are 462 samples in total.

In [3]:
data = pd.read_csv('heart.csv')

In [4]:
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1


In [5]:
data.shape

(303, 10)

## Preprocessing the data
* Use pd.get_dummies to specify which columns you want encoded and get a dataframe with original columns replaced with one-hot encodings.
* add the encoded dataframe to the dataset and drop the duplicate
* Split the data into fetures and albels and drop the labels column from the features
* perform min-max scaling, a normalization strategy which linearly transforms x to y= (x-min)/(max-min). when x=min, then y=0, and when x=max, then y=1.
* Split the data into train and test sets


In [6]:
dummies = pd.get_dummies(data['famhist'],prefix='famhist', drop_first=False)

In [7]:
data = pd.concat([data,dummies], axis=1)

In [8]:
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,Present,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,Present,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,Present,60,25.99,57.34,49,1,0,1


In [9]:
data = data.drop(['famhist'], axis=1)

In [10]:
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,chd,famhist_Absent,famhist_Present
0,160,12.0,5.73,23.11,49,25.3,97.2,52,1,0,1
1,144,0.01,4.41,28.61,55,28.87,2.06,63,1,1,0
2,118,0.08,3.48,32.28,52,29.14,3.81,46,0,0,1
3,170,7.5,6.41,38.03,51,31.99,24.26,58,1,0,1
4,134,13.6,3.5,27.78,60,25.99,57.34,49,1,0,1


In [11]:
inputs=['sbp','tobacco','ldl','adiposity','typea','obesity','alcohol','age']

In [12]:
labels = data['chd']

In [13]:
for each in inputs:
    data[each] = ( data[each] - data[each].min() ) / data[each].max() 

In [38]:
print(data.head())

        sbp   tobacco       ldl  adiposity     typea   obesity   alcohol  \
0  0.283654  0.384615  0.309850   0.323370  0.461538  0.162087  0.660371   
1  0.206731  0.000321  0.223744   0.452812  0.538462  0.238729  0.013996   
2  0.081731  0.002564  0.163079   0.539186  0.500000  0.244526  0.025885   
3  0.331731  0.240385  0.354207   0.674512  0.487179  0.305711  0.164821   
4  0.158654  0.435897  0.164384   0.433278  0.602564  0.176900  0.389565   

        age  chd  famhist_Absent  famhist_Present  
0  0.578125    1               0                1  
1  0.750000    1               1                0  
2  0.484375    0               0                1  
3  0.671875    1               0                1  
4  0.531250    1               0                1  


In [39]:
print(labels.shape)

(303,)


In [40]:
features = data.drop(['chd'], axis=1)

In [41]:
features.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,typea,obesity,alcohol,age,famhist_Absent,famhist_Present
0,0.283654,0.384615,0.30985,0.32337,0.461538,0.162087,0.660371,0.578125,0,1
1,0.206731,0.000321,0.223744,0.452812,0.538462,0.238729,0.013996,0.75,1,0
2,0.081731,0.002564,0.163079,0.539186,0.5,0.244526,0.025885,0.484375,0,1
3,0.331731,0.240385,0.354207,0.674512,0.487179,0.305711,0.164821,0.671875,0,1
4,0.158654,0.435897,0.164384,0.433278,0.602564,0.1769,0.389565,0.53125,0,1


In [42]:
features, labels = np.array(features), np.array(labels)

In [43]:
print(len(features), len(labels))

303 303


In [44]:
train_X, test_X, train_Y, test_Y = train_test_split(features, labels, test_size=0.2)

In [45]:
print (train_X.shape, train_Y.shape)

(242, 10) (242,)


In [21]:
print (test_X.shape, test_Y.shape)

(61, 10) (61,)


## Define parameters
* Number of labels
* Number of features
* Learning rate
* Number of epochs


In [46]:
n_labels= 2

In [47]:
n_features = 10

In [48]:
earning_rate = 0.1

In [49]:
n_epochs= 200

In [50]:
n_hidden1 = 5

In [51]:
tf.reset_default_graph()

## Build a classifer model
* Define placeholders for labels and inputs
* Define operations for weights and bias
* Define softmax cross entropy as the loss funciton, minimize it for the cost fucntion and set gradient descent as the optimizer.
* Define a variable initializer

In [52]:
inputs = tf.placeholder(tf.float32,[None, 10], name ='inputs' )

In [53]:
labels = tf.placeholder(tf.int32, [None,], name='output')

In [54]:
labels_one_hot = tf.one_hot(labels, 2)

In [55]:
weights = {
    'hidden_layer': tf.Variable(tf.truncated_normal([n_features,n_hidden1], stddev=0.1)),
    'output':tf.Variable(tf.truncated_normal([n_hidden1, n_labels], stddev=0.1))
}

In [56]:
bias = {
    'hidden_layer':tf.Variable(tf.zeros([n_hidden1])),
    'output':tf.Variable(tf.zeros(n_labels))
}

In [57]:
hidden_layer = tf.nn.bias_add(tf.matmul(inputs,weights['hidden_layer']), bias['hidden_layer'])

In [58]:
hidden_layer = tf.nn.relu(hidden_layer)

In [59]:
logits = tf.nn.bias_add(tf.matmul(hidden_layer, weights['output']), bias['output'])

In [60]:
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_one_hot)

In [61]:
cost = tf.reduce_mean(entropy)

In [62]:
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)

In [63]:
init = tf.global_variables_initializer()

## Train and evaluate the model
* Start the session
* Initialize the variables
* Print the loss after every epoch
* Test the model for correct predictions and calculate the accuracy 

In [64]:
with tf.Session() as sess:

    sess.run(init)

    for epoch in range(n_epochs):

        _, loss = sess.run([optimizer, cost], feed_dict={inputs:train_X, labels:train_Y})

        print("Epoch: {0} ; training loss: {1}".format(epoch, loss))

    print('training finished')

    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_one_hot, 1))

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({inputs: test_X, labels: test_Y}))

Epoch: 0 ; training loss: 0.6929178237915039
Epoch: 1 ; training loss: 0.6892012357711792
Epoch: 2 ; training loss: 0.6858416199684143
Epoch: 3 ; training loss: 0.6827988028526306
Epoch: 4 ; training loss: 0.680014967918396
Epoch: 5 ; training loss: 0.6774773001670837
Epoch: 6 ; training loss: 0.6751903891563416
Epoch: 7 ; training loss: 0.6731235980987549
Epoch: 8 ; training loss: 0.671241819858551
Epoch: 9 ; training loss: 0.6695281267166138
Epoch: 10 ; training loss: 0.6679601073265076
Epoch: 11 ; training loss: 0.6665526628494263
Epoch: 12 ; training loss: 0.665275514125824
Epoch: 13 ; training loss: 0.6641184091567993
Epoch: 14 ; training loss: 0.6630815863609314
Epoch: 15 ; training loss: 0.6621429920196533
Epoch: 16 ; training loss: 0.6612824201583862
Epoch: 17 ; training loss: 0.6604915857315063
Epoch: 18 ; training loss: 0.659765362739563
Epoch: 19 ; training loss: 0.6590947508811951
Epoch: 20 ; training loss: 0.658475399017334
Epoch: 21 ; training loss: 0.6578998565673828
Epo