In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

%matplotlib inline
# Seaborn plotting defaults for the whole notebook: 'notebook' scaling
# context and the 'white' axes style.
sns.set_context('notebook')
sns.set_style('white')

In [2]:
# Degree passed to PolynomialFeatures when expanding the input columns.
POLY_DEGREE = 2
# Number of rows per training step (passed to get_next_batch in the training loop).
BATCH_SIZE = 100

In [3]:
# Read heart.csv and recode the textual `famhist` column as 1 (Present) /
# 0 (Absent) in one chained expression, so `data` is never seen half-processed.
data = pd.read_csv('../data/heart.csv').assign(
    famhist=lambda frame: frame['famhist'].map({'Present': 1, 'Absent': 0})
)
data.head()

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.0,5.73,23.11,1,49,25.3,97.2,52,1
1,144,0.01,4.41,28.61,0,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,1,52,29.14,3.81,46,0
3,170,7.5,6.41,38.03,1,51,31.99,24.26,58,1
4,134,13.6,3.5,27.78,1,60,25.99,57.34,49,1


In [4]:
# Plain ndarray copy of the frame; the last column is later sliced off as the
# label and the remaining columns as inputs.
# NOTE(review): mixed int/float columns are presumably upcast to one float
# dtype here (convert_to_one_hot casts back with astype(int)) — confirm.
np_data = np.array(data)

In [5]:
# Feature expansion (all monomials up to POLY_DEGREE, not only interactions)
# followed by a standardizing scaler; both are fitted further down.
poly = PolynomialFeatures(degree=POLY_DEGREE, interaction_only=False)
std = StandardScaler()

In [6]:
# Last column of np_data is the label; everything before it is an input.
labels = np_data[:, -1]
# Expand the inputs polynomially, then standardize the expanded columns.
# Both transformers are fitted on every row here, i.e. before the
# train/test split that happens later in the notebook.
features = std.fit_transform(poly.fit_transform(np_data[:, :-1]))

In [7]:
def convert_to_one_hot(vector, num_classes=None):
    """
    Convert a 1-D vector of non-negative class ids into a 2-D array of
    one-hot rows: an i'th input value of j sets a 1 in row i, column j
    of the output.

    Args:
        vector: non-empty np.ndarray of class ids. Values are cast with
            ``astype(int)`` before being used as column indices, so
            float-typed ids such as 0.0 / 1.0 are accepted.
        num_classes: number of output columns. Defaults to
            ``max(vector) + 1``. When given, it must be strictly greater
            than the largest id in ``vector``.

    Returns:
        np.ndarray of shape ``(len(vector), num_classes)`` and dtype int8.

    Raises:
        AssertionError: if ``vector`` is not an ndarray, is empty, contains
            a negative id, or ``num_classes`` is too small to hold the
            largest id.

    Example:
        v = np.array((1, 0, 4))
        one_hot_v = convert_to_one_hot(v)
        print(one_hot_v)

        [[0 1 0 0 0]
         [1 0 0 0 0]
         [0 0 0 0 1]]
    """

    assert isinstance(vector, np.ndarray)
    assert len(vector) > 0
    # Negative ids would silently wrap around as Python-style indices.
    assert np.min(vector) >= 0

    largest_id = np.max(vector)
    if num_classes is None:
        num_classes = largest_id + 1
    else:
        assert num_classes > 0
        # Columns are 0-based, so id k needs at least k + 1 columns.
        assert num_classes > largest_id

    result = np.zeros(shape=(len(vector), int(num_classes)), dtype=np.int8)
    # Fancy-index one (row, column) pair per input element.
    result[np.arange(len(vector)), vector.astype(int)] = 1
    return result

In [8]:
# Build the one-hot labels from the raw label column of np_data instead of
# from `labels` itself: re-running this cell would otherwise feed the
# already one-hot (2-D) array back into convert_to_one_hot and fail.
labels = convert_to_one_hot(np_data[:, -1])
labels[0], labels.shape

(array([0, 1], dtype=int8), (462, 2))

In [9]:
# Positional hold-out split: rows before index 400 are used for training,
# the remaining rows for testing. Features and labels are cut at the same
# index so they stay aligned.
train_features, test_features = np.split(features, [400])
train_labels, test_labels = np.split(labels, [400])

In [10]:
# Step size handed to the Adam optimizer.
learning_rate = 0.001
# Number of passes the training loop makes over the training rows.
n_epochs = 200

In [11]:
def get_next_batch(batch_size, mat):
    """
    Yield consecutive row slices of `mat`, each `batch_size` rows long.

    The number of slices is ceil(rows / batch_size), so the final slice is
    shorter when the row count is not a multiple of `batch_size`.
    """
    n_rows = mat.shape[0]
    n_chunks = int(np.ceil(n_rows / batch_size))
    start = 0
    for _ in range(n_chunks):
        stop = start + batch_size
        yield mat[start:stop]
        start = stop

In [12]:
# Graph inputs. The leading None leaves the batch dimension open; X is as
# wide as the expanded feature matrix, Y holds two-column one-hot labels.
X = tf.placeholder(dtype=tf.float32, shape=[None, features.shape[1]], name='feature')
Y = tf.placeholder(dtype=tf.float32, shape=[None, 2], name='label')
X, Y

(<tf.Tensor 'feature:0' shape=(?, 55) dtype=float32>,
 <tf.Tensor 'label:0' shape=(?, 2) dtype=float32>)

In [13]:
# Trainable parameters of the linear layer: small random-normal weights
# (one column per class) and a zero-initialized bias row.
w = tf.Variable(
    initial_value=tf.random_normal([features.shape[1], 2], stddev=0.01),
    name='weights',
)
b = tf.Variable(initial_value=tf.zeros(shape=[1, 2]), name='bias')

In [14]:
# Unnormalized class scores: X @ w, with the bias row broadcast over the batch.
logits = tf.add(tf.matmul(X, w), b)

In [15]:
# Per-example softmax cross-entropy between the one-hot labels and the logits.
entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits, name='entropy')
# Scalar training objective: the mean over the examples in the batch.
loss = tf.reduce_mean(input_tensor=entropy)

In [16]:
# Despite its name, `optimizer` is the op returned by minimize(): running it
# applies one Adam update step that reduces `loss`.
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss=loss)

In [17]:
# Op that initializes the graph's global variables; it is run once at the
# start of the session, before any training step.
init = tf.global_variables_initializer()

In [18]:
with tf.Session() as sess:
    sess.run(init)

    # --- Training -----------------------------------------------------
    for _ in range(n_epochs):  # one full pass over the training rows per epoch
        # Fresh generators every epoch. zip() walks features and labels in
        # lockstep and consumes every batch get_next_batch produces,
        # including a trailing partial batch if the row count is not a
        # multiple of BATCH_SIZE.
        batches = zip(get_next_batch(BATCH_SIZE, train_features),
                      get_next_batch(BATCH_SIZE, train_labels))
        for X_batch, Y_batch in batches:
            sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})

    # --- Evaluation ---------------------------------------------------
    # Fetch only `loss` and `logits`. Fetching `optimizer` here would apply
    # a gradient update computed from the test rows, i.e. train on the
    # held-out data while measuring it.
    loss_batch, logits_batch = sess.run(
        [loss, logits], feed_dict={X: test_features, Y: test_labels})

# The fetched logits are a plain ndarray, so accuracy is computed in numpy
# rather than by adding new ops to the graph. Softmax is monotonic per row,
# so argmax over the logits picks the same class as argmax over softmax.
predicted_classes = np.argmax(logits_batch, axis=1)
true_classes = np.argmax(test_labels, axis=1)
total_correct_preds = np.count_nonzero(predicted_classes == true_classes)
print("Accuracy {0}".format(total_correct_preds / test_labels.shape[0]))

Accuracy 0.8225806451612904
