In [31]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import matplotlib.pylab as plt

In [32]:
# Load the raw passenger table from the local Excel workbook (path is relative
# to the notebook's working directory).
df = pd.read_excel(io='data/titanic3.xls')

In [33]:
# Separate the raw frame into the feature table (everything except the label)
# and the label column itself.
X = df.drop(columns=['survived'])
y = df.loc[:, 'survived']

In [34]:
# Remove the columns that are not fed to the model.
X = X.drop(
    labels=['name', 'ticket', 'cabin', 'body', 'home.dest'],
    axis='columns',
)

In [35]:
# One-hot encode the categorical columns; dummy_na=True adds an extra
# indicator column per feature for missing values.
# NOTE(review): 'boat' is presumably the lifeboat identifier, which would leak
# the outcome into the features -- confirm it is meant to be a model input.
X = pd.get_dummies(
    data=X,
    columns=['pclass', 'sex', 'embarked', 'boat'],
    dummy_na=True,
)


In [7]:
# Encode the target as integers in {0, 1}: 1 where `survived == 1`, else 0.
# The previous {-1, +1} encoding is incompatible with the training cell, which
# uses a sigmoid cross-entropy loss and compares a rounded sigmoid (0 or 1)
# against the labels -- with -1 labels the negative class can never be counted
# as correct and the loss is no longer a valid cross-entropy.
y = (y == 1).astype(int)

In [36]:
# Impute missing values in the numeric columns with each column's median.
num_feat = ['age', 'sibsp', 'parch', 'fare']
medians = X[num_feat].median()  # one median per numeric column
for feat in num_feat:
    X[feat] = X[feat].fillna(medians[feat])


In [37]:
# Hold out 20% of the rows for testing. The split is stratified on the label
# and uses a fixed random_state so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [64]:
def one_hot_target(n_classes, target):
    """Optionally one-hot encode a label vector.

    Parameters
    ----------
    n_classes : number
        Class count; encoding is applied only when this is greater than 1.
    target : array-like or pandas.Series
        Labels to encode.

    Returns
    -------
    The result of ``pd.get_dummies(target)`` when ``n_classes > 1``;
    otherwise ``target`` itself, returned unchanged.
    """
    return pd.get_dummies(target) if n_classes > 1 else target

In [65]:
# With n_classes=1 the helper skips encoding and hands `y` back unchanged.
one_hot_y = one_hot_target(n_classes=1, target=y)

In [53]:
# --- Logistic regression trained with full-batch gradient descent (TF1 graph API) ---

tf.reset_default_graph()
# Graph-level seed so the random weight/bias initialisation is repeatable
# across kernel restarts (must be set after the graph reset).
tf.set_random_seed(42)

n_feats = X_train.shape[1]

# Trainable parameters: one weight per feature plus a scalar bias.
w = tf.Variable(tf.random_normal(shape=[n_feats, 1]), name='weights')
b = tf.Variable(tf.random_normal(shape=[1, 1]), name='bias')

# NOTE: `X` is rebound here from the feature DataFrame to a placeholder, so the
# earlier preprocessing cells cannot be re-run after this cell without first
# re-creating the DataFrame.
X = tf.placeholder(dtype=tf.float32, shape=[None, n_feats])
# Y must hold 0/1 labels: `prediction` below is a rounded sigmoid (0 or 1) and
# is compared to Y by equality.
Y = tf.placeholder(dtype=tf.float32, shape=[None, 1])

# Linear scores (logits); the sigmoid is applied inside the loss / prediction ops.
Y_pred = tf.matmul(X, w) + b

loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Y_pred, labels=Y),
                      name='loss')

learning_rate = 0.005
opt = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
prediction = tf.round(tf.sigmoid(Y_pred))
correct = tf.cast(tf.equal(prediction, Y), dtype=tf.float32)
accuracy = tf.reduce_mean(correct)


iter_num = 3000
log_every = 100  # print progress every `log_every` iterations

# The whole training set is fed on every step, so build the feed once instead
# of re-creating it inside the loop. Labels are reshaped to a column to match
# the [None, 1] placeholder.
train_features = np.asarray(X_train, dtype=np.float32)
train_labels = np.asarray(y_train, dtype=np.float32).reshape(-1, 1)
feed_dict = {X: train_features, Y: train_labels}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(iter_num):
        _, temp_loss = sess.run([opt, loss], feed_dict)
        if (epoch + 1) % log_every == 0:
            # Accuracy is only needed for logging; evaluate it after the update
            # step, and only on the iterations that are actually printed.
            temp_train_acc = sess.run(accuracy, feed_dict=feed_dict)
            print('epoch: {:4d} loss: {:5f} train_acc: {:5f}'.format(epoch + 1, temp_loss,
                                                                              temp_train_acc))
    # Fetch the learned weights once, after training; a variable read needs no feed.
    w_out = sess.run(w)

epoch:  100 loss: 1.136217 train_acc: 0.596944
epoch:  200 loss: 1.070203 train_acc: 0.600764
epoch:  300 loss: 1.007717 train_acc: 0.612225
epoch:  400 loss: 0.948816 train_acc: 0.618911
epoch:  500 loss: 0.893534 train_acc: 0.631328
epoch:  600 loss: 0.841894 train_acc: 0.644699
epoch:  700 loss: 0.793923 train_acc: 0.663801
epoch:  800 loss: 0.749654 train_acc: 0.674308
epoch:  900 loss: 0.709109 train_acc: 0.685769
epoch: 1000 loss: 0.672260 train_acc: 0.694365
epoch: 1100 loss: 0.639000 train_acc: 0.702006
epoch: 1200 loss: 0.609131 train_acc: 0.713467
epoch: 1300 loss: 0.582375 train_acc: 0.730659
epoch: 1400 loss: 0.558407 train_acc: 0.744986
epoch: 1500 loss: 0.536882 train_acc: 0.751671
epoch: 1600 loss: 0.517461 train_acc: 0.761223
epoch: 1700 loss: 0.499834 train_acc: 0.775549
epoch: 1800 loss: 0.483728 train_acc: 0.782235
epoch: 1900 loss: 0.468920 train_acc: 0.787966
epoch: 2000 loss: 0.455226 train_acc: 0.795606
epoch: 2100 loss: 0.442499 train_acc: 0.800382
epoch: 2200 l