In [0]:
import tensorflow as tf
import numpy as np
import pandas as pd

np.random.seed(1)

In [0]:
def load_dataset(data_file="./processed_data.csv", n_train=891):
  """Read the preprocessed CSV and split it into training and test arrays.

  The file is read with its first column as the index. Of the remaining
  columns, the first one is used as the label and all others as features.
  The first ``n_train`` rows form the training set and every later row forms
  the test set (the label column of the test rows is ignored).

  Args:
    data_file: Path of the CSV file to read.
    n_train: Number of leading rows that belong to the training set.

  Returns:
    A tuple ``(X_train, Y_train, X_test)`` with one example per column:
    ``X_train`` has shape ``(n_features, n_train)``, ``Y_train`` has shape
    ``(1, n_train)`` and ``X_test`` has shape
    ``(n_features, n_rows - n_train)``.

  Raises:
    ValueError: If the file contains fewer than ``n_train`` rows.
  """
  values = pd.read_csv(data_file, index_col=0).values

  # Fail loudly instead of silently returning a short training set.
  if values.shape[0] < n_train:
    raise ValueError(
        "expected at least {} rows in {!r}, found {}".format(
            n_train, data_file, values.shape[0]))

  X_train = values[:n_train, 1:].T
  # Keep the labels as a row vector so they line up with the columns of X.
  Y_train = values[:n_train, 0].reshape(1, -1)
  X_test = values[n_train:, 1:].T

  return X_train, Y_train, X_test

In [0]:
def create_placeholders(n_x, n_y):
  """Create the float32 graph inputs for the features and the labels.

  Args:
    n_x: Number of rows of the feature placeholder.
    n_y: Number of rows of the label placeholder.

  Returns:
    A tuple ``(X, Y)`` of placeholders named "X" and "Y" with shapes
    ``(n_x, None)`` and ``(n_y, None)``. The second dimension is left
    unspecified so any number of examples can be fed.
  """
  features = tf.placeholder(tf.float32, shape=(n_x, None), name="X")
  labels = tf.placeholder(tf.float32, shape=(n_y, None), name="Y")
  return features, labels

In [0]:
def initialize_parameters():
  """Create the weight and bias variables of the 13-26-13-6-1 network.

  For each layer ``l`` a weight matrix ``W<l>`` of shape
  ``(units_l, units_{l-1})`` is created with a Xavier initializer (seed 1),
  together with a bias column ``b<l>`` of shape ``(units_l, 1)`` that starts
  at zero.

  Returns:
    A dict mapping "W1".."W4" and "b1".."b4" to the created variables.
  """
  # Width of the input followed by the number of units in each layer.
  layer_sizes = (13, 26, 13, 6, 1)

  weights, biases = {}, {}
  for layer in range(1, len(layer_sizes)):
    n_in, n_out = layer_sizes[layer - 1], layer_sizes[layer]
    w_name, b_name = "W{}".format(layer), "b{}".format(layer)
    weights[w_name] = tf.get_variable(
        w_name, shape=(n_out, n_in),
        initializer=tf.contrib.layers.xavier_initializer(seed=1))
    biases[b_name] = tf.get_variable(
        b_name, shape=(n_out, 1), initializer=tf.zeros_initializer())

  # All weights first, then all biases.
  parameters = dict(weights)
  parameters.update(biases)
  return parameters

In [0]:
def forward_propagation(X, parameters):
  """Build the forward pass of the four-layer network.

  Layers 1 to 3 each apply ``W @ A + b`` followed by a leaky ReLU. Layer 4
  applies only the affine step, so the result is the raw logit (no sigmoid).

  Args:
    X: Input with one example per column.
    parameters: Dict holding "W1".."W4" and "b1".."b4".

  Returns:
    ``Z4``, the pre-sigmoid output of the last layer.
  """
  activation = X
  for layer in ("1", "2", "3"):
    pre_activation = (tf.matmul(parameters["W" + layer], activation)
                      + parameters["b" + layer])
    activation = tf.nn.leaky_relu(pre_activation)

  # Output layer: affine transform only.
  return tf.matmul(parameters["W4"], activation) + parameters["b4"]

In [0]:
def compute_cost(Y, Z4, parameters, lambd):
  """Build the training cost: mean sigmoid cross-entropy plus an L2 penalty.

  Args:
    Y: Labels, one example per column.
    Z4: Logits of the output layer, same layout as ``Y``.
    parameters: Dict of model parameters. Every entry whose key starts with
      "W" is treated as a weight matrix and is regularized; all other
      entries (the biases) are not.
    lambd: Multiplier applied to the summed ``tf.nn.l2_loss`` of the weights.
      With ``lambd == 0`` the cost is the plain cross-entropy.

  Returns:
    A scalar: the cross-entropy averaged over all examples plus
    ``lambd`` times the summed L2 loss of all weight matrices.
  """
  # The cross-entropy op gets examples as rows, hence the transposes.
  logits = tf.transpose(Z4)
  labels = tf.transpose(Y)
  cross_entropy = tf.reduce_mean(
      tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

  # Penalize every weight matrix, including the output layer's, rather than
  # a fixed range of layer indices.
  weight_names = sorted(name for name in parameters if name.startswith("W"))
  l2_penalty = sum(tf.nn.l2_loss(parameters[name]) for name in weight_names)

  return cross_entropy + lambd * l2_penalty

In [0]:
def compute_accuracy(Z4, Y):
  """Build the classification accuracy of the logits against 0/1 labels.

  The logits are turned into hard predictions with the same rule that
  ``model`` uses for its output (sigmoid probability >= 0.5 means class 1).
  The accuracy is the fraction of predictions that equal the label.

  Args:
    Z4: Logits of the output layer.
    Y: Ground-truth labels (0 or 1) with the same shape as ``Z4``.

  Returns:
    A scalar in [0, 1]: the share of correctly classified examples.
  """
  predicted = tf.cast(
      tf.math.greater_equal(tf.nn.sigmoid(Z4), 0.5), tf.float32)
  labels = tf.cast(Y, tf.float32)
  # 1.0 where prediction and label agree, 0.0 elsewhere.
  matches = tf.cast(tf.math.equal(predicted, labels), tf.float32)

  return tf.math.reduce_mean(matches)

In [0]:
def model(X_train, Y_train, X_test, learning_rate=0.005, num_epochs=30000,
          lambd=0.0):
  """Train the network with full-batch Adam and predict on the test set.

  Every epoch runs one optimizer step on the whole training set. The cost is
  printed every 100 epochs and the training accuracy once after training.

  Args:
    X_train: Training features, one example per column.
    Y_train: Training labels, one example per column.
    X_test: Test features, one example per column.
    learning_rate: Learning rate passed to the Adam optimizer.
    num_epochs: Number of optimizer steps to run.
    lambd: L2 regularization multiplier passed to ``compute_cost``.

  Returns:
    A tuple ``(predictions, parameters)``: ``predictions`` is an int32 array
    of 0/1 values obtained by thresholding the sigmoid output for ``X_test``
    at 0.5, and ``parameters`` is the parameter dict with every variable
    replaced by its trained value.
  """
  n_x = X_train.shape[0]
  n_y = Y_train.shape[0]

  # Build everything in a private graph. tf.get_variable refuses to create a
  # variable whose name already exists in the graph, so building in the
  # global default graph makes a second call (or a cell re-run) fail.
  with tf.Graph().as_default():
    X, Y = create_placeholders(n_x, n_y)
    parameters = initialize_parameters()
    Z4 = forward_propagation(X, parameters)
    cost = compute_cost(Y, Z4, parameters, lambd)
    train_step = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Define the evaluation ops once, before the session starts.
    accuracy = compute_accuracy(Z4, Y)
    predict = tf.cast(
        tf.math.greater_equal(tf.nn.sigmoid(Z4), 0.5), tf.int32)

    g_init = tf.global_variables_initializer()

    with tf.Session() as sess:
      sess.run(g_init)
      train_feed = {X: X_train, Y: Y_train}

      for epoch in range(num_epochs):
        _, epoch_cost = sess.run([train_step, cost], feed_dict=train_feed)

        if epoch % 100 == 0:
          print("Cost after {} epochs: {}".format(epoch, epoch_cost))

      print("Training data accuracy {}".format(
          sess.run(accuracy, feed_dict=train_feed)))

      predictions = sess.run(predict, feed_dict={X: X_test})
      # Fetch the trained values so they outlive the session.
      parameters = sess.run(parameters)

  return predictions, parameters

In [9]:
# Load the dataset, train the network with the default hyper-parameters and
# keep the test-set predictions together with the trained parameter values.
predictions, parameters = model(*load_dataset())

W0707 13:40:45.269325 140419496703872 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

W0707 13:40:45.328612 140419496703872 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Cost after 0 epochs: 0.8287937641143799
Cost after 100 epochs: 0.4112977385520935
Cost after 200 epochs: 0.39736899733543396
Cost after 300 epochs: 0.3772742748260498
Cost after 400 epochs: 0.35954025387763977
Cost after 500 epochs: 0.3413722515106201
Cost after 600 epochs: 0.3241663873195648
Cost after 700 epochs: 0.31093472242355347
Cost after 800 epochs: 0.3035987615585327
Cost after 900 epochs: 0.29578056931495667
Cost after 1000 epochs: 0.2904365062713623
Cost after 1100 epochs: 0.284953236579895
Cost after 1200 epochs: 0.28751587867736816
Cost after 1300 epochs: 0.28297844529151917
Cost after 1400 epochs: 0.27790114283561707
Cost after 1500 epochs: 0.27446985244750977
Cost after 1600 epochs: 0.2775067389011383
Cost after 1700 epochs: 0.27107104659080505
Cost after 1800 epochs: 0.26714763045310974
Cost after 1900 epochs: 0.2696828842163086
Cost after 2000 epochs: 0.2673737704753876
Cost after 2100 epochs: 0.2615720331668854
Cost after 2200 epochs: 0.2590779662132263
Cost after 230

In [0]:
# One row per test passenger: ids 892..1309 paired with the predicted label.
df_submission = pd.DataFrame({
    "PassengerId": list(range(892, 1310)),
    "Survived": np.squeeze(predictions),
})

In [0]:
# Write only the PassengerId and Survived columns. Without index=False pandas
# would also emit the row index as an extra unnamed first column.
df_submission.to_csv("./titanic_predictions.csv", index=False)