In [126]:
#Imports

import pandas as pd
import numpy as np
import math

In [127]:
#Functions

def initialize_with_zeros(dim):
    w = np.zeros((dim, 1))
    b = 0
    
    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    
    return w, b
    

def sigmoid(z):
    g = 1 / (1 + np.exp(-z))
    return g

def propagate(w, b, X, Y):
    m = X.shape[1]
    
    #Forward propagation
    print("Updated w: " + str(w))
    print("Updated b: " + str(b))
    print("-- Sigmoid on " + str(w.T.shape) + ", " + str(X.shape))
    A = sigmoid(np.dot(w.T, X) + b)
    print("-- Activations: " + str(A.shape))
    print("-- Cost values:")
    print("---- For term1:")
    print("------ Y: " + str(Y.shape))
    temp = np.log(1 - A).T
    print("------ log(1 - A).T: " + str(temp.shape))
    term1 = np.dot(Y, np.log(A).T)
    print("--- For term2:")
    print("------ Y: " + str(Y.shape))
    a = 1 - Y
    b = 1 - A
    c = np.log(b)
    d = c.T
    e = np.dot(a, d)
    print("------ 1 - Y: " + str(a.shape))
    print("------ 1 - A: " + str(b.shape))
    print("------ np.log(1 - A)" + str(c.shape))
    print("------ transpose: " + str(d.shape))
    print("------ full term2: " + str(e.shape))
    term2 = np.dot((1 - Y), np.log(1 - A).T)
    if math.isnan(term2):
        term2 = 0
    add_term = term1 + term2
    sum_term = np.sum(add_term)
    mul_term = -(1 / m) * sum_term
    print("---- term1: " + str(term1))
    print("---- term2: " + str(term2))
    print("---- add_term: " + str(add_term))
    print("---- sum_term: " + str(sum_term))
    print("---- mul_term: " + str(mul_term))
    cost = -(1 / m) * np.sum(np.dot(Y, np.log(A).T) + np.dot((1 - Y), np.log(1 - A).T))
    print("-- Computed cost: " + str(cost))
    
    #Backward propagation
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum((A - Y), axis = 1)

    
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    
    grads = {"dw": dw,
             "db": db}
    
    return grads, cost

def train(w, b, X, Y, num_iterations, learning_rate):
    costs = []
    
    for i in range(num_iterations):
        print()
        print("Iteration " + str(i) + ":")
        
        print("-- Starting propagation...")
        print("-- Set size: " + str(X.shape))
        print("-- Label set size: " + str(Y.shape))
        #Calculate cost and gradient step to take
        grads, cost = propagate(w, b, X, Y)
        
        #Retrieve the derivatives (steps)
        dw = grads["dw"]
        db = grads["db"]
        
        #Update values
        w = w - learning_rate * dw
        b = b - learning_rate * db.shape[0]
        
        #Check each 100th iteration
        if i % 10 == 0:
            costs.append(cost)
            print("Cost after iteration %i: %f" %(i, cost))
            
    params = {"w": w,
              "b": b}
    
    grads = {"dw": dw,
             "db": db}
    
    return params, grads, costs

def predict(w, b, X):
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)
    
    #Compute the vector of probabilities
    A = sigmoid(np.dot(w.T, X) + b)
    
    for i in range(A.shape[1]):
        if A[0][i] > 0.5:
            Y_prediction[0][i] = 1
        else:
            Y_prediction[0][i] = 0
    
    assert(Y_prediction.shape == (1, m))
    
    return Y_prediction

def model(X_train, Y_train, X_test, Y_test, num_iterations = 5, learning_rate = 0.5):
    #Initialize parameters
    print("Initializing parameters...")
    w, b = initialize_with_zeros(X_train.shape[0])
    print("w: " + str(w)) 
    print("b: " + str(b))
    
    #Train the model
    print("Training the model...")
    parameters, grads, costs = train(w, b, X_train, Y_train, num_iterations, learning_rate)
    print("Training done!")
    
    #Retrieve the trained parameters
    w = parameters["w"]
    b = parameters["b"]
    
    #Use the parameters to predict the output
    Y_prediction_test = predict(w, b, X_test)
    
    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test, 
         "w" : w, 
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}
    
    return d

In [129]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

#Drop columns and replace female -> 0, male -> 1
train_df = train_df.drop(["Name", "Ticket", "Embarked", "Cabin"], axis=1)
train_df = train_df.replace("female", 0)
train_df = train_df.replace("male", 1)
train_df = train_df.dropna()

test_df = test_df.drop(["Name", "Ticket", "Embarked", "Cabin"], axis=1)
test_df = test_df.replace("female", 0)
test_df = test_df.replace("male", 1)
test_df = test_df.dropna()

X_train = train_df.values
X_train = np.delete(X_train, 1, axis = 1)
Y_train = train_df.iloc[:, :1]
Y_train = Y_train.T

X_test = test_df.values
Y_test = np.zeros((X_test.shape[0], 1))
Y_test = Y_test.T

#m = no. of examples; n = no. of features
m_train, n_train = X_train.shape

m_test, n_test = X_test.shape

print("Training set shape: " + str(X_train.shape))
print("Testing set shape: " + str(X_test.shape))

#Column representation
X_train = X_train.T
X_test = X_test.T


print("Training set shape (transposed): " + str(X_train.shape))
print("Testing set shape (transposed): " + str(X_test.shape))

#Train
d = model(X_train, Y_train, X_test, Y_test, num_iterations = 50, learning_rate = 0.005)
print("Model:")
print(d)

Training set shape: (714, 7)
Testing set shape: (331, 7)
Training set shape (transposed): (7, 714)
Testing set shape (transposed): (7, 331)
Initializing parameters...
w: [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
b: 0
Training the model...

Iteration 0:
-- Starting propagation...
-- Set size: (7, 714)
-- Label set size: (1, 714)
Updated w: [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
Updated b: 0
-- Sigmoid on (1, 7), (7, 714)
-- Activations: (1, 714)
-- Cost values:
---- For term1:
------ Y: (1, 714)
------ log(1 - A).T: (714, 1)
--- For term2:
------ Y: (1, 714)
------ 1 - Y: (1, 714)
------ 1 - A: (1, 714)
------ np.log(1 - A)(1, 714)
------ transpose: (714, 1)
------ full term2: (1, 1)
---- term1: [[-222006.72416718]]
---- term2: [[221511.81708026]]
---- add_term: [[-494.90708692]]
---- sum_term: -494.90708691976033
---- mul_term: 0.6931471805598884
-- Computed cost: 0.6931471805598884
Cost after iteration 0: 0.693147

Iteration 1:
-- Starting propagation...
-- Set size: (7, 714)
-- 

  temp = np.log(1 - A).T
  c = np.log(b)
  term2 = np.dot((1 - Y), np.log(1 - A).T)
  cost = -(1 / m) * np.sum(np.dot(Y, np.log(A).T) + np.dot((1 - Y), np.log(1 - A).T))


-- Activations: (1, 714)
-- Cost values:
---- For term1:
------ Y: (1, 714)
------ log(1 - A).T: (714, 1)
--- For term2:
------ Y: (1, 714)
------ 1 - Y: (1, 714)
------ 1 - A: (1, 714)
------ np.log(1 - A)(1, 714)
------ transpose: (714, 1)
------ full term2: (1, 1)
---- term1: [[0.]]
---- term2: 0
---- add_term: [[0.]]
---- sum_term: 0.0
---- mul_term: -0.0
-- Computed cost: nan

Iteration 29:
-- Starting propagation...
-- Set size: (7, 714)
-- Label set size: (1, 714)
Updated w: [[3.88359910e+04]
 [1.44054156e+02]
 [4.16216632e+01]
 [1.94761442e+03]
 [3.03946218e+01]
 [2.76250000e+01]
 [2.27078722e+03]]
Updated b: -0.14500000000000005
-- Sigmoid on (1, 7), (7, 714)
-- Activations: (1, 714)
-- Cost values:
---- For term1:
------ Y: (1, 714)
------ log(1 - A).T: (714, 1)
--- For term2:
------ Y: (1, 714)
------ 1 - Y: (1, 714)
------ 1 - A: (1, 714)
------ np.log(1 - A)(1, 714)
------ transpose: (714, 1)
------ full term2: (1, 1)
---- term1: [[0.]]
---- term2: 0
---- add_term: [[0.]]
