In [22]:
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt


In [11]:
def sigmoid(z):
    """Apply the logistic function element-wise.

    Parameters
    ----------
    z : numeric scalar or ndarray
        Pre-activation value(s).

    Returns
    -------
    tuple
        ``(activation, z)`` where ``activation = 1 / (1 + exp(-z))`` and the
        second item is the untouched input, handed back so the caller can
        keep it as a cache for the backward pass.
    """
    denominator = 1 + np.exp(-z)
    activation = 1 / denominator
    return activation, z

def relu(z):
    """Apply the rectified linear unit element-wise.

    The comparison is done per element with ``np.maximum`` so that arrays of
    any shape are supported; a plain ``if z > 0`` cannot be evaluated on an
    array with more than one element.

    Parameters
    ----------
    z : numeric scalar or ndarray
        Pre-activation value(s).

    Returns
    -------
    tuple
        ``(activation, z)`` where ``activation = max(0, z)`` element-wise and
        the second item is the untouched input, returned as a cache for the
        backward pass.
    """
    rel = np.maximum(0, z)

    return rel, z

In [4]:
def initialize_parameters(layer_dims):
    """Create the weight and bias arrays for every layer of the network.

    The global NumPy RNG is re-seeded with 1 on every call, so the returned
    weights are the same for identical ``layer_dims``.

    Parameters
    ----------
    layer_dims : sequence of int
        Number of units per layer, input layer first.

    Returns
    -------
    dict
        ``"W<l>"`` -> array of shape ``(layer_dims[l], layer_dims[l-1])``
        drawn from a standard normal and scaled by 0.01, and
        ``"b<l>"`` -> zero array of shape ``(layer_dims[l], 1)``,
        for ``l = 1 .. len(layer_dims) - 1``.
    """
    np.random.seed(1)

    params = {}
    layer_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_in, n_out) in enumerate(layer_pairs, start=1):
        params["W{}".format(idx)] = np.random.randn(n_out, n_in) * 0.01
        params["b{}".format(idx)] = np.zeros((n_out, 1))

    return params

In [None]:
def linear_forward(A, W, b):
    """Compute the affine part of a layer: ``Z = W @ A + b``.

    Parameters
    ----------
    A : ndarray
        Activations fed into the layer.
    W : ndarray
        Weight matrix of the layer.
    b : ndarray
        Bias of the layer; added to the matrix product via broadcasting.

    Returns
    -------
    tuple
        ``(Z, (A, W, b))`` - the pre-activation and the inputs, kept so the
        backward pass can reuse them.
    """
    pre_activation = W @ A
    pre_activation = pre_activation + b
    return pre_activation, (A, W, b)

In [13]:
def linear_activation_forward(A_prev, W, b, activation):
    """Run one full layer forward: affine step followed by its activation.

    Parameters
    ----------
    A_prev : ndarray
        Activations coming from the previous layer (or the input data).
    W : ndarray
        Weight matrix of this layer.
    b : ndarray
        Bias of this layer.
    activation : str
        Either ``"sigmoid"`` or ``"relu"``.

    Returns
    -------
    tuple
        ``(A, cache)`` where ``A`` is the layer output and ``cache`` is
        ``(linear_cache, activation_cache)`` as produced by
        ``linear_forward`` and the chosen activation function.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names. Without this
        check an unsupported name would only surface later as an
        ``UnboundLocalError``.
    """
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got %r" % (activation,)
        )

    # The affine step is identical for both activations, so do it once.
    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    cache = (linear_cache, activation_cache)

    return A, cache

In [5]:
def L_model_forward(X, parameters):
    """Forward pass through the whole network.

    Every layer except the last uses the ``"relu"`` activation; the last
    layer uses ``"sigmoid"``.

    Parameters
    ----------
    X : ndarray
        Input data fed to the first layer.
    parameters : dict
        Holds ``"W<l>"`` and ``"b<l>"`` for each layer; the number of layers
        is taken to be ``len(parameters) // 2``.

    Returns
    -------
    tuple
        ``(AL, caches)`` - the output of the last layer and a list with one
        cache per layer, in layer order.
    """
    num_layers = len(parameters) // 2
    caches = []
    activations = X

    # Hidden layers: 1 .. num_layers - 1
    for idx in range(1, num_layers):
        weights = parameters['W' + str(idx)]
        bias = parameters['b' + str(idx)]
        activations, layer_cache = linear_activation_forward(
            activations, weights, bias, activation="relu"
        )
        caches.append(layer_cache)

    # Output layer
    AL, output_cache = linear_activation_forward(
        activations,
        parameters['W' + str(num_layers)],
        parameters['b' + str(num_layers)],
        activation="sigmoid",
    )
    caches.append(output_cache)

    return AL, caches

In [6]:
def compute_cost(AL, Y):
    """Binary cross-entropy cost averaged over the examples.

    The loss is accumulated element-wise. A matrix product ``Y @ log(AL).T``
    only yields the same number when both arrays have shape ``(1, m)``; for
    any other shape it would add cross terms between unrelated rows.

    Parameters
    ----------
    AL : ndarray
        Predicted probabilities, same shape as ``Y``.
    Y : ndarray
        True labels (0 or 1). The number of examples ``m`` is read from
        ``Y.shape[1]``.

    Returns
    -------
    numpy scalar
        ``-(1/m) * sum(Y * log(AL) + (1 - Y) * log(1 - AL))``.
    """
    m = Y.shape[1]

    log_likelihood = Y * np.log(AL) + (1 - Y) * np.log(1 - AL)
    cost = (-1 / m) * np.sum(log_likelihood)

    return cost

In [None]:
def sigmoid_backward(A, activation_cache):
    """Backward step through a sigmoid activation.

    Parameters
    ----------
    A : ndarray
        Gradient of the cost with respect to the activation output (the
        caller in this notebook passes ``dA`` here).
    activation_cache : ndarray
        The pre-activation ``Z`` stored by ``sigmoid`` during the forward
        pass.

    Returns
    -------
    ndarray
        Gradient with respect to ``Z``: ``A * s * (1 - s)`` with
        ``s = sigmoid(Z)``.
    """
    z = activation_cache
    sig = 1 / (1 + np.exp(-z))

    # Chain rule: upstream gradient times the local derivative s * (1 - s).
    dZ = A * sig * (1 - sig)

    return dZ

def relu_backward(A, activation_cache):
    """Backward step through a ReLU activation.

    Parameters
    ----------
    A : ndarray
        Gradient of the cost with respect to the activation output (the
        caller in this notebook passes ``dA`` here).
    activation_cache : ndarray
        The pre-activation ``Z`` stored by ``relu`` during the forward pass.

    Returns
    -------
    ndarray
        Gradient with respect to ``Z``: the upstream gradient where
        ``Z > 0`` and zero elsewhere.
    """
    z = activation_cache

    # Element-wise mask; `if z > 0` cannot be evaluated on a whole array.
    drel = (z > 0).astype(float)

    # Chain rule: upstream gradient times the local derivative (1 or 0).
    dZ = A * drel

    return dZ

In [16]:
def linear_backward(dZ, cache):
    """Backward step through the affine part of a layer.

    Parameters
    ----------
    dZ : ndarray
        Gradient of the cost with respect to this layer's pre-activation.
    cache : tuple
        ``(A_prev, W, b)`` as stored by ``linear_forward``.

    Returns
    -------
    tuple
        ``(dA_prev, dW, db)`` where
        ``dW = (1/m) * dZ @ A_prev.T``,
        ``db = (1/m) * sum(dZ, axis=1, keepdims=True)``,
        ``dA_prev = W.T @ dZ``,
        and ``m = A_prev.shape[1]``.
    """
    prev_activations, weights, _bias = cache
    scale = 1 / prev_activations.shape[1]

    grad_weights = scale * (dZ @ prev_activations.T)
    grad_bias = scale * np.sum(dZ, axis=1, keepdims=True)
    grad_prev = weights.T @ dZ

    return grad_prev, grad_weights, grad_bias

In [17]:
def linear_activation_backward(dA, cache, activation):
    """Run one full layer backward: activation step, then affine step.

    Parameters
    ----------
    dA : ndarray
        Gradient of the cost with respect to this layer's output.
    cache : tuple
        ``(linear_cache, activation_cache)`` stored for this layer during
        the forward pass.
    activation : str
        Either ``"relu"`` or ``"sigmoid"``.

    Returns
    -------
    tuple
        ``(dA_prev, dW, db)`` as returned by ``linear_backward``.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names. Without this
        check an unsupported name would only surface later as an
        ``UnboundLocalError``.
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got %r" % (activation,)
        )

    # The affine backward step is identical for both activations.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

In [18]:
def L_model_backward(AL, Y, caches):
    """Backward pass through the whole network.

    The last cache is processed with the ``'sigmoid'`` activation and all
    earlier caches with ``'relu'``, walking from the output layer back to
    the first layer.

    Parameters
    ----------
    AL : ndarray
        Output of the forward pass.
    Y : ndarray
        True labels; reshaped to ``AL.shape`` before use.
    caches : list
        One cache per layer, in layer order, as returned by
        ``L_model_forward``.

    Returns
    -------
    dict
        ``"dA<l-1>"``, ``"dW<l>"`` and ``"db<l>"`` for every layer
        ``l = 1 .. len(caches)``.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output layer (sigmoid).
    current_cache = caches[-1]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, 'sigmoid')
    grads["dA" + str(L - 1)] = dA_prev_temp
    grads["dW" + str(L)] = dW_temp
    grads["db" + str(L)] = db_temp

    # Hidden layers (relu), from layer L-1 down to layer 1. Index l is the
    # zero-based position in `caches`, i.e. layer number l + 1.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads['dA' + str(l + 1)], current_cache, 'relu')
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [19]:
def update_parameters(parameters, grads, learning_rate):
    """Take one gradient-descent step on every weight and bias.

    The ``parameters`` dict itself is updated (its entries are rebound to
    new arrays) and then returned.

    Parameters
    ----------
    parameters : dict
        Holds ``"W<l>"`` and ``"b<l>"`` for each layer; the number of layers
        is taken to be ``len(parameters) // 2``.
    grads : dict
        Holds ``"dW<l>"`` and ``"db<l>"`` for each layer.
    learning_rate : float
        Step size.

    Returns
    -------
    dict
        The same ``parameters`` object with
        ``param = param - learning_rate * grad`` applied.
    """
    num_layers = len(parameters) // 2

    for idx in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            step = learning_rate * grads['d' + key]
            parameters[key] = parameters[key] - step

    return parameters

In [20]:
def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
    """Train the L-layer network with batch gradient descent and plot the cost.

    Each iteration runs a forward pass, computes the cost, runs a backward
    pass and updates the parameters. The cost is recorded every 100
    iterations and plotted once training finishes.

    Parameters
    ----------
    X : ndarray
        Training inputs passed to ``L_model_forward``.
    Y : ndarray
        Training labels passed to ``compute_cost`` and ``L_model_backward``.
    layers_dims : sequence of int
        Layer sizes passed to ``initialize_parameters``.
    learning_rate : float, optional
        Step size for ``update_parameters``.
    num_iterations : int, optional
        Number of gradient-descent iterations.
    print_cost : bool, optional
        If True, print the cost every 100 iterations.

    Returns
    -------
    dict
        The trained parameters.
    """
    costs = []
    # The notebook defines `initialize_parameters`; there is no
    # `initialize_parameters_deep` in scope.
    parameters = initialize_parameters(layers_dims)

    for i in range(num_iterations):

        AL, caches = L_model_forward(X, parameters)

        cost = compute_cost(AL, Y)

        grads = L_model_backward(AL, Y, caches)

        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 100 == 0:
            # Always record the cost so the plot below is populated even
            # when printing is turned off.
            costs.append(cost)
            if print_cost:
                print ("Cost after iteration %i: %f" %(i, cost))

    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters

In [114]:
# Load the training CSV, using its first column as the DataFrame index.
# NOTE(review): this is an absolute, machine-specific path; the cell will
# fail on any other machine until it is pointed at a local copy of the file.
raw_data = pd.read_csv(
    "/Users/chaitanyabalasankula/Desktop/Machine Learning/Kaggle/Data_Sets/Titanic/train_data.csv",
    index_col=0,
)

In [127]:
# Drop the first column of the raw frame.
data = raw_data.iloc[:, 1:]

# Features: every column of `data` except its first, as an array with 14
# columns (presumably the first column of `data` is `Survived` - verify
# against the CSV).
# NOTE(review): the network functions in this notebook read the number of
# examples from axis 1 (e.g. `Y.shape[1]`, `A_prev.shape[1]`), whereas X and
# Y here put examples on axis 0 - confirm the orientation before training.
X = np.array(data.iloc[:, 1:]).reshape(-1, 14)

# Labels: the `Survived` column as a single-column array.
Y = np.array(data["Survived"]).reshape(-1, 1)



In [None]:
# parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)
# pred_test = predict(test_x, test_y, parameters)