In [45]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Defining the Activation Function

In [46]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [47]:
def tanh(x):
    t=(np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
    return t

## Defining the Network Structure

In [48]:
def layer_sizes(X, Y):
    n_x = X.shape[0] # size of input layer
    n_h = 4
    n_y = Y.shape[0] # size of output layer
    return (n_x, n_h, n_y)

In [49]:
X = np.array([[0.9,0]]).T
y = np.array([1])
(n_x, n_h, n_y) = layer_sizes(X, y)
print("The size of the input layer is: n_x = " + str(n_x))
print("The size of the hidden layer is: n_h = " + str(n_h))
print("The size of the output layer is: n_y = " + str(n_y))

The size of the input layer is: n_x = 2
The size of the hidden layer is: n_h = 4
The size of the output layer is: n_y = 1


## Initializing the Network Parameters

In [50]:
def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h,1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [51]:
parameters = initialize_parameters(n_x, n_h, n_y)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 0.00881318  0.01709573]
 [ 0.00050034 -0.00404677]
 [-0.0054536  -0.01546477]
 [ 0.00982367 -0.01101068]]
b1 = [[0.]
 [0.]
 [0.]
 [0.]]
W2 = [[-0.01185047 -0.0020565   0.01486148  0.00236716]]
b2 = [[0.]]


## Forward Propogation: Learn from the data

In [52]:
def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    Z1 = np.dot(W1,X) + b1
    A1 = tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    
    cache = {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    
    return A2, cache

In [53]:
A2, cache = forward_propagation(X, parameters)
print(cache['Z1'], cache['A1'], cache['Z2'], cache['A2'])

[[ 0.00793186]
 [ 0.0004503 ]
 [-0.00490824]
 [ 0.00884131]] [[ 0.0079317 ]
 [ 0.0004503 ]
 [-0.0049082 ]
 [ 0.00884108]] [[-0.00014694]] [[0.49996327]]


## Compute Cost of your network

In [54]:
def compute_cost(A2, Y, parameters):
    if len(Y.shape)==2:
        m = Y.shape[1] # number of example
    else:
        m = 1

    logprobs = np.multiply(np.log(A2),Y) + np.multiply(np.log(1-A2),(1-Y))
    cost = - (np.sum(logprobs)/m)
    cost = float(np.squeeze(cost))  # makes sure cost is the dimension we expect. 
                                    # E.g., turns [[17]] into 17 
    return cost

In [55]:
print("cost = " + str(compute_cost(A2, y, parameters)))

cost = 0.6932206508609429


## Back Propogate the Cost

In [56]:
def backward_propagation(parameters, cache, X, Y):
    m = X.shape[1]
    
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    A1 = cache['A1']
    A2 = cache['A2']
    
    dZ2 = A2-Y
    dW2 = (np.dot(dZ2,A1.T)/m)
    db2 = (np.sum(dZ2, axis=1,keepdims=True)/m)
    dZ1 = np.dot(W2.T, dZ2) * (1-np.power(A1,2))
    dW1 = (np.dot(dZ1, X.T)/m)
    db1 = (np.sum(dZ1, axis=1,keepdims=True)/m)
    
    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads

In [57]:
grads = backward_propagation(parameters, cache, X, y)
print ("dW1 = "+ str(grads["dW1"]))
print ("db1 = "+ str(grads["db1"]))
print ("dW2 = "+ str(grads["dW2"]))
print ("db2 = "+ str(grads["db2"]))

dW1 = [[ 0.00533277  0.        ]
 [ 0.00092549  0.        ]
 [-0.006688   -0.        ]
 [-0.00106522 -0.        ]]
db1 = [[ 0.0059253 ]
 [ 0.00102832]
 [-0.00743111]
 [-0.00118358]]
dW2 = [[-0.00396614 -0.00022517  0.00245428 -0.00442086]]
db2 = [[-0.50003673]]


## Update Parameters using Gradients

In [58]:
def update_parameters(parameters, grads, learning_rate = 1.2):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    dW1 = grads['dW1']
    db1 = grads['db1']
    dW2 = grads['dW2']
    db2 = grads['db2']
    
    W1 = W1 - (learning_rate*dW1)
    b1 = b1 - (learning_rate*db1)
    W2 = W2 - (learning_rate*dW2)
    b2 = b2 - (learning_rate*db2)
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    return parameters

In [59]:
parameters = update_parameters(parameters, grads)

print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

W1 = [[ 0.00241386  0.01709573]
 [-0.00061025 -0.00404677]
 [ 0.002572   -0.01546477]
 [ 0.01110194 -0.01101068]]
b1 = [[-0.00711035]
 [-0.00123399]
 [ 0.00891733]
 [ 0.00142029]]
W2 = [[-0.0070911  -0.0017863   0.01191635  0.0076722 ]]
b2 = [[0.60004408]]


## Integrating Everything

In [79]:
def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]
    
    parameters = initialize_parameters(X.shape[0], n_h, Y.shape[0])
    
    # Loop (gradient descent)
    for i in range(0, num_iterations): 
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)
        
        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)
 
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)
 
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads)
        
        # Print the cost every 1000 iterations
        if print_cost:
            print ("Cost after iteration %i: %f" %(i, cost))
    return parameters

In [61]:
parameters = nn_model(X, y, 4, num_iterations=1, print_cost=True)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

Cost after iteration 0: 0.693140
W1 = [[ 0.01764973  0.0043651 ]
 [-0.00161198 -0.01863493]
 [-0.00986865 -0.00354759]
 [ 0.00394951 -0.00627001]]
b1 = [[-0.00026284]
 [-0.00286328]
 [-0.00788308]
 [ 0.00530769]]
W2 = [[ 0.00921951 -0.0042511  -0.01463653  0.00839942]]
b2 = [[0.5999955]]


## Run Prediction

In [62]:
def predict(parameters, X):
    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    A2, cache = forward_propagation(X, parameters)
    predictions = np.where(A2 > 0.5, 1, 0)
    return predictions

In [63]:
predictions = predict(parameters, X)
print("predictions = " + str(predictions))

predictions = [[1]]


## Classification on a sample Kaggle Dataset

In [91]:
import pandas as pd
data = pd.read_csv("./datasets_4753_8110_ex2data1.csv")
X = data.drop("label", axis=1)
y = data['label']

In [92]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [93]:
X_train = X_train.values.T
X_test = X_test.values.T
y_train = y_train.values.T
y_test = y_test.values.T

In [94]:
X_train.shape

(2, 90)

In [95]:
X_test.shape

(2, 10)

In [97]:
y_train = y_train.reshape(1,y_train.shape[0])
y_train.shape

(1, 90)

In [98]:
y_test = y_test.reshape(1,y_test.shape[0])
y_test.shape

(1, 10)

In [99]:
parameters = nn_model(X_train, y_train, 4, num_iterations=10, print_cost=True)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

Cost after iteration 0: 0.692555
Cost after iteration 1: 0.675788
Cost after iteration 2: 0.709887
Cost after iteration 3: 0.687753
Cost after iteration 4: 0.682276
Cost after iteration 5: 0.681284
Cost after iteration 6: 0.681059
Cost after iteration 7: 0.681012
Cost after iteration 8: 0.681002
Cost after iteration 9: 0.681000
W1 = [[-0.15377971 -0.24994848]
 [ 0.18111922  0.149487  ]
 [-0.11658628 -0.11109082]
 [ 0.06943693  0.05971805]]
b1 = [[-0.0058268 ]
 [ 0.00492081]
 [-0.00070213]
 [ 0.00040817]]
W2 = [[ 0.04794049 -0.04228046 -0.17835274  0.03456175]]
b2 = [[0.18993889]]


In [111]:
predictions = predict(parameters, X_test)

In [112]:
predictions

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [113]:
y_test

array([[1, 1, 1, 0, 1, 0, 1, 1, 1, 1]], dtype=int64)

In [114]:
print('Accuracy: %d' % float((np.dot(y_test,predictions.T) + np.dot(1-y_test,1-predictions.T))/float(y_test.size)*100) + '%')

Accuracy: 80%
