In [1]:
#import libraries
import numpy as np

In [2]:
#define the sigmoid activation function for output layer
def sigmoid(z):
  """Logistic function 1 / (1 + e^(-z)), evaluated with NumPy so arrays are handled element-wise."""
  denominator = 1 + np.exp(-z)
  return 1/denominator

In [3]:
#initialize the parameters for 2 hidden layers
def initialize_parameters(n_x, n_h1, n_h2, n_y):
  """Draw standard-normal starting values for every weight matrix and bias vector.

  Args:
    n_x: number of input features.
    n_h1: number of units in the 1st hidden layer.
    n_h2: number of units in the 2nd hidden layer.
    n_y: number of output units.

  Returns:
    dict with keys "W1", "b1", "W2", "b2", "W3", "b3"; each W has shape
    (units in this layer, units in previous layer) and each b has shape
    (units in this layer, 1).
  """
  # Random (not zero) values so the units of a layer do not all start identical.
  # The tuple order fixes the order of the random draws.
  shapes = (
    ("W1", (n_h1, n_x)),
    ("b1", (n_h1, 1)),
    ("W2", (n_h2, n_h1)),
    ("b2", (n_h2, 1)),
    ("W3", (n_y, n_h2)),
    ("b3", (n_y, 1)),
  )
  return {name: np.random.randn(*shape) for name, shape in shapes}

In [4]:
#perform forward propagation
def forward_prop(X, parameters):
  """Run one forward pass through the three layers.

  Args:
    X: input array that W1 is multiplied with (one example per column in this notebook).
    parameters: dict holding "W1", "b1", "W2", "b2", "W3", "b3".

  Returns:
    (A3, cache): the output-layer activation, and a dict with the
    activations "A1", "A2", "A3" that back propagation reads.
  """
  # Hidden layers use tanh; each bias column is broadcast across the columns of the product.
  hidden1 = np.tanh(np.dot(parameters["W1"], X) + parameters["b1"])
  hidden2 = np.tanh(np.dot(parameters["W2"], hidden1) + parameters["b2"])

  # Output layer uses the sigmoid.
  output = sigmoid(np.dot(parameters["W3"], hidden2) + parameters["b3"])

  cache = {"A1": hidden1, "A2": hidden2, "A3": output}
  return output, cache

In [5]:
#compute the cross-entropy cost used to tune the parameters
def calculate_cost(A3, Y):
  """Binary cross-entropy (logistic regression) cost averaged over the examples.

  Args:
    A3: predicted probabilities from the output layer, same shape as Y.
    Y: 0/1 target labels; the last axis indexes the examples
       (shape (1, m) in this notebook).

  Returns:
    The cost with singleton dimensions squeezed out.
  """
  Y = np.asarray(Y)
  # Take the example count from the labels rather than from a notebook-level
  # global, so the function works on a fresh kernel and for any batch size.
  m = Y.shape[-1]

  # A saturated sigmoid can return exactly 0 or 1; log(0) = -inf multiplied by
  # a zero label would make the cost nan. Values strictly inside the clip
  # range are left untouched.
  eps = 1e-15
  A3 = np.clip(A3, eps, 1 - eps)

  cost = -np.sum(np.multiply(Y, np.log(A3)) + np.multiply(1-Y, np.log(1-A3)))/m
  return np.squeeze(cost)

In [6]:
#perform back propagation
def backward_prop(X, Y, cache, parameters):
  """Compute the gradients of the cost with respect to every weight and bias.

  Args:
    X: inputs, one example per column (shape (n_x, m)).
    Y: target labels, same shape as the output activation A3.
    cache: dict with the activations "A1", "A2", "A3" from the forward pass.
    parameters: dict of network parameters; only "W2" and "W3" are read here.

  Returns:
    dict with keys "dW1", "db1", "dW2", "db2", "dW3", "db3".
  """
  A1 = cache["A1"]
  A2 = cache["A2"]
  A3 = cache["A3"]

  W3 = parameters["W3"]
  W2 = parameters["W2"]

  # Number of examples comes from the batch itself instead of a notebook-level
  # global, so the function does not depend on another cell having run.
  m = X.shape[1]

  # Output layer.
  dZ3 = A3 - Y
  dW3 = np.dot(dZ3, A2.T)/m
  db3 = np.sum(dZ3, axis=1, keepdims=True)/m

  # 2nd hidden layer: 1 - A^2 is the derivative of tanh written in terms of its output.
  dZ2 = np.multiply(np.dot(W3.T, dZ3), 1-np.power(A2, 2))
  dW2 = np.dot(dZ2, A1.T)/m
  db2 = np.sum(dZ2, axis=1, keepdims=True)/m

  # 1st hidden layer.
  dZ1 = np.multiply(np.dot(W2.T, dZ2), 1-np.power(A1, 2))
  dW1 = np.dot(dZ1, X.T)/m
  db1 = np.sum(dZ1, axis=1, keepdims=True)/m

  grads = {
    "dW1": dW1,
    "db1": db1,
    "dW2": dW2,
    "db2": db2,
    "dW3": dW3,
    "db3": db3
  }

  return grads

In [7]:
#We update the parameters
def update_parameters(parameters, grads, learning_rate):
  """Apply one gradient-descent step to every parameter.

  Args:
    parameters: dict with "W1", "b1", "W2", "b2", "W3", "b3".
    grads: dict with the matching gradients "dW1", "db1", ..., "db3".
    learning_rate: step size multiplying each gradient.

  Returns:
    A new dict of updated parameters; the input dict is not modified.
  """
  # Same key order as the returned dict has always had: weights first, then biases.
  names = ("W1", "W2", "W3", "b1", "b2", "b3")
  return {
    name: parameters[name] - learning_rate * grads["d" + name]
    for name in names
  }

In [8]:
#model to run neural network
def model(X, Y, n_x, n_h1, n_h2, n_y, num_of_iters, learning_rate):
  """Train the network with gradient descent and return the learned parameters.

  Args:
    X: training inputs passed to forward_prop and backward_prop.
    Y: training labels.
    n_x, n_h1, n_h2, n_y: layer sizes handed to initialize_parameters.
    num_of_iters: iteration count; the loop runs num_of_iters + 1 times.
    learning_rate: gradient-descent step size.

  Returns:
    The parameter dict after the last update.
  """
  parameters = initialize_parameters(n_x, n_h1, n_h2, n_y)

  # One extra pass so that iteration number num_of_iters itself is reported.
  for iteration in range(num_of_iters + 1):
    output, cache = forward_prop(X, parameters)
    # The cost is measured before this iteration's update is applied.
    cost = calculate_cost(output, Y)
    gradients = backward_prop(X, Y, cache, parameters)
    parameters = update_parameters(parameters, gradients, learning_rate)

    # Report progress every 100th iteration only.
    if iteration % 100 != 0:
      continue
    print('Cost after iteration# {:d}: {:f}'.format(iteration, cost))

  return parameters

In [9]:
#predict a class label using the trained network
def predict(X, parameters):
  """Classify the input(s) with the trained network.

  Args:
    X: inputs, one example per column.
    parameters: trained parameter dict as returned by model().

  Returns:
    A plain int (0 or 1) when the squeezed network output is a single value,
    otherwise an integer array of 0/1 labels, one per output value.
  """
  output, _ = forward_prop(X, parameters)
  probabilities = np.squeeze(output)

  # Threshold element-wise: a bare `if` on an array with more than one
  # element raises ValueError, which made batch prediction impossible.
  labels = np.asarray(probabilities >= 0.5).astype(int)

  # A single example keeps returning a plain int so '{:d}'.format(...) works.
  if labels.ndim == 0:
    return int(labels)
  return labels

In [10]:
#main code starts here
if __name__ == "__main__":
    # Seed NumPy's global RNG so the random initial parameters are repeatable.
    np.random.seed(2)

    # Training inputs: 8 examples, one per column; each row is one of the 4 input bits.
    X = np.array([
        [0, 1, 0, 1, 1, 0, 0, 1],
        [0, 0, 1, 0, 1, 1, 0, 1],
        [0, 0, 0, 1, 0, 1, 1, 1],
        [0, 1, 0, 0, 1, 0, 1, 1],
    ])

    # Targets: the 4-input AND of each column of X (only the all-ones column gives 1).
    Y = np.array([[0, 0, 0, 0, 0, 0, 0, 1]])

    # Number of training examples (columns of X), kept as a module-level name.
    m = X.shape[1]

    # Layer sizes: input, 1st hidden, 2nd hidden, output.
    n_x, n_h1, n_h2, n_y = 4, 2, 2, 1

    # Training hyperparameters.
    num_of_iters, learning_rate = 1000, 0.3

    trained_parameters = model(X, Y, n_x, n_h1, n_h2, n_y, num_of_iters, learning_rate)

Cost after iteration# 0: 0.592205
Cost after iteration# 100: 0.165568
Cost after iteration# 200: 0.026749
Cost after iteration# 300: 0.013933
Cost after iteration# 400: 0.009449
Cost after iteration# 500: 0.007151
Cost after iteration# 600: 0.005746
Cost after iteration# 700: 0.004795
Cost after iteration# 800: 0.004106
Cost after iteration# 900: 0.003581
Cost after iteration# 1000: 0.003168


In [11]:
# Test 4X1 vector to calculate the 4 input AND of its elements. 
# One 4x1 input column containing a 0 bit, so its 4-input AND is 0.
X_test = np.array([0, 1, 1, 1]).reshape(4, 1)
y_predict = predict(X_test, trained_parameters)

# Show the predicted class.
print('Neural Network prediction is {:d}'.format(y_predict))

Neural Network prediction is 0


In [12]:
# One 4x1 input column of all ones, so its 4-input AND is 1.
X_test = np.array([1, 1, 1, 1]).reshape(4, 1)
y_predict = predict(X_test, trained_parameters)

# Show the predicted class.
print('Neural Network prediction is {:d}'.format(y_predict))

Neural Network prediction is 1
