In [0]:
import numpy as np


In [0]:
X = np.array([[1,-1,-3],[1,2,1]])
Y = np.array([[0,1,0]])

In [0]:
def sigmoid(z):
  s=1/(1+np.exp(-z))
  return s

In [0]:
def layer_sizes(X,Y):
  n_x=X.shape[0]
  n_h=4
  n_y=Y.shape[0]
  return n_x,n_h,n_y

In [0]:
def initialize_parameters(n_x, n_h, n_y):
  W1 = np.random.randn(n_h,n_x) * 0.01 #not initializing as zero, as it gives symmetric hidden units
  b1 = np.zeros((n_h,1))
  W2 = np.random.randn(n_y,n_h) * 0.01 #not initializing as zero, as it gives symmetric hidden units
  b2 = np.zeros((n_y,1))
  #checking the shape of the parameters
  assert (W1.shape == (n_h, n_x))
  assert (b1.shape == (n_h, 1))
  assert (W2.shape == (n_y, n_h))
  assert (b2.shape == (n_y, 1))
    
  parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
  return parameters


In [0]:
def forward_propagation(X, parameters):
  #Retrive each parameter using a dictionary "parameters"
  W1 = parameters["W1"]
  b1 = parameters["b1"]
  W2 = parameters["W2"]
  b2 = parameters["b2"]
    
  #Implement forward implementation
  Z1=np.dot(W1,X)+b1
  A1=np.tanh(Z1)
  Z2=np.dot(W2,A1)+b2
  A2=sigmoid(Z2)
    
    
  cache= {"Z1": Z1,
             "A1": A1,
             "Z2": Z2,
             "A2": A2}
    
  return A2, cache


In [0]:
def compute_cost(Y,A2,X):
  m = X.shape[1] #number of training examples
  cost=-(np.sum(np.dot(Y,np.log(A2).T)+np.dot((1-Y),np.log(1-A2).T)))/m #compute cost
    
  cost = np.squeeze(cost) #remove axis with singleton element
  return cost

In [0]:
def backward_propagation(cache,parameters,X):
  m = X.shape[1] #number of training examples
    
  #Retrive w1 and w2 from dictionary "parameters"
  W1 = parameters["W1"]
  W2 = parameters["W2"]
  #Retrive A1 and A2 from dictionary "cache"
  A1=cache["A1"]
  A2=cache["A2"]
    
  #Implementing backward propagation
  dZ2=A2-Y
  dW2=(np.dot(dZ2,A1.T))/m
  db2 = (np.sum(dZ2,axis = 1,keepdims = True))/m
  dZ1= np.dot(W2.T,dZ2) * (1-np.power(A2,2))
  dW1=(np.dot(dZ1,X.T))/m
  db1= (np.sum(dZ1,axis = 1,keepdims = True))/m
      
  grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    
  return grads
    

    
    
    


In [0]:
def update_parameters(X,parameters,num_iterations, learning_rate = 1.2):
  costs=[]
  for i in range(num_iterations):
    A2,cache=forward_propagation(X,parameters)
    grads=backward_propagation(cache,parameters,X)
    # Retrieve each parameter from the dictionary "parameters"
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    # Retrieve each gradient from the dictionary "grads"
    
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
  
    # Update rule for each parameter
    W1 = W1-learning_rate*dW1
    b1 = b1-learning_rate*db1
    W2 = W2-learning_rate*dW2
    b2 = b2-learning_rate*db2

    cost=compute_cost(Y,A2,X)
    if i%1000==0:
      costs.append(cost)
      
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
  return parameters,costs

In [0]:
def predict(parameters, X):
  
  # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
  A2, cache =   forward_propagation(X, parameters)
  predictions =(A2>0.5)*1 #for changing bollean array to integer form
    
  return predictions

In [61]:
n_x,n_h,n_y=layer_sizes(X,Y)
parameters=initialize_parameters(n_x,n_h,n_y)
parameters,costs=update_parameters(X,parameters,10000,learning_rate=0.04)
print(costs)
print ("predictions = " + str(predict(parameters,X)))

[0.6931439140398292, 0.21201326852244126, 0.033430981777228745, 0.020031723080977636, 0.015434986995022727, 0.013164552956134312, 0.01187256412748249, 0.011104740494616532, 0.0106691978107435, 0.010476711136510329]
predictions = [[0 1 0]]
