In [2]:
import time
import random
import numpy as np
import matplotlib.pyplot as plt


### Activation Functions

In [44]:
def sigmoid(Z):
    A = 1/(1+np.exp(-Z)) 
    return A


def softmax(z):
    # Subtract the maximum value in z for numerical stability
    shift_z = z - np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(shift_z)
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

def relu(Z):
    A = np.maximum(0,Z)
    return A

def tanh(x):
    return np.tanh(x)

def derivative_relu(Z):
    return np.array(Z > 0, dtype = 'float')

def derivative_tanh(x):
    return (1 - np.power(x, 2))

### Initialize parameters

In [5]:
def initialize_parameters(layer_dims):
    
    parameters = {}
    L = len(layer_dims)            

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1]) #for avoiding vanishing gradiennt problem
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
        
    return parameters

In [6]:

layer_dims = [14, 100, 40, 4]
params = initialize_parameters(layer_dims)

for l in range(1, len(layer_dims)):
    print("Shape of W" + str(l) + ":", params['W' + str(l)].shape)
    print("Shape of B" + str(l) + ":", params['b' + str(l)].shape, "\n")

Shape of W1: (100, 14)
Shape of B1: (100, 1) 

Shape of W2: (40, 100)
Shape of B2: (40, 1) 

Shape of W3: (4, 40)
Shape of B3: (4, 1) 



In [8]:
len(params)

6

### Forward Propagation

In [36]:
def forward_propagation(X, parameters, activation):
   
    forward_cache = {}
    L = len(parameters) // 2                  
    
    forward_cache['A0'] = X

    for l in range(1, L):
        forward_cache['Z' + str(l)] = parameters['W' + str(l)].dot(forward_cache['A' + str(l-1)]) + parameters['b' + str(l)]
        
        if activation == 'tanh':
            forward_cache['A' + str(l)] = tanh(forward_cache['Z' + str(l)])
        else:
            forward_cache['A' + str(l)] = relu(forward_cache['Z' + str(l)])
            

    forward_cache['Z' + str(L)] = parameters['W' + str(L)].dot(forward_cache['A' + str(L-1)]) + parameters['b' + str(L)]
    
    if forward_cache['Z' + str(L)].shape[0] == 1:
        forward_cache['A' + str(L)] = sigmoid(forward_cache['Z' + str(L)])
    else :
        forward_cache['A' + str(L)] = softmax(forward_cache['Z' + str(L)])
    
    return forward_cache['A' + str(L)], forward_cache

### Cost function

In [9]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    
    if Y.shape[0] == 1:
        cost = (1./m) * (-np.dot(Y,np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))
    else:
        cost = -(1./m) * np.sum(Y * np.log(AL))
        
    cost = np.squeeze(cost)      # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
    
    return cost

In [14]:
def backward_propagation(AL, Y, parameters, forward_cache, activation):
    
    grads = {}
    L = len(parameters)//2
    m = AL.shape[1]
    
    grads["dZ" + str(L)] = AL - Y  # After derivation of cross entropy loss and softmax activation function
    grads["dW" + str(L)] = 1./m * np.dot(grads["dZ" + str(L)],forward_cache['A' + str(L-1)].T)
    grads["db" + str(L)] = 1./m * np.sum(grads["dZ" + str(L)], axis = 1, keepdims = True)
    
    for l in reversed(range(1, L)):
        if activation == 'tanh':
            grads["dZ" + str(l)] = np.dot(parameters['W' + str(l+1)].T,grads["dZ" + str(l+1)])*derivative_tanh(forward_cache['A' + str(l)])
        else:
            grads["dZ" + str(l)] = np.dot(parameters['W' + str(l+1)].T,grads["dZ" + str(l+1)])*derivative_relu(forward_cache['A' + str(l)])
            
        grads["dW" + str(l)] = 1./m * np.dot(grads["dZ" + str(l)],forward_cache['A' + str(l-1)].T)
        grads["db" + str(l)] = 1./m * np.sum(grads["dZ" + str(l)], axis = 1, keepdims = True)

    return grads

In [15]:
def update_parameters(parameters, grads, learning_rate):

    L = len(parameters) // 2 
    
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]
        
    return parameters

In [13]:
def predict(X, y, parameters, activation):

    m = X.shape[1]
    y_pred, caches = forward_propagation(X, parameters, activation)
    
    if y.shape[0] == 1:
        y_pred = np.array(y_pred > 0.5, dtype = 'float')
    else:
        y = np.argmax(y, 0)
        y_pred = np.argmax(y_pred, 0)
    
    return np.round(np.sum((y_pred == y)/m), 2)

In [38]:
def model(X, Y, layers_dims, learning_rate = 0.03, activation = 'relu', num_iterations = 3000):

    np.random.seed(1)
    costs = []              
    
    parameters = initialize_parameters(layers_dims)
    # print("parameters", parameters)

    for i in range(0, num_iterations):

        AL, forward_cache = forward_propagation(X, parameters, activation)
        print("dkjjkf",AL)

        cost = compute_cost(AL, Y)

        grads = backward_propagation(AL, Y, parameters, forward_cache, activation)

        parameters = update_parameters(parameters, grads, learning_rate)
        

       
    return parameters

In [48]:
X=[-1, 1, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1]
y = [0,0,0,1]

# make this suitable for input as NN
X_train = np.array(X).reshape(-1, 1)
Y_train = np.array(y).reshape(-1, 1)

print(X_train.shape, Y_train.shape)

layers_dims = [14, 100, 40, 4]
lr = 0.01
iters = 100

(14, 1) (4, 1)


In [49]:
parameters = model(X_train, Y_train, layers_dims, learning_rate = lr, activation = 'relu', num_iterations = iters)

dkjjkf [[0.26385234]
 [0.25950063]
 [0.05242399]
 [0.42422303]]
dkjjkf [[0.21521787]
 [0.21539771]
 [0.04748174]
 [0.52190268]]
dkjjkf [[0.17595881]
 [0.17831228]
 [0.0420422 ]
 [0.60368671]]
dkjjkf [[0.14588069]
 [0.14994819]
 [0.03696157]
 [0.66720954]]
dkjjkf [[0.12298827]
 [0.12733634]
 [0.03249737]
 [0.71717803]]
dkjjkf [[0.10500874]
 [0.10933851]
 [0.02871268]
 [0.75694007]]
dkjjkf [[0.09197021]
 [0.0957009 ]
 [0.02568248]
 [0.78664641]]
dkjjkf [[0.0823465 ]
 [0.08532501]
 [0.02322841]
 [0.80910007]]
dkjjkf [[0.07412564]
 [0.07676476]
 [0.02108528]
 [0.82802432]]
dkjjkf [[0.06707533]
 [0.06962771]
 [0.01921892]
 [0.84407804]]
dkjjkf [[0.06103591]
 [0.06348918]
 [0.01760237]
 [0.85787254]]
dkjjkf [[0.0558281 ]
 [0.05817737]
 [0.01619498]
 [0.86979955]]
dkjjkf [[0.05134806]
 [0.05357656]
 [0.01501633]
 [0.88005905]]
dkjjkf [[0.04746595]
 [0.04955909]
 [0.0140125 ]
 [0.88896247]]
dkjjkf [[0.04405626]
 [0.04601728]
 [0.0131226 ]
 [0.89680386]]
dkjjkf [[0.0411221 ]
 [0.04304368]
 [0.0

In [50]:
parameters

{'W1': array([[ 0.43567627, -0.16505049, -0.14271155, ..., -0.54904406,
         -0.08772133, -0.10419455],
        [ 0.30301263, -0.29395831, -0.04608338, ..., -0.18273396,
         -0.03284379, -0.2500949 ],
        [-0.0715961 ,  0.14174346, -0.18485411, ...,  0.19831964,
         -0.05127021, -0.23722882],
        ...,
        [-0.16746788,  0.20635766,  0.12756443, ...,  0.35586704,
          0.13919292,  0.01560054],
        [ 0.19261142, -0.41303825,  0.43803091, ...,  0.10868329,
         -0.16049154, -0.1733552 ],
        [-0.19587037,  0.53237582, -0.15357787, ...,  0.19390227,
          0.17632195, -0.05984731]]),
 'b1': array([[-0.00155171],
        [ 0.        ],
        [ 0.        ],
        [-0.00741243],
        [ 0.00619877],
        [ 0.00579899],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [ 0.00776821],
        [-0.0034506 ],
        [ 0.        ],
        [ 0.        ],
        [ 0.        ],
        [-0.0014