In [17]:
import numpy as np
import matplotlib.pyplot as plt

from mnist_nn_utils import load_train_test_dataset, softmax, relu, relu_backward, softmax_backward

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# Load the train/test splits through the project helper imported from mnist_nn_utils.
# NOTE(review): judging by the cell output, the helper prints a preview of each
# dataframe as a side effect. Later cells use X_train.shape[0] as the input-layer
# size, so the arrays are presumably laid out as (features, examples) — confirm
# against the helper.
X_train, Y_train, X_test, Y_test= load_train_test_dataset()

-------------Train Dataframe:---------

    label  1x1  1x2  1x3  1x4  1x5  1x6  1x7  1x8  1x9  ...  28x19  28x20  \
0      5    0    0    0    0    0    0    0    0    0  ...      0      0   
1      0    0    0    0    0    0    0    0    0    0  ...      0      0   
2      4    0    0    0    0    0    0    0    0    0  ...      0      0   
3      1    0    0    0    0    0    0    0    0    0  ...      0      0   
4      9    0    0    0    0    0    0    0    0    0  ...      0      0   

   28x21  28x22  28x23  28x24  28x25  28x26  28x27  28x28  
0      0      0      0      0      0      0      0      0  
1      0      0      0      0      0      0      0      0  
2      0      0      0      0      0      0      0      0  
3      0      0      0      0      0      0      0      0  
4      0      0      0      0      0      0      0      0  

[5 rows x 785 columns]


--------------Test Dataframe:---------

    label  1x1  1x2  1x3  1x4  1x5  1x6  1x7  1x8  1x9  ...  28x19  28x20  \

In [19]:
'''########### ONE HOT ENCODING OF Y_TRAIN #################

https://kite.com/python/answers/how-to-do-one-hot-encoding-with-numpy-in-python
'''
NUM_CLASSES = 10  # one class per digit 0-9

# This cell overwrites Y_train, so running it a second time would otherwise try
# to encode the already-encoded matrix. Detect that case and leave it untouched
# so the cell is safe to re-run.
if getattr(Y_train, "ndim", None) == 2 and Y_train.shape[0] == NUM_CLASSES:
    one_hot = Y_train
else:
    # Integer labels are required for fancy indexing; flatten so both (m,) and
    # (m, 1) / (1, m) label layouts are handled the same way.
    labels = np.asarray(Y_train).astype(int).ravel()

    shape = (NUM_CLASSES, labels.size)
    one_hot = np.zeros(shape)
    rows = np.arange(labels.size)

    # one_hot has one ROW per class and one COLUMN per training example.
    # one_hot[labels, rows] = 1 puts a 1 at (row=label value, column=example index),
    # i.e. column j is the one-hot vector of example j.
    one_hot[labels, rows] = 1

print("Shape of One Hot encoding",one_hot.shape)

Y_train=one_hot  # Set Y_train to the encoded variable

Shape of One Hot encoding (10, 60000)


In [20]:
''' ########### INITIALIZATION OF L LAYERS IN DEEP NN ##############'''
def initialize_parameters_deep(layer_dims):
    '''Build the initial weights and biases for every layer of the network.

    layer_dims -- python list containing the size of each layer, input layer first.

    Returns a dict with keys "W1", "b1", ..., "W(L-1)", "b(L-1)" where
    L = len(layer_dims). Each W_l has shape (layer_dims[l], layer_dims[l-1]) and
    is drawn from a standard normal scaled by 0.01; each b_l is a zero column of
    shape (layer_dims[l], 1).
    '''
    # Re-seed the global NumPy generator so every call yields the same weights.
    np.random.seed(3)
    parameters = {}

    # Walk over consecutive (previous layer size, current layer size) pairs.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (n_prev, n_curr) in enumerate(size_pairs, start=1):
        weights = np.random.randn(n_curr, n_prev) * 0.01
        biases = np.zeros((n_curr, 1))

        assert weights.shape == (n_curr, n_prev)
        assert biases.shape == (n_curr, 1)

        parameters["W" + str(idx)] = weights
        parameters["b" + str(idx)] = biases

    return parameters

In [26]:
'''############### FORWARD PROPAGATION MODULE for L Layers in "DEEP NN" ##################'''

def linear_forward(A, W, b):
    '''Linear part of one layer's forward step: Z = W . A + b.

    Returns the pair (Z, cache) where cache is the tuple (A, W, b) of the
    inputs, kept for use by the backward pass.
    '''
    pre_activation = np.dot(W, A) + b
    return pre_activation, (A, W, b)

def linear_activation_forward(A_prev, W, b, activation):
    '''Run one full layer forward: the linear step followed by its activation.

    A_prev     -- activations coming from the previous layer (or the input data)
    W, b       -- weights and bias of this layer
    activation -- "relu" or "softmax"

    Returns (A, cache) where A is the activation output and cache is the pair
    (linear_cache, activation_cache) needed by the backward pass.

    Raises ValueError if `activation` is not one of the supported names.
    '''
    # Resolve the activation first so an unsupported name fails with a clear
    # message instead of an UnboundLocalError further down.
    if activation == "softmax":
        activation_fn = softmax
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            "Unsupported activation %r: expected 'relu' or 'softmax'" % (activation,)
        )

    # The linear step is identical for every activation.
    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    cache = (linear_cache, activation_cache)
    return A, cache

def L_model_forward(X, parameters):
    '''
    Forward pass through the whole network:
    [LINEAR->RELU]*(L-1) -> LINEAR->SOFTMAX.

    X          -- input data fed to the first layer
    parameters -- dict holding "W1", "b1", ..., "WL", "bL"

    Returns (AL, caches): the output of the final softmax layer and the list of
    per-layer caches, in layer order, produced by linear_activation_forward.
    '''
    # Each layer contributes one W and one b entry.
    num_layers = len(parameters) // 2

    caches = []
    activations = X

    # Hidden layers 1 .. L-1 use ReLU.
    for layer in range(1, num_layers):
        weights = parameters["W{}".format(layer)]
        bias = parameters["b{}".format(layer)]
        activations, layer_cache = linear_activation_forward(activations, weights, bias, "relu")
        caches.append(layer_cache)

    # Output layer L uses softmax.
    out_weights = parameters["W{}".format(num_layers)]
    out_bias = parameters["b{}".format(num_layers)]
    AL, out_cache = linear_activation_forward(activations, out_weights, out_bias, "softmax")
    caches.append(out_cache)

    return AL, caches

In [27]:
'''############### COMPUTE COST ###############'''
def compute_cost(AL, Y):
    '''Categorical cross-entropy cost averaged over the examples.

    AL -- predicted probabilities, same shape as Y
    Y  -- one-hot targets; Y.shape[1] is used as the number of examples m

    Returns the scalar cost -sum(Y * log(AL)) / m (squeezed to 0 dimensions).
    '''
    m = Y.shape[1]

    # A saturated softmax can output exact zeros. log(0) is -inf and
    # 0 * -inf is nan, which would poison the whole sum, so floor the
    # probabilities at a tiny positive value before taking the log.
    eps = 1e-12
    log_probs = np.log(np.maximum(AL, eps))

    cost = -(np.sum(np.multiply(Y, log_probs))) / m

    cost = np.squeeze(cost)

    return cost

In [28]:
'''################ BACKWARD PROPAGATION MODULE #############'''
def linear_backward(dZ, cache):
    '''Backward step for the linear part of a single layer.

    dZ    -- gradient of the cost with respect to this layer's linear output
    cache -- tuple (A_prev, W, b) stored by linear_forward

    Returns (dA_prev, dW, db); dW and db are averaged over the
    A_prev.shape[1] examples.
    '''
    prev_activations, weights, _bias = cache
    num_examples = prev_activations.shape[1]

    grad_prev = np.dot(weights.T, dZ)
    grad_weights = np.dot(dZ, prev_activations.T) / num_examples
    grad_bias = np.sum(dZ, axis=1, keepdims=True) / num_examples

    return grad_prev, grad_weights, grad_bias

def linear_activation_backward(dA, cache, activation):
    '''Run one full layer backward: activation gradient, then the linear step.

    dA         -- gradient of the cost with respect to this layer's activation output
    cache      -- pair (linear_cache, activation_cache) stored during the forward pass
    activation -- "relu" or "softmax"

    Returns (dA_prev, dW, db) as produced by linear_backward.

    Raises ValueError if `activation` is not one of the supported names.
    '''
    linear_cache, activation_cache = cache

    # Resolve the activation first so an unsupported name fails with a clear
    # message instead of an UnboundLocalError at the return statement.
    if activation == "relu":
        backward_fn = relu_backward
    elif activation == "softmax":
        backward_fn = softmax_backward
    else:
        raise ValueError(
            "Unsupported activation %r: expected 'relu' or 'softmax'" % (activation,)
        )

    # The linear backward step is identical for every activation.
    dZ = backward_fn(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

def L_model_backward(AL, Y, caches):
    '''Backward pass through the whole network.

    AL     -- output of the forward pass (softmax probabilities)
    Y      -- targets; reshaped to AL's shape before use
    caches -- list of per-layer caches from L_model_forward, in layer order

    Returns a dict `grads` with "dA{l-1}", "dW{l}" and "db{l}" for every layer
    l = 1..L, using the key names update_parameters reads.
    '''
    grads = {}
    L = len(caches)  # number of layers; one cache per layer
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost -sum(Y * log(AL)) with respect to AL
    # is -Y / AL. Floor AL so an underflowed probability cannot produce
    # inf or 0/0 = nan.
    eps = 1e-12
    dAL = -np.divide(Y, np.maximum(AL, eps))

    # Output layer (softmax).
    # NOTE(review): softmax_backward lives in mnist_nn_utils and is not visible
    # here; it is assumed to convert dAL into dZ — confirm its contract.
    current_cache = caches[L-1]

    grads["dA"+str(L-1)], grads["dW"+str(L)], grads["db"+str(L)] = linear_activation_backward(dAL, current_cache, activation="softmax")

    # Hidden layers, looping l from L-2 down to 0. Layer l+1 consumes the
    # gradient of its own output, stored under "dA{l+1}" by the layer after it.
    for l in reversed(range(L-1)):
        current_cache = caches[l]

        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA"+str(l+1)], current_cache, activation="relu")
        grads["dA"+str(l)] = dA_prev_temp
        grads["dW"+str(l+1)] = dW_temp
        grads["db"+str(l+1)] = db_temp

    return grads

In [29]:
'''################ UPDATE PARAMETERS ###################'''
def update_parameters(parameters, grads, learning_rate):
    '''Apply one gradient-descent step to every layer's weights and biases.

    parameters    -- dict with "W1", "b1", ..., "WL", "bL"; updated in place
    grads         -- dict with the matching "dW1", "db1", ..., "dWL", "dbL"
    learning_rate -- step size

    Returns the same `parameters` dict, now holding the updated values.
    '''
    num_layers = len(parameters) // 2

    for layer in range(1, num_layers + 1):
        suffix = str(layer)
        for name in ("W", "b"):
            key = name + suffix
            step = learning_rate * grads["d" + key]
            parameters[key] = parameters[key] - step

    return parameters

In [30]:
'''############# DEFINING DIMENSIONS FOR THE LAYERS ##############'''
# Layer sizes, input first: the input size comes from X_train's first axis, followed
# by three hidden layers of 10, 7 and 5 units, and an output layer sized from
# Y_train's first axis. This relies on Y_train already being one-hot encoded, so
# that its first axis is the number of classes (10 in the recorded output).
layers_dims = [X_train.shape[0], 10, 7, 5, Y_train.shape[0]] #  4-layer model

In [31]:
'''################## L LAYER MODEL ###############'''
def L_layer_model(X, Y, layer_dims,learning_rate=0.075, num_iterations=100, print_cost=False):
    '''Train an L-layer network with batch gradient descent and plot the cost curve.

    X              -- input data passed to L_model_forward
    Y              -- targets passed to compute_cost / L_model_backward
    layer_dims     -- list of layer sizes, input layer first
    learning_rate  -- gradient-descent step size
    num_iterations -- number of full passes of forward/backward/update
    print_cost     -- if True, print the cost every 100 iterations

    Returns the trained `parameters` dict. As a side effect, shows a matplotlib
    plot of the cost sampled every 100 iterations.
    '''
    np.random.seed(3)
    costs = []  # cost sampled every 100 iterations, used for the plot

    parameters = initialize_parameters_deep(layer_dims)

    for i in range(0, num_iterations):
        # Forward propagation
        AL, caches = L_model_forward(X, parameters)

        # Compute cost
        cost = compute_cost(AL, Y)

        # Backward propagation
        grads = L_model_backward(AL, Y, caches)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Record the cost regardless of print_cost so the plot below is never
        # empty when printing is switched off.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f"%(i, cost))

    # Plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel("cost")
    plt.xlabel("Iterations (per hundred)")
    plt.title("Learning rate: "+str(learning_rate))
    plt.show()

    return parameters

In [32]:
# Train the network on the full training set with the default learning rate.
# With num_iterations = 100 only iteration 0 satisfies the `i % 100 == 0` logging
# condition inside L_layer_model, so a single cost value is printed.
# NOTE(review): the recorded run of this cell ended in a MemoryError; the
# traceback is not captured here, so the failing allocation is unknown. Trying a
# smaller slice of the training data first may help locate it.
parameters = L_layer_model(X_train, Y_train, layers_dims, num_iterations = 100, print_cost = True)

MemoryError: 