# Deep Learning Tutorial - Modeling

In [1]:
import numpy as np
import pprint

In [55]:
# Dimension of one input sample and number of output classes.
INPUT_SIZE, NUM_CLASSES = 2, 7

# Full layer layout: the input size first, the two hidden widths (5 and 5),
# and the number of classes last.
LAYER_SIZES = [INPUT_SIZE, 5, 5, NUM_CLASSES]

# One activation name per weight layer, in layer order.
LAYER_ACTIVATIONS = [
    'relu',
    'relu',
    'softmax',
]

In [56]:
# Show the assembled layout: input size, hidden widths, then the class count.
LAYER_SIZES

[2, 5, 5, 7]

In [57]:
def initialize_network(layer_sizes=None, layer_activations=None, seed=None):
    """Create randomly initialised parameters for a fully connected network.

    Parameters
    ----------
    layer_sizes : sequence of int, optional
        Width of every layer, input first and output last. Defaults to the
        module-level ``LAYER_SIZES``.
    layer_activations : sequence of str, optional
        Activation name for each weight layer (``len(layer_sizes) - 1``
        entries are used). Defaults to the module-level ``LAYER_ACTIVATIONS``.
    seed : int, optional
        When given, weights are drawn from a dedicated
        ``np.random.RandomState(seed)`` so repeated calls return identical
        weights. When omitted, NumPy's global random state is used.

    Returns
    -------
    dict
        Maps ``'layer_1'`` ... ``'layer_N'`` to a dict holding
        ``'w'`` (standard-normal weights of shape ``(n_out, n_in)``),
        ``'b'`` (zeros of shape ``(n_out,)``) and ``'activation'`` (str).

    Raises
    ------
    ValueError
        If fewer activations are supplied than there are weight layers.
    """
    sizes = LAYER_SIZES if layer_sizes is None else layer_sizes
    activations = LAYER_ACTIVATIONS if layer_activations is None else layer_activations

    n_weight_layers = len(sizes) - 1
    if len(activations) < n_weight_layers:
        raise ValueError(
            f'Expected at least {n_weight_layers} activations for layer sizes '
            f'{list(sizes)}, got {len(activations)}.'
        )

    # The np.random module and a RandomState instance both expose randn(),
    # so the unseeded path keeps drawing from the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    architecture = {}
    for layer in range(1, len(sizes)):
        fan_out, fan_in = sizes[layer], sizes[layer - 1]
        architecture[f'layer_{layer}'] = {
            'w': rng.randn(fan_out, fan_in),
            'b': np.zeros(fan_out),
            'activation': activations[layer - 1],
        }
    return architecture

In [58]:
# Build a network from the module-level configuration and pretty-print
# its per-layer parameters (weights, biases, activation name).
network = initialize_network()
pprint.pprint(network)

{'layer_1': {'activation': 'relu',
             'b': array([0., 0., 0., 0., 0.]),
             'w': array([[ 1.44419796,  1.16303701],
       [-2.00961116,  0.28431591],
       [ 1.01256116, -2.26256021],
       [ 0.61697781,  1.09119743],
       [-1.04527511, -0.05313304]])},
 'layer_2': {'activation': 'relu',
             'b': array([0., 0., 0., 0., 0.]),
             'w': array([[-1.45764665, -1.06854323,  0.62869694,  0.47899202,  0.66973295],
       [-1.13236351, -0.26974942,  0.89164398, -0.2906901 ,  0.68116742],
       [-0.39036184,  0.44679897,  0.28661447, -1.21811627,  1.04665421],
       [ 0.76816251, -0.64380966, -0.20099761,  0.20128848,  0.56124158],
       [-0.25497632,  1.16944933, -0.98148751, -0.47203194,  2.00213812]])},
 'layer_3': {'activation': 'softmax',
             'b': array([0., 0., 0., 0., 0., 0., 0.]),
             'w': array([[-1.84942065, -0.41903513,  1.58773187, -0.0658918 ,  0.29928075],
       [-1.46360093, -0.44708949, -0.5382321 ,  0.66107264, -0.4

In [5]:
def sigmoid_activation(Z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise to ``Z``."""
    denominator = 1 + np.exp(-1 * Z)
    return 1 / denominator

In [None]:
def softmax_activation(Z):
    """Numerically stable softmax over the first axis of ``Z``.

    For a 1-D vector the result sums to 1. For a 2-D array each column is
    normalised independently, so a batch laid out as ``(units, samples)``
    yields one probability distribution per sample.

    Parameters
    ----------
    Z : array_like
        Pre-activation values (scalar, vector or 2-D array).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``Z`` holding the softmax probabilities.
    """
    Z = np.asarray(Z, dtype=float)
    if Z.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(Z)

    axis = 0 if Z.ndim > 0 else None
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large logits.
    shifted = Z - np.max(Z, axis=axis, keepdims=True)
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials, axis=axis, keepdims=True)

In [6]:
def relu_activation(Z):
    """Apply the rectified linear unit ``max(0, z)`` element-wise.

    Parameters
    ----------
    Z : array_like
        Pre-activation values.

    Returns
    -------
    numpy.ndarray
        ``Z`` with every negative entry replaced by zero.
    """
    # np.maximum is the element-wise comparison; np.max(0, Z) would treat Z
    # as the ``axis`` argument of a reduction.
    return np.maximum(0, Z)

In [7]:
def dZ_sigmoid(dA, Z):
    """Backpropagate through a sigmoid: ``dA * s * (1 - s)`` with ``s = sigmoid(Z)``."""
    s = sigmoid_activation(Z)
    upstream_times_s = dA * s
    return upstream_times_s * (1 - s)

In [41]:
def dZ_softmax(Z):
    """Jacobian of the softmax for a single pre-activation vector.

    With ``s = softmax(Z)``, entry ``(i, j)`` of the result is
    ``s_i * (delta_ij - s_j)``, i.e. ``diag(s) - s s^T``.

    Parameters
    ----------
    Z : array_like
        Pre-activations of one sample; flattened to 1-D, so a column vector
        of shape ``(n, 1)`` is accepted as well.

    Returns
    -------
    numpy.ndarray
        The ``(n, n)`` Jacobian matrix.
    """
    softmax = np.ravel(softmax_activation(Z))
    # np.outer gives s s^T directly; np.tile needs an explicit repetition
    # count and an element-wise product would not form the outer product.
    return np.diag(softmax) - np.outer(softmax, softmax)

In [8]:
def dZ_relu(dA, Z):
    """Backpropagate through a ReLU.

    Returns a copy of ``dA`` in which every position where ``Z <= 0`` has
    been zeroed; ``dA`` itself is left untouched.
    """
    gradient = np.array(dA, copy=True)
    inactive_units = Z <= 0
    gradient[inactive_units] = 0
    return gradient

In [9]:
# Lookup table from activation name to its forward function.
act_map = dict(
    sigmoid=sigmoid_activation,
    relu=relu_activation,
    softmax=softmax_activation,
)

In [10]:
# Lookup table from activation name to its backward (derivative) function.
dZ_map = dict(
    sigmoid=dZ_sigmoid,
    relu=dZ_relu,
    softmax=dZ_softmax,
)

In [11]:
def single_forward_pass(A_previous, W, b, activation):
    """Run one layer forward: ``Z = W . A_previous + b`` then the activation.

    Parameters
    ----------
    A_previous : array_like
        Output of the previous layer (or the network input).
    W : array_like
        Weight matrix of this layer.
    b : array_like
        Bias of this layer. A 1-D bias is applied per output unit when the
        layer input is a 2-D batch.
    activation : str
        Key into ``act_map`` selecting the activation function.

    Returns
    -------
    tuple
        ``(A, Z)``: the activated output and the pre-activation values.

    Raises
    ------
    ValueError
        If ``activation`` is not a key of ``act_map``.
    """
    try:
        act_function = act_map[activation]
    except KeyError:
        # Callers unpack two return values, so a silent ``None`` would only
        # fail later with a less helpful error.
        raise ValueError(
            f'The activation {activation} is not recognized.\nIt must be one of the following: {list(act_map.keys())}'
        ) from None

    linear = np.dot(W, A_previous)
    bias = np.asarray(b)
    if np.ndim(linear) == 2 and bias.ndim == 1:
        # Make the bias a column so it is added to every sample (column)
        # instead of being broadcast along the sample axis.
        bias = bias.reshape(-1, 1)

    Z = linear + bias
    A = act_function(Z)

    return A, Z

In [40]:
def full_forward_pass(X, network):
    """Propagate ``X`` through every layer of ``network``.

    Parameters
    ----------
    X : array_like
        Network input, fed to the first layer.
    network : dict
        Layer parameters keyed ``'layer_1'`` ... ``'layer_N'``; each entry
        holds ``'w'``, ``'b'`` and ``'activation'``.

    Returns
    -------
    tuple
        ``(A, cache)`` where ``A`` is the output of the last layer and
        ``cache`` stores ``'A_{k-1}'`` (the input to layer ``k``) and
        ``'Z_{k}'`` (its pre-activation) for every layer ``k``.
    """
    cache = {}
    A = X

    for layer in range(1, len(network) + 1):
        # The network dict is keyed by 'layer_<n>' with a lowercase 'w'.
        params = network[f'layer_{layer}']

        A_previous = A
        A, Z = single_forward_pass(A_previous, params['w'], params['b'], params['activation'])

        cache[f'A_{layer-1}'] = A_previous
        cache[f'Z_{layer}'] = Z

    return A, cache

In [64]:
# Scratch check: iterating the dict yields its keys, so this prints
# (index, layer key) pairs from the last layer back to the first.
for prev, current in list(enumerate(network))[::-1]:
    print(prev, current)

2 layer_3
1 layer_2
0 layer_1


In [65]:
# Scratch check: count layer numbers down from the last layer to 1.
for layer in range(len(network), 0, -1):
    print(layer)

3
2
1


In [12]:
def compute_cross_entropy_cost(y_pred, y):
    """Summed categorical cross-entropy ``-sum(y * log(y_pred))``.

    Parameters
    ----------
    y_pred : array_like
        Predicted probabilities.
    y : array_like
        Target values with a shape compatible with ``y_pred``.

    Returns
    -------
    float
        The cost summed over all entries (it is not averaged).
    """
    # Floor the predictions so an exact 0 cannot produce log(0) = -inf,
    # which would turn into nan when multiplied by a zero target.
    eps = 1e-12
    safe_pred = np.maximum(y_pred, eps)

    cost = np.sum(-1 * (y * np.log(safe_pred)))

    return cost

In [43]:
def single_backward_pass(dA, W, b, Z, A_previous, activation):
    """Backpropagate through one layer.

    Parameters
    ----------
    dA : numpy.ndarray
        Gradient arriving at this layer's output. For a ``'softmax'`` layer
        it is taken to already be the gradient with respect to ``Z`` (for
        softmax followed by cross-entropy that is ``y_pred - y``).
        NOTE(review): confirm this fused convention matches how the output
        layer gradient is seeded by the caller.
    W : numpy.ndarray
        Weight matrix of this layer.
    b : numpy.ndarray
        Bias of this layer (not needed for the gradients; kept for the
        existing call signature).
    Z : numpy.ndarray
        Pre-activation values cached during the forward pass.
    A_previous : numpy.ndarray
        2-D input this layer received in the forward pass; its second
        dimension is used as the sample count ``m``.
    activation : str
        Key into ``dZ_map`` naming this layer's activation.

    Returns
    -------
    tuple
        ``(dA_previous, dW, db)``: the gradient passed to the preceding
        layer and the gradients of this layer's weights and bias, the latter
        two averaged over ``m``.

    Raises
    ------
    ValueError
        If ``activation`` is not a key of ``dZ_map``.
    """
    try:
        backprop_activation = dZ_map[activation]
    except KeyError:
        # Callers unpack three return values, so fail loudly here.
        raise ValueError(
            f'The backprop activation {activation} is not recognized.\nIt must be one of the following: {list(dZ_map.keys())}'
        ) from None

    m = A_previous.shape[1]

    if activation == 'softmax':
        # The softmax derivative is a full Jacobian per sample and cannot be
        # applied element-wise; the incoming gradient is used as dZ directly.
        dZ = dA
    else:
        dZ = backprop_activation(dA, Z)

    dW = np.dot(dZ, np.transpose(A_previous)) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    # The gradient flowing to the previous layer goes through the weights W,
    # not through their gradient dW.
    dA_previous = np.dot(np.transpose(W), dZ)

    return dA_previous, dW, db

In [66]:
def full_backward_pass(y_pred, y, cache, network):
    """Backpropagate through every layer, from the last to the first.

    Parameters
    ----------
    y_pred : numpy.ndarray
        Output of the forward pass.
    y : numpy.ndarray
        Targets with the same shape as ``y_pred``.
    cache : dict
        Forward-pass cache holding ``'A_{k-1}'`` and ``'Z_{k}'`` per layer.
    network : dict
        Layer parameters keyed ``'layer_1'`` ... ``'layer_N'``; each entry
        holds ``'w'``, ``'b'`` and ``'activation'``.

    Returns
    -------
    dict
        Gradients keyed ``'dW_{k}'`` and ``'db_{k}'`` for every layer ``k``.
    """
    stored_grads = {}

    # Seed the backward pass with the difference between prediction and
    # target (the only prediction available in this scope is y_pred).
    dA_previous = y_pred - y

    for layer in reversed(range(1, len(network) + 1)):
        # The network dict is keyed by 'layer_<n>' with a lowercase 'w'.
        params = network[f'layer_{layer}']
        activation = params['activation']
        layer_previous = layer - 1

        dA = dA_previous

        A_previous = cache[f'A_{layer_previous}']
        Z = cache[f'Z_{layer}']
        W = params['w']
        b = params['b']

        dA_previous, dW, db = single_backward_pass(dA, W, b, Z, A_previous, activation)
        stored_grads[f'dW_{layer}'] = dW
        stored_grads[f'db_{layer}'] = db

    return stored_grads

In [15]:
def train_nn():
    """Placeholder for the training loop; currently does nothing and returns None."""
    return

### Resources
This notebook has been inspired by the Towards Data Science post [Let’s code a Neural Network in plain NumPy](https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795).

* [The Softmax Function Derivative (Part 1)](https://aimatters.wordpress.com/2019/06/17/the-softmax-function-derivative/).