In [2]:
import numpy as np
import matplotlib.pyplot as plt


%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

In [11]:
c = [1,2 ,3, 4, 5, 6]
c.pop(0)

1

In [14]:
c[1:-1]

[3, 4, 5]

In [46]:
def initialize_parameters(layer_dims, seed=3):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """
    
    np.random.seed(seed)
    parameters = {}
    L = len(layer_dims)            # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l],1)) * 0.01
        
        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

        
    return parameters

In [34]:
p = initialize_parameters(5, 3, layer_sizes=[4])

In [35]:
for k in p.keys():
    print(f"{k} shape: {p[k].shape}")

W1 shape: (4, 5)
b1 shape: (4, 1)
W4 shape: (3, 4)
b4 shape: (3, 1)


In [50]:
def sigmoid(Z):
    
    A = 1 / (1 + np.exp(-Z))
    
    ## return Z as the "cache"
    return A, Z

def relu(Z):
    
    A = np.maximum(0, Z)
    
    ## Return Z as the "cache"   
    return A, Z
    

In [77]:
def linear_forward(A, W, b):
    """
    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter 
    cache -- a python tuple containing "A", "W" and "b" ; stored for computing the backward pass efficiently
    """
    
    print(W.shape)
    print(A.shape)
    print(b.shape)
    
    Z = np.dot(W, np.transpose(A)) + b
    
    cache = (A, W, b) ## for backward prop
    
    return Z, cache

In [78]:
def activation_forward(A_prev, W, b, activation):
    
    Z, linear_cache = linear_forward(A_prev, W, b)
    
    A, activation_cache = activation(Z)
    
    cache = (linear_cache, activation_cache)
    return A, cache

In [91]:
def L_model_forward(X, parameters, output_activation, activations: dict):
    
    caches = []
    A = X
    L = len(parameters) // 2 
    
    assert L == len(activations)
    
    for l in range(1, L):
        A_prev = A
        A, cache = activation_forward(A_prev, 
                                      parameters['W' + str(l)], 
                                      parameters['b' + str(l)], 
                                      activations["a" + str(l)])
        caches.append(cache)
        
    AL, cache = activation_forward(A, 
                                   parameters['W' + str(L)], 
                                   parameters['b' + str(L)], 
                                   output_activation)
    caches.append(cache)
    
    assert(AL.shape == (1, X.shape[0]))
    
    return AL, caches
    

In [129]:
def BinaryCrossEntropy(Y_pred, Y_true):
    m = Y_true.shape[0]
    return np.squeeze(- 1 / m * ( np.dot(np.log(Y_pred), Y_true) + np.dot(np.log(1-Y_pred), (1 - Y_true))))

In [131]:
def compute_cost(AL, Y, f):
    m = Y.shape[0]
    
    cost = f(AL, Y)
    
    assert cost.shape == ()
    return cost

In [None]:
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    
    dW = 1 / m * np.dot(dZ, A_prev.T)
    db = 1 / m * np.sum(dZ, axis=1, keepdims=True)

In [133]:
compute_cost(a, Y, BinaryCrossEntropy)

array(0.69546216)

In [92]:
X = np.random.randn(1000, 50)
Y = np.random.randn(1000, 1)

In [93]:
p = initialize_parameters(layer_dims = [X.shape[1], Y.shape[1]], seed=1)

In [94]:
activations = {'a1' : relu}

In [95]:
p

{'W1': array([[ 0.01624345, -0.00611756, -0.00528172, -0.01072969,  0.00865408,
         -0.02301539,  0.01744812, -0.00761207,  0.00319039, -0.0024937 ,
          0.01462108, -0.02060141, -0.00322417, -0.00384054,  0.01133769,
         -0.01099891, -0.00172428, -0.00877858,  0.00042214,  0.00582815,
         -0.01100619,  0.01144724,  0.00901591,  0.00502494,  0.00900856,
         -0.00683728, -0.0012289 , -0.00935769, -0.00267888,  0.00530355,
         -0.00691661, -0.00396754, -0.00687173, -0.00845206, -0.00671246,
         -0.00012665, -0.0111731 ,  0.00234416,  0.01659802,  0.00742044,
         -0.00191836, -0.00887629, -0.00747158,  0.01692455,  0.00050808,
         -0.00636996,  0.00190915,  0.02100255,  0.00120159,  0.00617203]]),
 'b1': array([[0.]])}

In [97]:
a, b=L_model_forward(X, p, sigmoid, activations)

(1, 50)
(1000, 50)
(1, 1)
(1, 1000)


In [114]:
a.shape

(1, 1000)

In [119]:
t  = np.log(a)

In [120]:
t.shape

(1, 1000)

In [122]:
np.dot(t, Y)

array([[3.81729458]])