In [2]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (5.0, 4.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

np.random.seed(1)

In [68]:
def zero_pad(X, pad):
    X_pad = np.pad(X, ((0,0), (pad, pad), (pad, pad), (0,0)), 'constant', constant_values = 0)
    return X_pad

In [69]:
x = np.random.randn(4, 3, 3, 2)
x_pad = zero_pad(x, 2)
print ("x.shape =", x.shape)
print ("x_pad.shape =", x_pad.shape)

print(x[1,1])
print()
print(x_pad[1,1])

x.shape = (4, 3, 3, 2)
x_pad.shape = (4, 7, 7, 2)
[[ 0.90085595 -0.68372786]
 [-0.12289023 -0.93576943]
 [-0.26788808  0.53035547]]

[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [70]:
def conv_single_step(a_slice_prev, W, b):
    temp_ans = np.multiply(a_slice_prev, W) + b
    '''
    Basically take dot product of filter with extract of image and add bias and bam, you've got yourself an element
    Add all of them
    '''
    return np.sum(temp_ans)

In [71]:
np.random.seed(1)
a_slice_prev = np.random.randn(4, 4, 3)
#Basically think of mini-slice with many slides or facets to contribute to volume (referred to as Nc)
W = np.random.randn(4, 4, 3)
b = np.random.randn(1, 1, 1)

Z = conv_single_step(a_slice_prev, W, b)
print("Z =", Z)

Z = -23.16021220252078


In [72]:
def conv_forward(A_prev, W, b, hparameters):
    (m, nh_prev, nw_prev, nc_prev) = A_prev.shape
    (f, f, nc_prev, nc_new) = W.shape
    stride = hparameters['stride']
    pad = hparameters['pad']
    
    nh_new = int((nh_prev - f + 2 * pad) / stride) + 1
    nw_new = int((nw_prev - f + 2 * pad) / stride) + 1
    
    Z = np.zeros((m, nh_new, nw_new, nc_new))
    
    A_prev_padded = zero_pad(A_prev, pad)
    
    for i in range(m):
        a_prev_temp = A_prev_padded[i]
        for j in range(nh_new):
            for k in range(nw_new):
                for l in range(nc_new):
                    top = j * stride
                    bottom = top + f
                    left = k * stride
                    right = left + f
                    
                    a_slice = a_prev_temp[top:bottom, left:right, :]
                    
                    Z[i, j, k, l] = conv_single_step(a_slice, W[...,l], b[...,l])
    assert(Z.shape == (m, nh_new, nw_new, nc_new))
    
    cache = (A_prev, W, b, hparameters)
    return Z, cache

In [73]:
np.random.seed(1)
A_prev = np.random.randn(10, 4, 4, 3)
W = np.random.randn(2, 2, 3, 8)
b = np.random.randn(1, 1, 1, 8)
hparameters = {"pad" : 2,
               "stride": 1}

Z, cache_conv = conv_forward(A_prev, W, b, hparameters)
print("Z's mean =", np.mean(Z))
print("cache_conv[0][1][2][3] =", cache_conv[0][1][2][3])

Z's mean = 0.15585932488906465
cache_conv[0][1][2][3] = [-0.20075807  0.18656139  0.41005165]


In [74]:
def pool_forward(A_prev, hparameters, mode = "max"):
    A_new = []
    (m, nh_old, nw_old, nc) = A_prev.shape
    stride = hparameters["stride"]
    f = hparameters["f"]
    nh_new = int((nh_old - f) / stride) + 1
    nw_new = int((nw_old - f) / stride) + 1
    for i in range(m):
        a_temp = A_prev[i]
        a_new_temp = []
        for j in range(nh_new):
            a_mat = []
            for k in range(nw_new):
                a_row = []
                for l in range(nc):
                    top = j * stride
                    bottom = top + f
                    left = k * stride
                    right = left + f
                    a_slice = a_temp[top:bottom, left:right, l]    
                    if(mode == "max"):
                        num = np.max(a_slice)
                    else:
                        num = np.mean(a_slice)
                    a_row.append(num)
                a_mat.append(a_row)
            a_new_temp.append(a_mat)
        A_new.append(a_new_temp)
        
    cache = (A_prev, hparameters)
    A_new = np.array(A_new)
    assert(A_new.shape == (m, nh_new, nw_new, nc))    
    return A_new, cache

In [75]:
np.random.seed(1)
A_prev = np.random.randn(2, 4, 4, 3)
hparameters = {"stride" : 1, "f": 4}

A, cache = pool_forward(A_prev, hparameters)
print("mode = max")
print("A =", A)
print()
A, cache = pool_forward(A_prev, hparameters, mode = "average")
print("mode = average")
print("A =", A)

mode = max
A = [[[[1.74481176 1.6924546  2.10025514]]]


 [[[1.19891788 1.51981682 2.18557541]]]]

mode = average
A = [[[[-0.09498456  0.11180064 -0.14263511]]]


 [[[-0.09525108  0.28325018  0.33035185]]]]


In [76]:
def conv_backward(dZ, cache):
    (A_prev, W, b, hparameters) = cache
    (m, nh_old, nw_old, nc_old) = A_prev.shape
    (f, f, nc_old, nc) = W.shape
    (m, nh_new, nw_new, nc) = dZ.shape
    print(dZ.shape)
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    dA_prev = np.zeros(A_prev.shape)
    dW = np.zeros(W.shape)
    db = np.zeros(b.shape)
    print(db.shape)
    dA_padded = zero_pad(dA_prev, pad)
    A_padded = zero_pad(A_prev, pad)
    
    for i in range(m):
        for j in range(nh_new):
            for k in range(nw_new):
                top = j * stride
                bottom = top + f
                left = k * stride
                right = left + f
                for l in range(nc):
                    for l1 in range(nc_old):
                        #print(dA_padded[i, top:bottom, left:right, l1].shape)
                        #print(dZ[i,j,k,l].shape)
                        #print(W[:, :, l1, l].shape)
                        dA_padded[i, top:bottom, left:right, l1] += dZ[i,j,k,l] * W[:, :, l1, l]
                    dW[:, :, :, l] += dZ[i,j,k,l] * A_padded[i, top:bottom, left:right, :]
                    db[:, :, :, l] += dZ[i,j,k,l]
    dA_prev = dA_padded[:, pad:-pad, pad:-pad, :]
    assert(dA_prev.shape == (m, nh_old, nw_old, nc_old))
    return dA_prev, dW, db

In [77]:
np.random.seed(1)
dA, dW, db = conv_backward(Z, cache_conv)
print("dA_mean =", np.mean(dA))
print("dW_mean =", np.mean(dW))
print("db_mean =", np.mean(db))
# print(dA.shape)

(10, 7, 7, 8)
(1, 1, 1, 8)
dA_mean = 9.608990675868995
dW_mean = 10.581741275547566
db_mean = 76.37106919563735


In [78]:
def create_mask_from_window(x):
    mask = x == np.max(x)
    return mask

In [79]:
np.random.seed(1)
x = np.random.randn(2,3)
mask = create_mask_from_window(x)
print('x = ', x)
print("mask = ", mask)

x =  [[ 1.62434536 -0.61175641 -0.52817175]
 [-1.07296862  0.86540763 -2.3015387 ]]
mask =  [[ True False False]
 [False False False]]


In [80]:
def distribute_value(dz, shape):
    (nh, nw) = shape
    
    average = dz / (nh * nw)
    a = np.ones(shape) * average
    return a

In [81]:
a = distribute_value(2, (2,2))
print('distributed value =', a)

distributed value = [[0.5 0.5]
 [0.5 0.5]]


In [82]:
def pool_backward(dA, cache, mode = "max"):
    (A_prev, hparameters) = cache
    stride = hparameters["stride"]
    f = hparameters["f"]
    
    (m, nh_old, nw_old, nc) = A_prev.shape
    (m, nh_new, nw_new, nc) = dA.shape
    
    dA_prev = np.zeros(A_prev.shape)
    for i in range(m):
        for j in range(nh_new):
            for k in range(nw_new):
                top = j * stride
                bottom = top + f
                left = k * stride
                right = left + f
                for l in range(nc):
                    temp_mat = dA_prev[i, top:bottom, left:right, l]
                    if(mode == "max"):
                        temp_mat = create_mask_from_window(temp_mat)
                        dA_prev[i, top:bottom, left:right, l] += np.multiply(temp_mat, dA[i, j, k, l])
                    else:
                        shape = (f, f)
                        dA_prev[i, top:bottom, left:right, l] += distribute_value(dA[i, j, k, l], shape)
    return dA_prev

In [83]:
np.random.seed(1)
A_prev = np.random.randn(5, 5, 3, 2)
hparameters = {"stride" : 1, "f": 2}
A, cache = pool_forward(A_prev, hparameters)
dA = np.random.randn(5, 4, 2, 2)

dA_prev = pool_backward(dA, cache, mode = "max")
print("mode = max")
print('mean of dA = ', np.mean(dA))
print('dA_prev[1,1] = ', dA_prev[1,1])  
print()
dA_prev = pool_backward(dA, cache, mode = "average")
print("mode = average")
print('mean of dA = ', np.mean(dA))
print('dA_prev[1,1] = ', dA_prev[1,1])

mode = max
mean of dA =  0.14571390272918056
dA_prev[1,1] =  [[ 0.98633519  1.11502079]
 [ 5.05844394 -1.68282702]
 [ 0.         -0.24863478]]

mode = average
mean of dA =  0.14571390272918056
dA_prev[1,1] =  [[ 0.08485462  0.2787552 ]
 [ 1.26461098 -0.25749373]
 [ 1.17975636 -0.53624893]]
