In [77]:
import numpy as np
import matplotlib.pyplot as plt
from Pad import PadZero

In [78]:
def conv_backward(dZ, cache):
    """
    Implement the backward propagation for a convolution function
    
    Arguments:
    dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache of values needed for the conv_backward(), output of conv_forward()
    
    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (A_prev),
               numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, n_C_prev, n_C)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, n_C)
    """
    
    ### START CODE HERE ###
    # Retrieve information from "cache"
    (A_prev, W, b, hparameters) = cache
    
    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    # Retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = W.shape
    
    # Retrieve information from "hparameters"
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    
    # Retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape
    
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    # Pad A_prev and dA_prev
    A_prev_pad = zero_pad(A_prev, pad)
    dA_prev_pad = zero_pad(dA_prev, pad)
    
    for i in range(m):                       # loop over the training examples
        
        # select ith training example from A_prev_pad and dA_prev_pad
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        
        for h in range(n_H):                   # loop over vertical axis of the output volume
            for w in range(n_W):               # loop over horizontal axis of the output volume
                for c in range(n_C):           # loop over the channels of the output volume
                    
                    # Find the corners of the current "slice"
                    vert_start = h * stride

                    vert_end = vert_start + f
                    horiz_start = w * stride

                    horiz_end = horiz_start + f
                    
                    # Use the corners to define the slice from a_prev_pad
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                    # Update gradients for the window and the filter's parameters using the code formulas given above
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    
        # Set the ith training example's dA_prev to the unpaded da_prev_pad (Hint: use X[pad:-pad, pad:-pad, :])
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
    ### END CODE HERE ###
    
    # Making sure your output shape is correct
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    
    return dA_prev, dW, db

In [79]:
def BackPropagationConv(cache, dZ):
    """
    Implement the backward propagation for a convolution function
    
    Arguments:
    dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, nH, nW, nC)
    cache -- cache of values needed for the BackPropagationConv(), output of BackPropagationConv()
    
    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (Aprev),
               numpy array of shape (m, nH_prev, nWprev, nCprev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, nCprev, nC)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, nC)
    """
    
    ### START CODE HERE ###
    # Retrieve information from "cache"
    (Aprev,W,b,hyperparameters) = cache
    
    # Retrieve dimensions from Aprev's shape
    (m,nHprev,nWprev,nCprev) = Aprev.shape
    
    # Retrieve dimensions from W's shape
    (f,f,nCprev,nC) = W.shape
    
    # Retrieve information from "hyperparameters"
    stride = hyperparameters["stride"]
    pad = hyperparameters["padding"]
    
    # Retrieve dimensions from dZ's shape
    (m, nH, nW, nC) = dZ.shape
    
    # Initialize dAprev, dW, db with the correct shapes
    dAprev = np.zeros(Aprev.shape)
    dW = np.zeros(W.shape)
    dB = np.zeros((1,1,1,nC))
    
    # Pad Aprev and dAprev
    AprevPad = PadZero(Aprev, pad)
    print("AprevPad{}: \n{}".format(AprevPad.shape, AprevPad[0,:,:,0]))

    dAprevPad = PadZero(dAprev, pad)
    print("dAprevPad{}: \n{}".format(dAprevPad.shape, dAprevPad[0,:,:,0]))

    
    for m1 in range(m):                                                 # loop over the training examples
        for i,h1 in enumerate(range(0,nH-f+1,stride)):                  # loop over vertical axis of the output volume
            for j,w1 in enumerate(range(0,nW-f+1,stride)):              # loop over horizontal axis of the output volume
                for c1 in range(nC):                                    # loop over the channels of the output volume
                    
                    '''print("\n\nm1: {}\ni:{} h1:{}\nj:{} w1:{}\nc1:{}".format(m1,i,h1,j,w1,c1))
                    print("W1: \n",dW[...,c1])
                    print("a: \n", dAprevPad[m1,h1:h1+f,w1:w1+f,c1])
                    print("delta: \n",dZ[m1,i,j,c1])'''
                    
                    #dAprevPad[m1,h1:h1+f,w1:w1+f,:] += W[...,c1] * dZ[m1, h1:h1+f, w1:w1+f, c1]
                    dAprevPad[m1,h1:h1+f,w1:w1+f,:] += W[...,c1] * dZ[m1, i, j, c1]
                    
                    #da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[...,c1] += AprevPad[m1,h1:h1+f,w1:w1+f,:] * dZ[m1,i,j,c1]
                    dB[...,c1] += dZ[m1,i,j,c1]
                    #print("W: \n",dW[...,c1])
    
    print("2dAprevPad{}: \n{}".format(dAprevPad.shape, dAprevPad[0,:,:,0]))
    dAprev = dAprevPad[:,pad:-pad,pad:-pad,:]
    print("dAprev{}: \n{}".format(dAprev.shape, dAprev[0,:,:,0]))

    
    # Making sure your output shape is correct
    assert (dAprev.shape == (m, nHprev,nWprev,nCprev))       
                    
    return dAprev, dW, dB

In [88]:
m = 1
nH = 4
nW = 4
nC = 1
nCprev = 1

sizeFilter = 3

stride = 1
pad = 1

y = np.random.randint(0,2,(2,m)).reshape(2,m)
print("y{}: \n{}".format(y.shape,y))
x = np.random.randint(0,16,(m,nH,nW,nC))
#x = np.array([1,2,0,2,2,1,0,0,2,0,0,0,2,2,0,0]).reshape(m,nH,nW,nC)
print("x{}: \n{}".format(x.shape,x[0,:,:,0]))


#w = np.ones((sizeFilter,sizeFilter,nCprev,nC), dtype = np.int32)
w = np.random.randint(0,2,(sizeFilter,sizeFilter,nCprev,nC))

#b = np.array([0]).reshape(1,1,1,nC)
b = np.random.randint(0,2,(1,1,1,nC))
print("b{}: \n{}".format(b.shape, b))

hyperparameters = {"stride": stride, "padding":pad}

caches = [(x,w,b,hyperparameters)]

print("caches[0]{}: \n{}".format(len(caches[0]), caches[0]))

print("\nfilter/w{}: \n{}".format(w.shape,w[:,:,0]))


y(2, 1): 
[[1]
 [1]]
x(1, 4, 4, 1): 
[[15  3  5  7]
 [ 0  1 13  2]
 [12 15  1  3]
 [11  7 15 12]]
b(1, 1, 1, 1): 
[[[[0]]]]
caches[0]4: 
(array([[[[15],
         [ 3],
         [ 5],
         [ 7]],

        [[ 0],
         [ 1],
         [13],
         [ 2]],

        [[12],
         [15],
         [ 1],
         [ 3]],

        [[11],
         [ 7],
         [15],
         [12]]]]), array([[[[0]],

        [[1]],

        [[1]]],


       [[[1]],

        [[1]],

        [[0]]],


       [[[0]],

        [[1]],

        [[1]]]]), array([[[[0]]]]), {'stride': 1, 'padding': 1})

filter/w(3, 3, 1, 1): 
[[[0]
  [1]
  [1]]

 [[1]
  [1]
  [0]]

 [[0]
  [1]
  [1]]]


In [81]:
delta = np.array([1,0,1,2,2,0,2,1,1,0,1,1,2,2,0,1]).reshape(1,4,4,1)
#delta = np.random.randint(0,3,(m,nH,nW,nC))
print("delta{}: \n{}".format(delta.shape,delta[0,:,:,0]))

delta(1, 4, 4, 1): 
[[1 0 1 2]
 [2 0 2 1]
 [1 0 1 1]
 [2 2 0 1]]


In [82]:
dA,dW,dB = BackPropagationConv(caches[0], delta)

AprevPad(1, 6, 6, 1): 
[[ 0  0  0  0  0  0]
 [ 0  7  5  6  3  0]
 [ 0 10 12 11 13  0]
 [ 0 10  7  5  7  0]
 [ 0  3 15  4  6  0]
 [ 0  0  0  0  0  0]]
dAprevPad(1, 6, 6, 1): 
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
2dAprevPad(1, 6, 6, 1): 
[[1. 1. 0. 0. 0. 0.]
 [3. 3. 1. 0. 0. 0.]
 [3. 3. 2. 0. 0. 0.]
 [2. 2. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
dAprev(1, 4, 4, 1): 
[[3. 1. 0. 0.]
 [3. 2. 0. 0.]
 [2. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [83]:
print("dA{}: \n{}".format(dA.shape, dA[0,:,:,0]))

dA(1, 4, 4, 1): 
[[3. 1. 0. 0.]
 [3. 2. 0. 0.]
 [2. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [84]:
dA,dW,dB = conv_backward(caches[0], delta)

ValueError: not enough values to unpack (expected 4, got 1)