In [1]:
import numpy as np
import matplotlib.pyplot as plt
from Pad import PadZero

In [2]:
def ConvBackward(cache, dZ):
    """
    Implement the backward propagation for a convolution layer.

    Arguments:
    cache -- tuple (Aprev, W, b, hyperparameters) holding the values needed for the backward pass:
             Aprev -- input of the conv layer, numpy array of shape (m, nHprev, nWprev, nCprev)
             W -- filters of the conv layer, numpy array of shape (f, f, nCprev, nC)
             b -- biases of the conv layer (unpacked but not needed to compute the gradients)
             hyperparameters -- dict with the keys "stride" and "padding"
    dZ -- gradient of the cost with respect to the output of the conv layer (Z),
          numpy array of shape (m, nH, nW, nC)

    Returns:
    dAprev -- gradient of the cost with respect to the input of the conv layer (Aprev),
              numpy array of shape (m, nHprev, nWprev, nCprev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W),
          numpy array of shape (f, f, nCprev, nC)
    dB -- gradient of the cost with respect to the biases of the conv layer (b),
          numpy array of shape (1, 1, 1, nC)
    """

    # Retrieve information from "cache" (the bias values are not needed here)
    (Aprev, W, _, hyperparameters) = cache

    # Retrieve dimensions from Aprev's shape
    (m, nHprev, nWprev, nCprev) = Aprev.shape

    # Retrieve dimensions from W's shape (filters are treated as square: f x f)
    (f, f, nCprev, nC) = W.shape

    # Retrieve information from "hyperparameters"
    stride = hyperparameters["stride"]
    pad = hyperparameters["padding"]

    # Retrieve dimensions from dZ's shape; nH and nW are the OUTPUT height/width
    (m, nH, nW, nC) = dZ.shape

    # Initialize dW, dB with the correct shapes
    dW = np.zeros((f, f, nCprev, nC))
    dB = np.zeros((1, 1, 1, nC))

    # Pad Aprev and a zero-initialised dAprev.
    # NOTE(review): PadZero comes from the project's Pad module; it is assumed to
    # zero-pad the height and width axes by `pad` on each side -- the un-padding
    # slice and the shape assertion below rely on that. TODO confirm.
    AprevPad = PadZero(Aprev, pad)
    dAprevPad = PadZero(np.zeros((m, nHprev, nWprev, nCprev)), pad)

    for m1 in range(m):                      # loop over the training examples
        for i in range(nH):                  # loop over vertical axis of the output volume
            h1 = i * stride                  # top edge of the window in the padded input
            for j in range(nW):              # loop over horizontal axis of the output volume
                w1 = j * stride              # left edge of the window in the padded input
                window = AprevPad[m1, h1:h1 + f, w1:w1 + f, :]
                for c1 in range(nC):         # loop over the channels of the output volume
                    grad = dZ[m1, i, j, c1]
                    dAprevPad[m1, h1:h1 + f, w1:w1 + f, :] += W[:, :, :, c1] * grad
                    dW[..., c1] += window * grad
                    dB[..., c1] += grad

    # Strip the padding. Explicit end indices are used because "pad:-pad"
    # collapses to an empty slice when pad == 0.
    dAprev = dAprevPad[:, pad:pad + nHprev, pad:pad + nWprev, :]

    # Making sure your output shape is correct
    assert (dAprev.shape == (m, nHprev, nWprev, nCprev))

    return dAprev, dW, dB

In [3]:
# Dimensions of the demo problem: batch size, spatial size and channel counts.
m, nH, nW = 3, 10, 10
nC = 3
nCprev = 3

# Square filter size and the convolution hyperparameters.
sizeFilter = 3
stride, pad = 1, 1

# Random integer test data. NOTE: no seed is set, so every run draws different
# values. The draw order (y, x, w, b) is kept fixed.
y = np.random.randint(0, 2, size=(2, m))
x = np.random.randint(0, 16, size=(m, nH, nW, nC))
w = np.random.randint(0, 2, size=(sizeFilter, sizeFilter, nCprev, nC))
b = np.random.randint(0, 2, size=(1, 1, 1, nC))

# Bundle everything ConvBackward expects to find in a cache entry.
hyperparameters = dict(stride=stride, padding=pad)
caches = [(x, w, b, hyperparameters)]

# Show the labels, the first channel of the first example, the biases and
# one 2-D slice of the filter bank.
print("y{}: \n{}".format(y.shape,y))
print("x{}: \n{}".format(x.shape,x[0,:,:,0]))
print("b{}: \n{}".format(b.shape, b))
print("\nfilter/w{}: \n{}".format(w.shape,w[:,:,0,0]))


y(2, 3): 
[[0 0 0]
 [0 0 1]]
x(3, 10, 10, 3): 
[[ 1 13  1  8 12  4  2  4  2 12]
 [ 1  1  0 13  8  2  3  0  3  4]
 [15  5  8  0  1 14  1  7  6 11]
 [ 5 13 10  7 15 12  2  6  4  1]
 [ 7 10 11  1  7  9  5 10  1  3]
 [ 5  3  9 13  6  1  7  5  3  1]
 [ 8 10 10  9  2 13  0  2 12  5]
 [ 2  4  4 13  1  6 10  5 13 15]
 [ 6  4  3 14  8 11  8  4  3  3]
 [15  2 13  1  7  9 14 15  5 14]]
b(1, 1, 1, 3): 
[[[[1 0 1]]]]

filter/w(3, 3, 3, 3): 
[[0 0 0]
 [1 1 1]
 [0 0 1]]


In [4]:
# Upstream gradient for the demo: random integers drawn from {0, 1, 2} with
# shape (m, nH, nW, nC). Only the first channel of the first example is shown.
delta = np.random.randint(low=0, high=3, size=(m, nH, nW, nC))
print("delta{}: \n{}".format(delta.shape, delta[0, ..., 0]))

delta(3, 10, 10, 3): 
[[2 0 0 0 1 2 0 1 0 2]
 [1 2 1 1 2 2 0 1 0 0]
 [0 1 1 1 0 1 2 1 2 1]
 [0 0 0 2 0 2 2 0 1 2]
 [0 0 2 2 2 1 1 0 0 0]
 [0 2 0 0 0 1 0 2 1 1]
 [1 1 1 1 0 1 0 2 1 2]
 [2 0 1 0 0 1 0 2 2 2]
 [1 0 0 0 2 0 0 2 0 0]
 [2 1 1 1 2 2 2 1 2 1]]


In [5]:
# Run the backward pass on the single cached layer, using delta as dZ.
dA, dW, dB = ConvBackward(cache=caches[0], dZ=delta)