In [409]:
import numpy as np
import matplotlib.pyplot as plt
from Pad import PadZero

In [410]:
def conv_backward(dZ, cache):
    """
    Implement the backward propagation for a convolution function
    
    Arguments:
    dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, n_H, n_W, n_C)
    cache -- cache of values needed for the conv_backward(), output of conv_forward()
    
    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (A_prev),
               numpy array of shape (m, n_H_prev, n_W_prev, n_C_prev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, n_C_prev, n_C)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, n_C)
    """
    
    ### START CODE HERE ###
    # Retrieve information from "cache"
    (A_prev, W, b, hparameters) = cache
    
    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    
    # Retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = W.shape
    
    # Retrieve information from "hparameters"
    stride = hparameters["stride"]
    pad = hparameters["padding"]
    
    # Retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape
    
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))                           
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))

    # Pad A_prev and dA_prev
    A_prev_pad = PadZero(A_prev, pad)
    print("A_prev_pad{}: \n{}".format(A_prev_pad.shape, A_prev_pad[0,:,:,0]))

    dA_prev_pad = PadZero(dA_prev, pad)
    print("dA_prev_pad{}: \n{}".format(dA_prev_pad.shape, dA_prev_pad[0,:,:,0]))

    
    for i in range(m):                       # loop over the training examples
        
        # select ith training example from A_prev_pad and dA_prev_pad
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        
        for h in range(n_H):                   # loop over vertical axis of the output volume
            for w in range(n_W):               # loop over horizontal axis of the output volume
                for c in range(n_C):           # loop over the channels of the output volume
                    
                    # Find the corners of the current "slice"
                    vert_start = h * stride

                    vert_end = vert_start + f
                    horiz_start = w * stride

                    horiz_end = horiz_start + f
                    
                    # Use the corners to define the slice from a_prev_pad
                    a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                    #print("w{}: \n{}".format( W[:,:,0,0].shape, W[:,:,0,0]))
                    # Update gradients for the window and the filter's parameters using the code formulas given above
                    print("{}.{}.{}.{})dAprevPad{}: ".format(i,h,w,c,da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :].shape))
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]
                    
        # Set the ith training example's dA_prev to the unpaded da_prev_pad (Hint: use X[pad:-pad, pad:-pad, :])
        print("1da_prev_pad{}: \n{}".format(da_prev_pad.shape, da_prev_pad[...,0]))
        dA_prev[i, :, :, :] = da_prev_pad[pad:-pad, pad:-pad, :]
        print("dA_prev{}: \n{}".format(dA_prev.shape, dA_prev[i, :, :, 0]))

    ### END CODE HERE ###
    

    # Making sure your output shape is correct
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    
    return dA_prev, dW, db

In [411]:
def BackPropagationConv(cache, dZ):
    """
    Implement the backward propagation for a convolution function
    
    Arguments:
    dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, nH, nW, nC)
    cache -- cache of values needed for the BackPropagationConv(), output of BackPropagationConv()
    
    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (Aprev),
               numpy array of shape (m, nH_prev, nWprev, nCprev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, nCprev, nC)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, nC)
    """
    
    ### START CODE HERE ###
    # Retrieve information from "cache"
    (Aprev,W,b,hyperparameters) = cache
    
    # Retrieve dimensions from Aprev's shape
    (m,nHprev,nWprev,nCprev) = Aprev.shape
    print("ncPrev2: ", nCprev)

    
    # Retrieve dimensions from W's shape
    (f,f,nCprev,nC) = W.shape
    print("ncPrev1: ", nCprev)
    
    # Retrieve information from "hyperparameters"
    stride = hyperparameters["stride"]
    pad = hyperparameters["padding"]
    
    # Retrieve dimensions from dZ's shape
    (m, nH, nW, nC) = dZ.shape
    
    # Initialize dAprev, dW, db with the correct shapes
    dAprev = np.zeros((m,nHprev,nWprev,nCprev))
    dW = np.zeros((f,f,nCprev,nC))
    dB = np.zeros((1,1,1,nC))
    
    # Pad Aprev and dAprev
    AprevPad = PadZero(Aprev, pad)
    print("AprevPad{}: \n{}".format(AprevPad.shape, AprevPad[0,:,:,0]))

    dAprevPad = PadZero(dAprev, pad)
    print("dAprevPad{}: \n{}".format(dAprevPad.shape, dAprevPad[0,:,:,0]))

    (newM, newNH, newNW, newNC) = dAprevPad.shape
    
    for m1 in range(m):                                                 # loop over the training examples
        #daprevPad = dAprevPad[m1]
        for i,h1 in enumerate(range(0,newNH,stride)):                  # loop over vertical axis of the output volume
            for j,w1 in enumerate(range(0,newNW,stride)):              # loop over horizontal axis of the output volume
                for c1 in range(nC):                                    # loop over the channels of the output volume
                    
                    '''print("\n\nm1: {}\ni:{} h1:{}\nj:{} w1:{}\nc1:{}".format(m1,i,h1,j,w1,c1))
                    print("W1: \n",dW[...,c1])
                    print("a: \n", dAprevPad[m1,h1:h1+f,w1:w1+f,c1])
                    print("delta: \n",dZ[m1,i,j,c1])'''
                    #print("w{}: \n{}".format(W[:,:,0,0].shape,  W[:,:,0,0]))
                    
                    #dAprevPad[m1,h1:h1+f,w1:w1+f,:] += W[...,c1] * dZ[m1, h1:h1+f, w1:w1+f, c1]
                    print("{}.{}.{}.{})dAprevPad{}: ".format(m1,h1,w1,c1,dAprevPad[m1,h1:h1+f,w1:w1+f,:].shape))
                    dAprevPad[m1,h1:h1+f,w1:w1+f,:] += W[:,:,:,c1] * dZ[m1, i, j, c1]
                    #daprevPad[h1:h1+f,w1:w1+f,:] += W[:,:,:,c1] * dZ[m1,i,j,c1]
                    
                    #da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[...,c1] += AprevPad[m1,h1:h1+f,w1:w1+f,:] * dZ[m1,i,j,c1]
                    dB[...,c1] += dZ[m1,i,j,c1]
                    #print("W: \n",dW[...,c1])
    
    print("2dAprevPad{}: \n{}".format(dAprevPad.shape, dAprevPad[0,:,:,0]))
    dAprev = dAprevPad[:,pad:-pad,pad:-pad,:]
    print("dAprev{}: \n{}".format(dAprev.shape, dAprev[0,:,:,0]))

    
    # Making sure your output shape is correct
    assert (dAprev.shape == (m,nHprev,nWprev,nCprev))       
                    
    return dAprev, dW, dB

In [412]:
m = 3
nH = 10
nW = 10
nC = 3
nCprev = 3

sizeFilter = 4

stride = 1
pad = 1

y = np.random.randint(0,2,(2,m)).reshape(2,m)
print("y{}: \n{}".format(y.shape,y))
x = np.random.randint(0,16,(m,nH,nW,nC))
#x = np.array([1,2,0,2,2,1,0,0,2,0,0,0,2,2,0,0]).reshape(m,nH,nW,nC)
print("x{}: \n{}".format(x.shape,x[0,:,:,0]))


#w = np.ones((sizeFilter,sizeFilter,nCprev,nC), dtype = np.int32)
w = np.random.randint(0,2,(sizeFilter,sizeFilter,nCprev,nC))

#b = np.array([0]).reshape(1,1,1,nC)
b = np.random.randint(0,2,(1,1,1,nC))
print("b{}: \n{}".format(b.shape, b))

hyperparameters = {"stride": stride, "padding":pad}

caches = [(x,w,b,hyperparameters)]

#print("caches[0]{}: \n{}".format(len(caches[0]), caches[0]))

print("\nfilter/w{}: \n{}".format(w.shape,w[:,:,0,0]))


y(2, 3): 
[[1 0 0]
 [1 0 1]]
x(3, 10, 10, 3): 
[[ 7 15 11  5 14 15  2 11  8 13]
 [11  5 11  9 13  0 13 11  0 11]
 [ 4  4  7 15  1 13  6 11  5 12]
 [ 3 14  9 14 15  7  8  7  6  9]
 [12  3  7 15  0  9 10  6  8  8]
 [ 7  8  5 15  0  1  6  7  2 15]
 [ 2  5  5  4  7 11  8 13  5  3]
 [11  6  2  4  2  1  5 12  4  7]
 [ 6 10  4  3 14  2 15  0 14  3]
 [ 8 12 11 13 11 15  7 12  4  5]]
b(1, 1, 1, 3): 
[[[[0 1 1]]]]

filter/w(4, 4, 3, 3): 
[[0 1 1 1]
 [0 1 1 0]
 [1 0 0 1]
 [0 1 0 1]]


In [413]:
#delta = np.array([1,0,1,2,2,0,2,1,1,0,1,1,2,2,0,1]).reshape(1,4,4,1)
delta = np.random.randint(0,3,(m,nH,nW,nC))
print("delta{}: \n{}".format(delta.shape,delta[0,:,:,0]))

delta(3, 10, 10, 3): 
[[1 0 2 0 0 1 0 1 1 2]
 [1 1 0 1 2 0 2 0 2 1]
 [1 0 2 1 2 1 2 1 1 1]
 [2 0 0 2 1 0 0 2 1 2]
 [1 1 2 2 1 0 1 0 1 2]
 [0 0 0 1 2 1 1 0 1 2]
 [2 0 1 0 1 2 1 1 1 0]
 [0 2 2 0 1 2 2 0 0 2]
 [0 0 2 0 1 0 0 0 2 2]
 [2 1 2 2 0 0 2 0 0 0]]


In [414]:
dA,dW,dB = BackPropagationConv(caches[0], delta)

ncPrev2:  3
ncPrev1:  3
AprevPad(3, 12, 12, 3): 
[[ 0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  7 15 11  5 14 15  2 11  8 13  0]
 [ 0 11  5 11  9 13  0 13 11  0 11  0]
 [ 0  4  4  7 15  1 13  6 11  5 12  0]
 [ 0  3 14  9 14 15  7  8  7  6  9  0]
 [ 0 12  3  7 15  0  9 10  6  8  8  0]
 [ 0  7  8  5 15  0  1  6  7  2 15  0]
 [ 0  2  5  5  4  7 11  8 13  5  3  0]
 [ 0 11  6  2  4  2  1  5 12  4  7  0]
 [ 0  6 10  4  3 14  2 15  0 14  3  0]
 [ 0  8 12 11 13 11 15  7 12  4  5  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0]]
dAprevPad(3, 12, 12, 3): 
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0

ValueError: operands could not be broadcast together with shapes (4,3,3) (4,4,3) (4,3,3) 

In [None]:
dA2,dW2,dB2 = conv_backward(delta,caches[0])

In [None]:
print("dA{}: \n{}".format(dA.shape, dA[0,:,:,0]))

In [None]:
print("dA2{}: \n{}".format(dA2.shape, dA2[0,:,:,0]))

In [None]:
print("dW{}: \n{}".format(dW.shape, dW[0,:,:,0]))

In [None]:
print("dW2{}: \n{}".format(dW2.shape, dW2[0,:,:,0]))

In [None]:
print("dB{}: \n{}".format(dB.shape, dB))

In [None]:
print("dB2{}: \n{}".format(dB2.shape, dB2))