In [16]:
import numpy as np

In [17]:
def CreateMaskForWindow(x):
    """
    Creates a mask from an input matrix x, to identify the max entry of x.
    
    Arguments:
    x -- Array of shape (f, f)
    
    Returns:
    mask -- Array of the same shape as window, contains a True at the position corresponding to the max entry of x.
    """
    
    ### START CODE HERE ### (≈1 line)
    mask = x == np.max(x)
    ### END CODE HERE ###
    
    return mask

In [18]:
np.random.seed(1)
x = np.random.randn(2,3)
mask = CreateMaskForWindow(x)
print("x{}: \n{}".format(x.shape,x))
print("mask{}: \n{}".format(mask.shape, mask))

x(2, 3): 
[[ 1.62434536 -0.61175641 -0.52817175]
 [-1.07296862  0.86540763 -2.3015387 ]]
mask(2, 3): 
[[ True False False]
 [False False False]]


In [19]:
def DistributeValue(dz, shape):
    """
    Distributes the input value in the matrix of dimension shape
    
    Arguments:
    dz -- input scalar
    shape -- the shape (n_H, n_W) of the output matrix for which we want to distribute the value of dz
    
    Returns:
    a -- Array of size (n_H, n_W) for which we distributed the value of dz
    """
    ### START CODE HERE ###
    # Retrieve dimensions from shape (≈1 line)
    (n_H, n_W) = shape
    
    # Compute the value to distribute on the matrix (≈1 line)
    average = dz / (n_H * n_W)
    
    # Create a matrix where every entry is the "average" value (≈1 line)
    a = np.ones(shape) * average
    ### END CODE HERE ###
    
    return a

In [20]:
dz = 2
prevShape = (2,2)
a = DistributeValue(dz,prevShape)
print("a{}: \n{}".format(a.shape,a))

a(2, 2): 
[[0.5 0.5]
 [0.5 0.5]]


In [21]:
# GRADED FUNCTION: pool_forward

def PoolForward(Aprev, hyperparameters, mode = "max"):
    """
    Implements the forward pass of the pooling layer
    
    Arguments:
    A_prev -- Input data, numpy array of shape (m, nH, nW, nC)
    hparameters -- python dictionary containing "f" and "stride"
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")
    
    Returns:
    A -- output of the pool layer, a numpy array of shape (m, nH, nW, nC)
    cache -- cache used in the backward pass of the pooling layer, contains the input and hparameters 
    """
    
    # Retrieve hyperparameters from "hparameters"
    stride = hyperparameters["stride"]
    f= hyperparameters['f']
    
    # Retrieve dimensions from the input shape
    (m, nH, nW, nC) = Aprev.shape#Input shape of the tensor
    
    # Define the dimensions of the output
    newNH = int((nH - f)/stride)+1#Output height of the tensor
    newNW = int((nH - f)/stride)+1#Output width of the tensor
    
    # Initialize output matrix A
    A = np.zeros((m,newNH, newNW, nC), dtype=np.int32)#Output tensor
    
    
    ### START CODE HERE ###
    for m1 in range(m):                                             # loop over the training examples
        for i,h1 in enumerate(range(0,nH,stride)):                  # loop on the vertical axis of the output volume
            for j,w1 in enumerate(range(0,nW,stride)):              # loop on the horizontal axis of the output volume
                for c1 in range(nC):                                # loop over the channels of the output volume
                    try:
                        if mode == "max":
                            A[m1,i,j,c1] = np.max(Aprev[m1,h1:h1+f,w1:w1+f,c1]) 
                        elif mode == "average":
                            A[m1,i,j,c1] = int(np.mean(Aprev[m1,h1:h1+f,w1:w1+f,c1]))
                    except:
                        break
    ### END CODE HERE ###
    
    # Store the input and hparameters in "cache" for BackPropagationPool()
    cache = (Aprev, hyperparameters, mode)# Parameters for back propagation
    
    return A, cache
    
    
    

In [32]:
def BackPropagationPool(dA, cache, mode = None):
    """
    Implements the backward pass of the pooling layer
    
    Arguments:
    dA -- gradient of cost with respect to the output of the pooling layer, same shape as A
    cache -- cache output from the forward pass of the pooling layer, contains the layer's input and hparameters 
    mode -- the pooling mode you would like to use, defined as a string ("max" or "average")
    
    Returns:
    dA_prev -- gradient of cost with respect to the input of the pooling layer, same shape as A_prev
    """
    ### START CODE HERE ###
    
    # Retrieve information from cache (≈1 line)
    (Aprev, hyperparameters, modeCache) = cache
    
    print("m: ",mode)
    mode = "max" if mode == None and modeCache == None else modeCache if mode == None else mode 
    
    # Retrieve hyperparameters from "hparameters" (≈2 lines)
    stride = hyperparameters["stride"]
    f = hyperparameters["f"]
    
    # Retrieve dimensions from A_prev's shape and dA's shape (≈2 lines)
    (m, nHprev, nWprev, nCprev) = Aprev.shape
    (m, nH, nW, nC) = dA.shape
    
    # Initialize dA_prev with zeros (≈1 line)
    dAprev = np.zeros(Aprev.shape)
    
    for m1 in range(m):                                               # loop over the training examples
        for i,h1 in enumerate(range(0,nHprev-f+1, stride)):           # loop on the vertical axis
            print("h1: ",h1)
            for j,w1 in enumerate(range(0,nWprev-f+1, stride)):       # loop on the horizontal axis
                print("w1: ",w1)
                for c1 in range(nC):                                  # loop over the channels (depth)
                    # Compute the backward propagation in both modes.
                    #print("1)dAprev{}: \n{}".format(dAprev[m1,h1:h1+f,w1:w1+f,c1].shape,dAprev[m1,h1:h1+f,w1:w1+f,c1]))

                    if mode == "max": 
                        dAprev[m1,h1:h1+f,w1:w1+f,c1] += CreateMaskForWindow(Aprev[m1,h1:h1+f,w1:w1+f,c1]) * dA[m1,i,j,c1]
                    elif mode == "average":
                        #print("dA{}: \n{}".format(dA[m1,i,j,c1].shape,dA[m1,i,j,c1]))
                        s = DistributeValue(dA[m1,i,j,c1], (f,f))
                        dAprev[m1,h1:h1+f,w1:w1+f,c1] += s
                    #print("2)dAprev{}: \n{}".format(dAprev[m1,h1:h1+f,w1:w1+f,c1].shape,dAprev[m1,h1:h1+f,w1:w1+f,c1]))

                  
    ### END CODE ###
    
    # Making sure your output shape is correct
    assert(dAprev.shape == Aprev.shape)
    
    return dAprev

In [33]:
np.random.seed(1)
Aprev = np.random.randn(5,5,3,2)
hyperparameters = {"stride": 2, "f":1}
A, cache = PoolForward(Aprev, hyperparameters)
dA = np.random.randn(5,4,2,2,)
#print("dA{}: \n{}".format(dA.shape,dA))
modes = ("max","average")

for mode in modes:
    dAprev = BackPropagationPool(dA, cache, mode)
    print("\nmode: ", mode)
    print("dAprev{}: \n{}".format(dAprev[0,:,:,0].shape,dAprev[0,:,:,0]))

m:  max
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2

mode:  max
dAprev(5, 3): 
[[-0.31011677  0.          1.0388246 ]
 [ 0.          0.          0.        ]
 [ 0.44136444  0.         -0.13644474]
 [ 0.          0.          0.        ]
 [ 0.01740941  0.         -0.51709446]]
m:  average
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2
h1:  0
w1:  0
w1:  2
h1:  2
w1:  0
w1:  2
h1:  4
w1:  0
w1:  2

mode:  average
dAprev(5, 3): 
[[-0.31011677  0.          1.0388246 ]
 [ 0.          0.          0.        ]
 [ 0.44136444  0. 