In [90]:
import numpy as np
import matplotlib.pyplot as plt
from Pad import PadZero
from FC import forwardPass, backPropagation
#from Conv import feedForwardConv, Empty

In [91]:
def Empty(x):
    return x

In [92]:
def CacheForConv(cache1,cache2):
    cache = [cache1[0][0].copy(),cache2[1].copy(),cache2[2],cache2[3].copy()]
    shape = cache2[0].shape
    cache[0] = cache[0].reshape(shape)
    return cache

In [93]:
# GRADED FUNCTION: conv_forward

def FeedForwardConv(prevA,W,b,hyperparameters):
    """
    Implements the forward propagation for a convolution function
    
    Arguments:
    prevA -- output activations of the previous layer, numpy array of shape (m, nHprev, nWprev, nCprev)
    W -- Weights, numpy array of shape (f, f, nCprev, nC)
    b -- Biases, numpy array of shape (1, 1, 1, nC)
    hparameters -- python dictionary containing "stride" and "padding"
        
    Returns:
    Z -- conv output, numpy array of shape (m, nH, nW, nC)
    cache -- cache of values needed for the BackPropagationConv() function
    """
    
    ### START CODE HERE ###
    # Retrieve dimensions from A_prev's shape (≈1 line) 
    (m, oldNH, oldNW, nC) = prevA.shape
    
    # Retrieve dimensions from W's shape (≈1 line)
    (f,f,nCprev,nC) = W.shape
    
    # Retrieve information from "hparameters" (≈2 lines)
    stride = hyperparameters["stride"]
    pad = hyperparameters["padding"]
    
    # Compute the dimensions of the CONV output volume using the formula given above. Hint: use int() to floor. (≈2 lines)
    newNH = int((oldNH - f + 2*pad)/stride) + 1
    newNW = int((oldNH - f + 2*pad)/stride) + 1
    
    # Initialize the output volume Z with zeros. (≈1 line)
    Z = np.zeros((m,newNH, newNW, nC))
    #A = np.zeros((m,newNH, newNW, nC))
    
    if (pad != 0):
        # Create A_prev_pad by padding A_prev
        prevA = PadZero(prevA, pad)
    
    for m1 in range(m):                                                 # loop over the batch of training examples
        for i,h1 in enumerate(range(0,newNH,stride)):                   # loop over vertical axis of the output volume# Select ith training example's padded activation
            for j,w1 in enumerate(range(0,newNW, stride)):              # loop over horizontal axis of the output volume
                for c1 in range(nC):                                     # loop over channels (= #filters) of the output volume
                    Z[m1,i,j,c1] = np.sum((prevA[m1,h1:h1+f,w1:w1+f,:] * W[...,c1]) + b[...,c1])#[...,с1] нужно, чтобы размерности совпали
                    #A[m1,i,j,c1] = activationFunction(Z[m1,i,j,c1])

    ### END CODE HERE ###
    
    # Making sure your output shape is correct
    assert ((m,newNH,newNW,nC) == Z.shape)
    
    # Save information in "cache" for the backprop
    cache = [Z, W, b, hyperparameters]
    
    return Z, cache

In [94]:
def BackPropagationConv(cache, dZ):
    """
    Implement the backward propagation for a convolution function
    
    Arguments:
    dZ -- gradient of the cost with respect to the output of the conv layer (Z), numpy array of shape (m, nH, nW, nC)
    cache -- cache of values needed for the BackPropagationConv(), output of BackPropagationConv()
    
    Returns:
    dA_prev -- gradient of the cost with respect to the input of the conv layer (Aprev),
               numpy array of shape (m, nH_prev, nWprev, nCprev)
    dW -- gradient of the cost with respect to the weights of the conv layer (W)
          numpy array of shape (f, f, nCprev, nC)
    db -- gradient of the cost with respect to the biases of the conv layer (b)
          numpy array of shape (1, 1, 1, nC)
    """
    
    ### START CODE HERE ###
    # Retrieve information from "cache"
    (Aprev,W,b,hyperparameters) = cache
    
    # Retrieve dimensions from Aprev's shape
    (m,nHprev,nWprev,nCprev) = Aprev.shape
    
    # Retrieve dimensions from W's shape
    (f,f,nCprev,nC) = W.shape
    
    # Retrieve information from "hyperparameters"
    stride = hyperparameters["stride"]
    pad = hyperparameters["padding"]
    
    # Retrieve dimensions from dZ's shape
    (m, nH, nW, nC) = dZ.shape
    
    # Initialize dAprev, dW, db with the correct shapes
    dAprev = np.zeros(Aprev.shape)
    dW = np.zeros(W.shape)
    dB = np.zeros((1,1,1,nC))
    
    # Pad Aprev and dAprev
    AprevPad = PadZero(Aprev, pad)
    dAprevPad = PadZero(dAprev, pad)
    
    for m1 in range(m):                                                 # loop over the training examples
        for i,h1 in enumerate(range(0,nH-f+1,stride)):                  # loop over vertical axis of the output volume
            for j,w1 in enumerate(range(0,nW-f+1,stride)):              # loop over horizontal axis of the output volume
                for c1 in range(nC):                                    # loop over the channels of the output volume
                    
                    print("\n\nm1: {}\ni:{} h1:{}\nj:{} w1:{}\nc1:{}".format(m1,i,h1,j,w1,c1))
                    print("W1: \n",dW[...,c1])
                    print("a: \n", dAprevPad[m1,h1:h1+f,w1:w1+f,c1])
                    print("delta: \n",dZ[m1,i,j,c1])
                    
                    #dAprevPad[m1,h1:h1+f,w1:w1+f,:] += W[...,c1] * dZ[m1, h1:h1+f, w1:w1+f, c1]
                    dAprevPad[m1,h1:h1+f,w1:w1+f,:] += W[...,c1] * dZ[m1, i, j, c1]
                    #da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[...,c1] += AprevPad[m1,h1:h1+f,w1:w1+f,:] * dZ[m1,i,j,c1]
                    dB[...,c1] += dZ[m1,i,j,c1]
                    print("W: \n",dW[...,c1])
    
    dAprev = dAprevPad[:,pad:-pad,pad:-pad,:]
    print("dAprev{}: \n{}".format(dAprev.shape, dAprev))
    # Making sure your output shape is correct
    assert (dAprev.shape == (m, nHprev,nWprev,nCprev))       
                    
    return dAprev, dW, dB

In [95]:
m = 1
nH = 4
nW = 4
nC = 1
nCprev = 1

sizeFilter = 3

stride = 1
pad = 1

x = np.random.randint(0,16,(m,nH,nW,nC))
#x = np.array([1,2,0,2,2,1,0,0,2,0,0,0,2,2,0,0]).reshape(m,nH,nW,nC)
print("x{}: \n{}".format(x.shape,x[0,:,:,0]))


#w = np.ones((sizeFilter,sizeFilter,nCprev,nC), dtype = np.int32)
w = np.random.randint(0,2,(sizeFilter,sizeFilter,nCprev,nC))

#b = np.array([0]).reshape(1,1,1,nC)
b = np.random.randint(0,2,(1,1,1,nC))
print("b{}: \n{}".format(b.shape, b))

hyperparameters = {"stride": stride, "padding":pad}
caches2 = []
print("\nfilter/w{}: \n{}".format(w.shape,w[:,:,0]))


x(1, 4, 4, 1): 
[[ 3 11  4  7]
 [ 8  5  9  2]
 [10  4  4  7]
 [ 1  9  5 15]]
b(1, 1, 1, 1): 
[[[[0]]]]

filter/w(3, 3, 1, 1): 
[[[1]
  [0]
  [0]]

 [[0]
  [1]
  [1]]

 [[1]
  [1]
  [0]]]


In [96]:
a1, cache = FeedForwardConv(x,w,b,hyperparameters)

print("a1{}: \n{}".format(a1.shape,a1[0,:,:,0]))


a1(1, 4, 4, 1): 
[[22. 28. 25. 18.]
 [23. 31. 30. 17.]
 [15. 26. 30. 36.]
 [10. 24. 24. 19.]]


In [97]:
a2, cache = FeedForwardConv(a1,w,b,hyperparameters)

print("a2{}: \n{}".format(a2.shape, a2[0,:,:,0]))

a2(1, 4, 4, 1): 
[[ 73. 107. 104.  65.]
 [ 69. 124. 131. 108.]
 [ 51. 113. 145. 109.]
 [ 34.  63.  69.  49.]]


In [98]:
caches2.append(cache)

In [99]:
a2 = a2.reshape(m,a2[0,0,:,0].shape[0] * a2[0,:,0,0].shape[0],1, nC)

In [100]:
print("a2{}: \n{}".format(a2.shape,a2))

a2(1, 16, 1, 1): 
[[[[ 73.]]

  [[107.]]

  [[104.]]

  [[ 65.]]

  [[ 69.]]

  [[124.]]

  [[131.]]

  [[108.]]

  [[ 51.]]

  [[113.]]

  [[145.]]

  [[109.]]

  [[ 34.]]

  [[ 63.]]

  [[ 69.]]

  [[ 49.]]]]


In [101]:
#a = np.array([6,6,5,2,8,8,5,2,9,9,3,0,6,6,2,0], dtype = np.float).reshape(16,1)
#a = np.array([28,38,28,14,46,59,40,17,46,56,35,12,30,35,20,5], dtype = np.float).reshape(16,1)

size = [a2.shape[0],3,2]

#y = np.array([1,0]).reshape(size[-1],m)
y = np.random.randint(0,2,(size[-1],m)).reshape(size[-1],m)

#x = np.array([[1,2,3,1],[3,2,1,1]]).reshape(size[0],m)
x = a2
print("x{}: \n{}".format(x.shape,x))

w = [np.random.randint(1,4,(y,x)) for x,y in zip(size[:-1],size[1:])]
#w1 = np.array([[1,3,2,3,1,2,2,2,2,2,3,1,2,2,1,1],[1,1,2,1,1,3,1,3,3,1,3,3,2,3,1,1],[1,3,2,3,1,3,1,2,3,1,3,1,3,3,3,2]])
#w2 = np.array([[1,2,1],[1,1,2]])
#w = [w1,w2]
print("w: \n",w)

b = [np.random.randint(1,4,(y,1)) for y in size[1:]]
#b1 = np.array([[1,3,3]]).reshape(3,1)
#b2 = np.array([[1,2]]).reshape(2,1)
#b = [b1,b2]
print("b: \n",b)

learningRate = 1

hyperparameters = {"learningRate":learningRate}


x(1, 16, 1, 1): 
[[[[ 73.]]

  [[107.]]

  [[104.]]

  [[ 65.]]

  [[ 69.]]

  [[124.]]

  [[131.]]

  [[108.]]

  [[ 51.]]

  [[113.]]

  [[145.]]

  [[109.]]

  [[ 34.]]

  [[ 63.]]

  [[ 69.]]

  [[ 49.]]]]
w: 
 [array([[3],
       [1],
       [3]]), array([[3, 2, 2],
       [2, 3, 1]])]
b: 
 [array([[3],
       [1],
       [2]]), array([[2],
       [3]])]


In [102]:
yHat, cache = forwardPass(x,y,w,b,hyperparameters,Empty)

print("yHat: \n",yHat)

ValueError: operands could not be broadcast together with shapes (3,1,16,1) (3,1) 

In [None]:
caches2.append(cache)



In [None]:
nablaB, nablaW, delta = backPropagation(caches2[1], Empty)
print(nablaW.shape)
[print(newW) for newW in nablaW]

In [None]:
a2 = a2.reshape(m,nH,nW,nC)
print("a2{}: \n{}".format(a2.shape, a2[0,:,:,0]))

In [None]:
caches3 = CacheForConv(caches2[1],caches2[0])
print("caches3[0]{}: \n{}".format(caches3[0].shape,caches3[0]))
#caches2[1][0] = [a.reshape(1,len(a),1,1) for a in caches2[1][0]]
#print("caches2[1][0][0]: \n",caches2[1][0])

In [None]:
delta = np.array([1,0,1,2,2,0,2,1,1,0,1,1,2,2,0,1]).reshape(1,4,4,1)
#delta = np.random.randint(0,3,(m,nH,nW,nC))
print("delta{}: \n{}".format(delta.shape,delta))

In [None]:
dA,dW,dB = BackPropagationConv(caches3, delta)

In [None]:
print("dW{}: \n{}".format(dW.shape,dW[...,0,0]))