<a href="https://colab.research.google.com/github/MinhVuong2000/Data-Science/blob/master/BackpropagationANN%26DNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Good blogs
1. https://missinglink.ai/guides/neural-network-concepts/backpropagation-neural-networks-process-examples-code-minus-math/#:~:text=Backpropagation%20is%20an%20algorithm%20commonly,one%2C%20given%20the%20initial%20weights.
2. https://www.linkedin.com/pulse/forward-back-propagation-over-cnn-code-from-scratch-coy-ulloa/
3. https://www.jefkine.com/general/2016/09/05/backpropagation-in-convolutional-neural-networks/
4. https://datascience.stackexchange.com/questions/27506/back-propagation-in-cnn
5. https://medium.com/@pavisj/convolutions-and-backpropagations-46026a8f5d2c

In [None]:
def conv_forward(X, W):
    '''
    The forward computation for a single-channel, single-filter convolution.

    The filter is slid over X with stride 1 and no padding; every output
    entry is the sum of the element-wise product between the filter and the
    input window it currently covers (the filter is not flipped).

    Arguments:
    X -- output activations of the previous layer, numpy array of shape (n_H_prev, n_W_prev) assuming input channels = 1
    W -- Weights, numpy array of shape (f_h, f_w) assuming number of filters = 1;
         square (f, f) filters behave as before, rectangular filters are also accepted

    Returns:
    H -- conv output, numpy array of shape (n_H_prev - f_h + 1, n_W_prev - f_w + 1)
    cache -- tuple (X, W) of the values needed by the conv_backward() function
    '''

    # Retrieving dimensions from X's shape
    n_H_prev, n_W_prev = X.shape

    # Height and width of the filter are read separately so that a
    # non-square filter is handled correctly instead of being mis-sized.
    f_h, f_w = W.shape

    # Output dimensions assuming no padding and stride = 1
    n_H = n_H_prev - f_h + 1
    n_W = n_W_prev - f_w + 1

    # Initialize the output H with zeros
    H = np.zeros((n_H, n_W))

    # Looping over vertical(h) and horizontal(w) axis of the output
    for h in range(n_H):
        for w in range(n_W):
            x_slice = X[h:h + f_h, w:w + f_w]
            H[h, w] = np.sum(x_slice * W)

    # Saving information in 'cache' for backprop
    cache = (X, W)

    return H, cache

In [None]:
def conv_backward(dH, cache):
    '''
    The backward computation for a single-channel, single-filter convolution
    (stride 1, no padding).

    Arguments:
    dH -- gradient of the cost with respect to output of the conv layer (H), numpy array of shape (n_H, n_W) assuming channels = 1
    cache -- tuple (X, W) as returned by conv_forward()

    Returns:
    dX -- gradient of the cost with respect to input of the conv layer (X), numpy array with the same shape as X
    dW -- gradient of the cost with respect to the weights of the conv layer (W), numpy array with the same shape as W
    '''

    # Retrieving information from the "cache"
    (X, W) = cache

    # Filter height and width (read separately so rectangular filters work)
    f_h, f_w = W.shape

    # Retrieving dimensions from dH's shape
    n_H, n_W = dH.shape

    # Initializing dX, dW with the correct shapes
    dX = np.zeros(X.shape)
    dW = np.zeros(W.shape)

    # Looping over vertical(h) and horizontal(w) axis of the output
    for h in range(n_H):
        for w in range(n_W):
            # dH is an array and must be indexed, not called.
            grad = dH[h, w]
            # Every input pixel under the window receives the filter weight
            # scaled by the upstream gradient of this output position.
            dX[h:h + f_h, w:w + f_w] += W * grad
            # Every filter weight receives the input it was multiplied with,
            # scaled by the same upstream gradient.
            dW += X[h:h + f_h, w:w + f_w] * grad

    return dX, dW

In [None]:
import tensorflow as tf

# NOTE(review): `x0` (the network input fed to the first matmul) and `y_`
# (the target values) are not defined in this cell; they must already exist
# when it runs -- confirm against the rest of the notebook.
# NOTE(review): tf.random_uniform, tf.Session and tf.train.* are the
# TensorFlow 1.x graph-mode API -- confirm the installed TensorFlow version.
y0 = tf.constant(y_, dtype=tf.float32)

# Layer 1 = the 2x3 hidden sigmoid
m1 = tf.Variable(tf.random_uniform([2, 3], minval=0.1, maxval=0.9, dtype=tf.float32))
b1 = tf.Variable(tf.random_uniform([3], minval=0.1, maxval=0.9, dtype=tf.float32))
h1 = tf.sigmoid(tf.matmul(x0, m1) + b1)

# Layer 2 = the 3x1 sigmoid output
m2 = tf.Variable(tf.random_uniform([3, 1], minval=0.1, maxval=0.9, dtype=tf.float32))
b2 = tf.Variable(tf.random_uniform([1], minval=0.1, maxval=0.9, dtype=tf.float32))
y_out = tf.sigmoid(tf.matmul(h1, m2) + b2)


### loss
# loss : sum of the squares of y0 - y_out
# (the subtraction must be an ASCII minus sign; a typographic dash is a
# syntax error)
loss = tf.reduce_sum(tf.square(y0 - y_out))

# training step : gradient descent (1.0) to minimize loss
train = tf.train.GradientDescentOptimizer(1.0).minimize(loss)


### training
# run 500 times using all the X and Y
# print out the loss and any other interesting info
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(500):
        sess.run(train)

    results = sess.run([m1, b1, m2, b2, y_out, loss])
    labels = "m1,b1,m2,b2,y_out,loss".split(",")
    for label, result in zip(labels, results):
        print("")
        print(label)
        print(result)

print("")

In [None]:
#!/usr/bin/env python3
"""Convolutional Neural Networks"""

import numpy as np


def conv_forward(A_prev, W, b, activation, padding="same", stride=(1, 1)):
    """Forward propagation over a convolutional layer.

    Arg:
       A_prev: output of the previous layer, shape (m, h_prev, w_prev, c_prev)
       W: filters for the convolution, shape (kh, kw, c_prev, c_new)
       b: biases, shape (1, 1, 1, c_new)
       activation: callable applied to the pre-activation values; it is
           called once per output position and filter with an array of
           shape (m,)
       padding: string 'same' or 'valid'
       stride: tuple (sh, sw)

    Return: activated convolution output, np.array of shape
        (m, out_h, out_w, c_new)

    Raises:
        ValueError: if padding is neither 'same' nor 'valid'.
    """
    m, h_prev, w_prev, _ = A_prev.shape
    k_h, k_w, _, c_new = W.shape
    s_h, s_w = stride

    if padding == 'valid':
        p_h = 0
        p_w = 0
    elif padding == 'same':
        p_h = int(np.ceil((s_h * h_prev - s_h + k_h - h_prev) / 2))
        p_w = int(np.ceil((s_w * w_prev - s_w + k_w - w_prev) / 2))
    else:
        # Fail with a clear message rather than an UnboundLocalError on p_h.
        raise ValueError(
            "padding must be 'same' or 'valid', got {!r}".format(padding))

    # Zero-pad only the two spatial axes; batch and channel axes are untouched.
    padded = np.pad(A_prev, [(0, 0), (p_h, p_h), (p_w, p_w), (0, 0)],
                    mode='constant', constant_values=0)

    out_h = int((h_prev - k_h + 2 * p_h) / s_h + 1)
    out_w = int((w_prev - k_w + 2 * p_w) / s_w + 1)
    output_conv = np.zeros((m, out_h, out_w, c_new))

    for i in range(out_h):
        top = i * s_h
        for j in range(out_w):
            left = j * s_w
            # The window is the same for every filter, so slice it once.
            window = padded[:, top:top + k_h, left:left + k_w, :]
            for f in range(c_new):
                z = np.sum(window * W[:, :, :, f], axis=(1, 2, 3))
                output_conv[:, i, j, f] = activation(z + b[0, 0, 0, f])

    return output_conv


if __name__ == "__main__":
    # Demo: convolve the MNIST training images with two random 3x3 filters
    # and display the input image next to both resulting feature maps.
    # `plt` is used below, so import it here to keep the demo runnable.
    import matplotlib.pyplot as plt

    np.random.seed(0)
    lib = np.load('../data/MNIST.npz')
    X_train = lib['X_train']
    m, h, w = X_train.shape
    # Append a channel axis of size 1 for conv_forward.
    X_train_c = X_train.reshape((-1, h, w, 1))

    W = np.random.randn(3, 3, 1, 2)
    b = np.random.randn(1, 1, 1, 2)

    def relu(Z):
        """Element-wise rectified linear unit."""
        return np.maximum(Z, 0)

    plt.imshow(X_train[0])
    plt.show()
    A = conv_forward(X_train_c, W, b, relu, padding='valid')
    print(A.shape)
    plt.imshow(A[0, :, :, 0])
    plt.show()
    plt.imshow(A[0, :, :, 1])
    plt.show()


In [None]:
#!/usr/bin/env python3
"""Convolutional Neural Networks"""

import numpy as np


def conv_backward(dZ, A_prev, W, b, padding="same", stride=(1, 1)):
    """Back propagation over a convolutional layer.

    Arg:
       dZ: partial derivatives with respect to the unactivated output of the
           layer, shape (m, h_new, w_new, c_new)
       A_prev: output of the previous layer, shape (m, h_prev, w_prev, c_prev)
       W: filters for the convolution, shape (kh, kw, c_prev, c_new)
       b: biases, shape (1, 1, 1, c_new); accepted for interface symmetry
           with the forward pass but not read here
       padding: string 'same' or 'valid'
       stride: tuple (sh, sw)

    Returns: partial derivatives with respect to the previous layer
        (dA_prev, same shape as A_prev), the kernels (dW, same shape as W)
        and the biases (db, shape (1, 1, 1, c_new))

    Raises:
        ValueError: if padding is neither 'same' nor 'valid'.
    """
    k_h, k_w, _, _ = W.shape
    _, h_new, w_new, _ = dZ.shape
    _, h_x, w_x, _ = A_prev.shape
    s_h, s_w = stride

    if padding == 'valid':
        p_h = 0
        p_w = 0
    elif padding == 'same':
        p_h = int(np.ceil((s_h * h_x - s_h + k_h - h_x) / 2))
        p_w = int(np.ceil((s_w * w_x - s_w + k_w - w_x) / 2))
    else:
        # Fail with a clear message rather than an UnboundLocalError on p_h.
        raise ValueError(
            "padding must be 'same' or 'valid', got {!r}".format(padding))

    # The bias is added to every sample and spatial position, so its
    # gradient is the sum of dZ over those three axes.
    db = np.sum(dZ, axis=(0, 1, 2), keepdims=True)

    x_padded = np.pad(A_prev, [(0, 0), (p_h, p_h), (p_w, p_w), (0, 0)],
                      mode='constant', constant_values=0)

    dW = np.zeros_like(W)
    dx = np.zeros(x_padded.shape)

    for h in range(h_new):
        top = h * s_h
        for w in range(w_new):
            left = w * s_w
            grad = dZ[:, h, w, :]                                # (m, c_new)
            window = x_padded[:, top:top + k_h, left:left + k_w, :]

            # Input gradient for this window: sum over filters of
            # dZ[i, h, w, f] * W[:, :, :, f], for every sample i at once.
            dx[:, top:top + k_h, left:left + k_w, :] += np.tensordot(
                grad, W, axes=([1], [3]))

            # Kernel gradient: sum over samples of
            # window[i] * dZ[i, h, w, f], for every filter f at once.
            dW += np.tensordot(window, grad, axes=([0], [0]))

    # Strip the padding with explicit end indices: a `p:-p` slice would be
    # empty whenever the computed padding is 0 (e.g. 'same' with a 1x1
    # kernel).
    dx = dx[:, p_h:p_h + h_x, p_w:p_w + w_x, :]

    return dx, dW, db



if __name__ == "__main__":
    # Seed the generator so the random kernels, biases and gradients drawn
    # below are reproducible from run to run.
    np.random.seed(0)

    # Load the MNIST training images and keep the first ten, appending a
    # channel axis of size 1.
    lib = np.load('../data/MNIST.npz')
    X_train = lib['X_train']
    h, w = X_train.shape[1:]
    X_train_c = X_train[:10].reshape((-1, h, w, 1))

    # Two random 3x3 single-channel filters with matching biases.
    W = np.random.randn(3, 3, 1, 2)
    b = np.random.randn(1, 1, 1, 2)

    # A 'valid' 3x3 convolution shrinks each spatial dimension by 2.
    dZ = np.random.randn(10, h - 2, w - 2, 2)

    gradients = conv_backward(dZ, X_train_c, W, b, padding="valid")
    print(gradients)
