In [1]:
import numpy as np

# Max Pooling layer

In [2]:
def maxpooling(array, pool_size, stride, comming_gradient):
    """Max-pool a channels-last batch and back-propagate a gradient through it.

    Parameters
    ----------
    array : ndarray, shape (batch, height, width, channels)
        Input of the pooling layer.
    pool_size : int
        Side length of the square pooling window.
    stride : int
        Step between consecutive windows, used for both spatial axes.
    comming_gradient : ndarray, shape (batch, out_h, out_w, channels)
        Gradient of the loss with respect to the pooled output.

    Returns
    -------
    output : ndarray of float, shape (batch, out_h, out_w, channels)
        Maximum of every window, with
        ``out_h = int((height - pool_size) / stride) + 1`` (same for width).
    gradient : ndarray of float, same shape as ``array``
        Gradient of the loss with respect to ``array``.

    Notes
    -----
    * Every element equal to its window's maximum receives the window's
      gradient, so ties are not split or de-duplicated.
    * Contributions of overlapping windows (``stride < pool_size``) are
      summed; an input element that is the maximum of several windows gets
      the sum of their gradients.
    """
    b,hi,wi,c = array.shape
    ho = int((hi-pool_size)/stride) + 1
    wo = int((wi-pool_size)/stride) + 1
    output = np.zeros((b,ho,wo,c))

    #forward
    for i in range(ho):
        hstart = i * stride
        hend = hstart + pool_size
        for j in range(wo):
            vstart = j * stride
            vend = vstart + pool_size

            window = array[:,hstart:hend, vstart:vend,:]
            output[:,i,j,:] = np.max(window,axis=(1, 2))

    #backward
    gradient = np.zeros(array.shape)#gradients have the same size of inputs
    # Only the spatial extent of the upstream gradient drives the loops; the
    # batch/channel sizes of the input must not be rebound from it.
    _,hg,wg,_ = comming_gradient.shape

    for i in range(hg):
        hstart = i * stride
        hend = hstart + pool_size

        for j in range(wg):
            vstart = j * stride
            vend = vstart + pool_size

            window = array[:,hstart:hend, vstart:vend,:]
            # keepdims leaves the max as (batch,1,1,channels) so it broadcasts
            # against the window when building the arg-max mask.
            window_max = np.max(window,axis=(1, 2),keepdims=True)
            mask = ( window == window_max )
            upstream = comming_gradient[:, i, j, :][:, np.newaxis, np.newaxis, :]

            # Accumulate instead of assigning: with overlapping windows a plain
            # assignment would wipe out what earlier windows already wrote.
            gradient[:,hstart:hend, vstart:vend,:] += mask * upstream

    return output, gradient

In [3]:
# Demo input for the pooling layer: 3 samples, each 4x4 spatial with 2 channels
# (channels-last layout, i.e. random_array[sample, row, col, channel]).
random_array = np.array([[[[23, 95],[92, 43],[28,  2],[51, 89]],
                          [[30, 95],[98, 77],[69, 42],[ 2,  6]],
                          [[16, 75],[28, 11],[ 2, 86],[66, 17]],
                          [[34, 22],[74, 26],[68, 38],[83,  8]]],
                         
                         [[[83, 95],[15, 28],[60, 47],[42, 43]],
                          [[73, 94],[70, 44],[34, 56],[42, 29]],
                          [[54, 75],[82, 35],[25, 78],[77, 43]],
                          [[ 2, 82],[19, 16],[44, 21],[67, 33]]],
                         
                         [[[67, 32],[ 4, 60],[70,  8],[48, 57]],
                          [[ 3, 91],[33,  1],[53, 65],[17, 92]],
                          [[22, 60],[91, 57],[86, 87],[42, 35]],
                          [[56, 57],[20, 80],[90, 34],[99, 91]]]]) #shape (3,4,4,2)

 

# Upstream gradient fed to the backward pass: all ones, one entry per pooled
# output element (a 4x4 input with a 2x2 window and stride 2 pools to 2x2).
comming_gradient = np.array([[[[1, 1],[1, 1]],
                              [[1, 1],[1, 1]]],
                              
                             [[[1, 1],[1, 1]],
                              [[1, 1],[1, 1]]],
                                                            
                             [[[1, 1],[1, 1]],
                              [[1, 1],[1, 1]]]]) #shape (3,2,2,2)

In [11]:
# Run the pooling layer with a 2x2 window and stride 2, then show sample 0 /
# channel 0 of the input, of the pooled output and of the back-propagated gradient.
# NOTE(review): the output recorded under this cell contains negative inputs that
# the pooling demo array does not have (it matches the array defined in the
# ReLU/LeakyReLU section) — presumably a stale, out-of-order run; re-execute to refresh.
forward, backward = maxpooling(random_array, 2, 2,comming_gradient)        
print(f"first channel from the first sample of the input array :\n {random_array[0,:,:,0]} \n")
print(f"maxpooling output :\n {forward[0,:,:,0]} \n")
print(f"backward output when gradients of output with respect to maxpooling are ones:\n {backward[0,:,:,0]} \n")

first channel from the first sample of the input array :
 [[ 23  92  28 -51]
 [-30 -98 -69   2]
 [ 16  28   2  66]
 [ 34  74 -68  83]] 

maxpooling output :
 [[92. 28.]
 [74. 83.]] 

backward output when gradients of output with respect to maxpooling equal ones:
 [[0. 1. 1. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 1. 0. 1.]] 



# ReLU and LeakyReLU

In [8]:
def ReLU(array, comming_gradients):
    """Apply ReLU to ``array`` and push ``comming_gradients`` back through it.

    Parameters
    ----------
    array : ndarray
        Input of the activation.
    comming_gradients : array_like
        Gradient of the loss with respect to the activation output;
        multiplied element-wise (with broadcasting) by the local derivative.

    Returns
    -------
    output : ndarray
        ``max(0, x)`` for every element of ``array``.
    gradients : ndarray
        ``comming_gradients`` where the input is strictly positive,
        0 where the input is ``<= 0``.
    """
    # forward: clamp everything below zero
    activated = np.maximum(0, array)

    # backward: local derivative is 0 for non-positive inputs, 1 otherwise
    non_positive = array <= 0
    local_slope = np.where(non_positive, 0, 1)
    input_gradients = np.multiply(local_slope, comming_gradients)

    return activated, input_gradients
        

def LeakyReLU(array,alpha,comming_gradients):
    """Apply LeakyReLU to ``array`` and push ``comming_gradients`` back through it.

    Parameters
    ----------
    array : ndarray
        Input of the activation.
    alpha : float
        Slope used for non-positive inputs.
    comming_gradients : array_like
        Gradient of the loss with respect to the activation output;
        multiplied element-wise (with broadcasting) by the local derivative.

    Returns
    -------
    output : ndarray
        ``x`` where ``x > 0`` and ``alpha * x`` where ``x <= 0``.
    gradients : ndarray
        ``comming_gradients`` scaled by 1 where ``x > 0`` and by ``alpha``
        where ``x <= 0``.
    """
    non_positive = array <= 0

    # forward
    # Select the branch with the same mask the backward pass uses.
    # max(alpha*x, x) only equals LeakyReLU when alpha <= 1; for alpha > 1 it
    # picks the wrong branch and disagrees with the slopes returned below.
    output = np.where(non_positive, alpha*array, array)

    # backward
    temp = np.ones(array.shape) #slope when x>0
    temp[ non_positive ] = alpha  #slope when x<=0
    gradients = np.multiply(temp, comming_gradients)

    return output, gradients

In [12]:
# Demo input for the activation functions: same layout as before
# (sample, row, col, channel) but with negative entries so that the
# non-positive branch of ReLU / LeakyReLU is exercised.
random_array = np.array([[[[23, 95],[92, 43],[28,  2],[-51, 89]],
                          [[-30, 95],[-98, 77],[-69, 42],[ 2,  6]],
                          [[16, -75],[28, 11],[ 2, 86],[66, 17]],
                          [[34, 22],[74, 26],[-68, 38],[83,  8]]],
                         
                         [[[83, 95],[15, 28],[-60, 47],[42, 43]],
                          [[73, -94],[70, 44],[-34, 56],[42, -29]],
                          [[54, 75],[82, 35],[25, 78],[77, -43]],
                          [[ -2, 82],[19, 16],[44, 21],[67, 33]]],
                         
                         [[[67, -32],[ -4, 60],[70,  8],[-48, 57]],
                          [[ 3, 91],[33,  1],[53, 65],[17, 92]],
                          [[22, 60],[91, 57],[-86, 87],[-42, 35]],
                          [[56, 57],[20, 80],[90, -34],[99, 91]]]]) #shape (3,4,4,2)
# Upstream gradient of all ones with the same shape and dtype as the input,
# so the printed backward result is exactly the local derivative.
comming_gradients = np.ones_like(random_array)

In [15]:
# Run ReLU forward and backward on the demo array and show sample 0 / channel 0
# of the input, the activation and the gradient that flows back.
forward,backward= ReLU(random_array, comming_gradients)
print(f"first channel from the first sample of the input array :\n {random_array[0,:,:,0]} \n")
print(f"ReLU output :\n {forward[0,:,:,0]} \n")
print(f"ReLU backward output when gradients of output with respect to it are ones:\n {backward[0,:,:,0]} \n")

first channel from the first sample of the input array :
 [[ 23  92  28 -51]
 [-30 -98 -69   2]
 [ 16  28   2  66]
 [ 34  74 -68  83]] 

ReLU output :
 [[23 92 28  0]
 [ 0  0  0  2]
 [16 28  2 66]
 [34 74  0 83]] 

ReLU backward output when gradients of output with respect to it are ones:
 [[1 1 1 0]
 [0 0 0 1]
 [1 1 1 1]
 [1 1 0 1]] 



In [19]:
# Run LeakyReLU (negative-side slope 0.001) forward and backward on the demo
# array and show sample 0 / channel 0 of the input, the activation and the gradient.
forward,backward= LeakyReLU(random_array,0.001, comming_gradients)
print(f"first channel from the first sample of the input array :\n {random_array[0,:,:,0]} \n")
print(f"LeakyReLU output :\n {forward[0,:,:,0]} \n")
print(f"LeakyReLU backward output when gradients of output with respect to it are ones:\n {backward[0,:,:,0]} \n")

first channel from the first sample of the input array :
 [[ 23  92  28 -51]
 [-30 -98 -69   2]
 [ 16  28   2  66]
 [ 34  74 -68  83]] 

LeakyReLU output :
 [[ 2.3e+01  9.2e+01  2.8e+01 -5.1e-02]
 [-3.0e-02 -9.8e-02 -6.9e-02  2.0e+00]
 [ 1.6e+01  2.8e+01  2.0e+00  6.6e+01]
 [ 3.4e+01  7.4e+01 -6.8e-02  8.3e+01]] 

LeakyReLU backward output when gradients of output with respect to it are ones:
 [[1.    1.    1.    0.001]
 [0.001 0.001 0.001 1.   ]
 [1.    1.    1.    1.   ]
 [1.    1.    0.001 1.   ]] 



# Convolution

In [33]:
def convolution(random_array,kernel,stride=1):
    """Slide a bank of kernels over a channels-last batch without padding.

    Each output value is the sum of the element-wise product between one
    kernel and the input patch under it (the kernel is not flipped).

    Parameters
    ----------
    random_array : ndarray, shape (batch, height, width, channels)
        Input batch.
    kernel : ndarray, shape (n_kernels, kernel_h, kernel_w, channels)
        Kernels; the trailing axes must broadcast against an input patch.
    stride : int, default 1
        Step between consecutive patches on both spatial axes.

    Returns
    -------
    ndarray of float, shape (batch, out_h, out_w, n_kernels)
        ``out_h = int((height - kernel_h) / stride) + 1`` (same for width).
    """
    batch, in_h, in_w, _ = random_array.shape
    n_kernels, k_h, k_w, _ = kernel.shape
    out_h = int((in_h - k_h) / stride) + 1
    out_w = int((in_w - k_w) / stride) + 1
    result = np.zeros((batch, out_h, out_w, n_kernels))

    for row in range(out_h):
        top = row * stride
        for col in range(out_w):
            left = col * stride
            # One patch per output position, shared by every kernel.
            patch = random_array[:, top:top + k_h, left:left + k_w, :]
            for idx in range(n_kernels):
                weighted = kernel[idx, :, :, :] * patch
                result[:, row, col, idx] = np.sum(weighted, axis=(1, 2, 3))
    return result

In [34]:
# Demo input for the convolution: 3 samples, 4x4 spatial, 2 channels
# (channels-last, random_array[sample, row, col, channel]).
random_array = np.array([[[[23, 95],[92, 43],[28,  2],[51, 89]],
                         [[30, 95],[98, 77],[69, 42],[ 2,  6]],
                         [[16, 75],[28, 11],[ 2, 86],[66, 17]],
                         [[34, 22],[74, 26],[68, 38],[83,  8]]],
                        
                        [[[83, 95],[15, 28],[60, 47],[42, 43]],
                         [[73, 94],[70, 44],[34, 56],[42, 29]],
                         [[54, 75],[82, 35],[25, 78],[77, 43]],
                         [[ 2, 82],[19, 16],[44, 21],[67, 33]]],
                        
                        [[[67, 32],[ 4, 60],[70,  8],[48, 57]],
                         [[ 3, 91],[33,  1],[53, 65],[17, 92]],
                         [[22, 60],[91, 57],[86, 87],[42, 35]],
                         [[56, 57],[20, 80],[90, 34],[99, 91]]]]) #shape (3,4,4,2)

# Two 2x2 kernels with 2 channels each, indexed kernel[kernel_idx, row, col, channel].
kernel = np.array([[[[0, 1],[1, 1]],
                    [[0, 1],[0, 0]]],
                   [[[1, 0],[1, 1]],
                    [[0, 1],[1, 1]]]]) #shape (2,2,2,2)

In [41]:
# Convolve the demo batch with both kernels (default stride of 1), then show
# both channels of sample 0, both channels of kernel 0, and the feature map
# that kernel 0 produces for sample 0.
output = convolution(random_array, kernel)
print(f"input array:\nfirst channel: \n{random_array[0,:,:,0]} \n")
print(f"second channel: \n{random_array[0,:,:,1]} \n")

print(f"kernel :\nfirst channel: \n{kernel[0,:,:,0]} \n")
print(f"second channel: \n{kernel[0,:,:,1]} \n")

print(f"first output: \n{output[0,:,:,0]} \n")

input array:
first channel: 
[[23 92 28 51]
 [30 98 69  2]
 [16 28  2 66]
 [34 74 68 83]] 

second channel: 
[[95 43  2 89]
 [95 77 42  6]
 [75 11 86 17]
 [22 26 38  8]] 

kernel :
first channel: 
[[0 1]
 [0 0]] 

second channel: 
[[1 1]
 [1 0]] 

first output: 
[[325. 150. 184.]
 [345. 199. 136.]
 [136. 125. 207.]] 



In [None]:
def backward(self):
    """Accumulate the layer's gradients and propagate them to its parameters.

    Reads the upstream gradient stored by the first output node under this
    layer, and fills ``self.gradients`` with zero-initialised accumulators
    for the kernels, the biases and the first input. It then walks every
    (sample, kernel, output row, output column) position:

    * kernel gradient  += input patch * upstream value
    * input gradient   += kernel * upstream value (written back to the patch)
    * bias gradient    += sum of the upstream values of that sample/kernel

    Finally ``backward()`` is called on the kernel and bias nodes.

    NOTE(review): the indexing assumes the input value is laid out as
    (sample, row, col, channel) and the upstream gradient as
    (sample, out_row, out_col, kernel) — confirm against the forward pass.
    """
    layer_input = self.inputs[0]
    kernels, biases = self.parameters

    # Fresh zero accumulators, each shaped like the value it belongs to.
    self.gradients[kernels] = np.zeros_like(kernels.value)
    self.gradients[biases] = np.zeros_like(biases.value)
    self.gradients[layer_input] = np.zeros_like(layer_input.value)

    upstream = self.outputs[0].gradients[self]  # dLoss / dOutput of this layer

    kernel_grad = self.gradients[kernels]
    bias_grad = self.gradients[biases]
    input_grad = self.gradients[layer_input]

    for sample in range(self.batch_size):
        for filt in range(self.number_of_kernals):
            for out_row in range(upstream.shape[1]):
                top = out_row * self.stride
                rows = slice(top, top + self.kernal_HW)

                for out_col in range(upstream.shape[2]):
                    left = out_col * self.stride
                    cols = slice(left, left + self.kernal_HW)

                    # Scalar gradient arriving at this output position.
                    flow = upstream[sample, out_row, out_col, filt]

                    # The patch that produced this output scales the kernel gradient.
                    kernel_grad[filt] += layer_input.value[sample, rows, cols, :] * flow

                    # The kernel itself scales what flows back onto the patch.
                    input_grad[sample, rows, cols, :] += kernels.value[filt] * flow

            # Bias gradient: upstream values summed over the spatial axes.
            bias_grad[filt] += np.sum(upstream[sample, :, :, filt])

    # Hand the accumulated gradients on to the parameter nodes.
    kernels.backward()
    biases.backward()
