In [3]:
import numpy as np

![image.png](attachment:image.png)

In [4]:
# Convolutional Layer (Conv3x3):
# conv = Conv3x3(8): Creates a convolutional layer with 8 filters, each of size 3x3.
# Input shape: 28x28x1 (grayscale image)
# Output shape: 26x26x8 (feature maps)
# Max Pooling Layer (MaxPool2):
# pool = MaxPool2(): Creates a max pooling layer with a pool size of 2x2.
# Input shape: 26x26x8 (feature maps)
# Output shape: 13x13x8 (downsampled feature maps)
# Softmax Layer:
# softmax = Softmax(13 * 13 * 8, 10): Creates a softmax layer with 13138 inputs (flattened feature maps) and 10 outputs (class probabilities).
# Input shape: 13x13x8 (flattened feature maps)
# Output shape: 10 (class probabilities)

In [5]:
# A Convolution layer using 3x3 filters.
num_filters = 8 # 28x28x1 -> 26x26x8  26x26x8 -> 13x13x8  13x13x8 -> 10 
image_height = 28 # for greyscale images
image_width = 28

In [6]:
# filters is a 3d array with dimensions (num_filters, 3, 3)
# We divide by 9 to reduce the variance of our initial values
filters = np.random.randn(num_filters, 3, 3) / 9

In [7]:
filters

array([[[ 0.01267095,  0.20470475,  0.09748474],
        [ 0.05568934, -0.03303646, -0.01112282],
        [ 0.10758389,  0.02007382,  0.07619285]],

       [[-0.08498716, -0.00374426, -0.12824152],
        [-0.01429454,  0.00819415, -0.08061402],
        [ 0.0437479 , -0.00196678,  0.10243619]],

       [[-0.1033074 ,  0.08319609, -0.13292061],
        [-0.04424397, -0.06655614,  0.15981453],
        [ 0.20135892, -0.09759611,  0.14350246]],

       [[-0.01883311, -0.13721395, -0.09912846],
        [-0.09317714,  0.07193293,  0.19069769],
        [-0.03538911,  0.13244822,  0.00865655]],

       [[-0.00283871,  0.12421434,  0.08159141],
        [-0.03592091, -0.17254956, -0.07766537],
        [ 0.10526048, -0.10400888, -0.01105415]],

       [[-0.21682431,  0.07806685,  0.07949128],
        [-0.0224462 ,  0.10101604, -0.10534961],
        [ 0.05890768, -0.13262552,  0.16772724]],

       [[-0.0152912 ,  0.13321466,  0.03515719],
        [-0.02791577,  0.01815089,  0.01112153],
        

convolution layer for 2d array image

In [8]:
# from PIL import Image

In [9]:
# img = Image.open('./data/image0.jpg')

In [10]:
import cv2

In [11]:
img = cv2.imread('./data/image0.jpg')

In [12]:
img

array([[[113,  49,   1],
        [114,  50,   2],
        [114,  49,   4],
        ...,
        [237, 220, 193],
        [238, 221, 194],
        [238, 221, 194]],

       [[113,  49,   1],
        [114,  50,   2],
        [114,  49,   4],
        ...,
        [238, 221, 194],
        [238, 221, 194],
        [238, 221, 194]],

       [[113,  49,   1],
        [114,  50,   2],
        [114,  49,   4],
        ...,
        [238, 221, 194],
        [238, 221, 194],
        [238, 221, 194]],

       ...,

       [[ 10, 123,  65],
        [  6, 128,  68],
        [ 19, 124,  67],
        ...,
        [ 33, 194, 126],
        [ 26, 192, 121],
        [ 27, 198, 124]],

       [[  8, 124,  65],
        [  2, 119,  62],
        [ 14, 115,  60],
        ...,
        [ 41, 206, 139],
        [ 30, 199, 130],
        [ 33, 205, 133]],

       [[  4, 120,  61],
        [  0, 110,  54],
        [  0,  96,  42],
        ...,
        [ 43, 209, 144],
        [ 34, 202, 135],
        [ 31, 202, 134]]

In [13]:
img.shape

(630, 1200, 3)

In [14]:
# Convert RGB to grayscale (single-channel)
# gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

In [15]:
# gray_img

In [16]:
# Save the grayscale image
# gr_img = cv2.imwrite('grayscale_image.jpg', gray_img)

In [17]:
# gr_img

In [18]:
# Read the image file
# img_gray = cv2.imread('grayscale_image.jpg')

In [19]:
# img_gray

In [20]:
# Convert grayscale to RGB
# rgb_img = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2RGB)

# Save the RGB image
# cv2.imwrite('rgb_image.jpg', rgb_img)

In [21]:
# img_gray.shape

In [22]:
# Assume 'img' is a 3D NumPy array with shape (H, W, 3)
img_2d = np.mean(img, axis=-1)

In [23]:
img_2d

array([[ 54.33333333,  55.33333333,  55.66666667, ..., 216.66666667,
        217.66666667, 217.66666667],
       [ 54.33333333,  55.33333333,  55.66666667, ..., 217.66666667,
        217.66666667, 217.66666667],
       [ 54.33333333,  55.33333333,  55.66666667, ..., 217.66666667,
        217.66666667, 217.66666667],
       ...,
       [ 66.        ,  67.33333333,  70.        , ..., 117.66666667,
        113.        , 116.33333333],
       [ 65.66666667,  61.        ,  63.        , ..., 128.66666667,
        119.66666667, 123.66666667],
       [ 61.66666667,  54.66666667,  46.        , ..., 132.        ,
        123.66666667, 122.33333333]], shape=(630, 1200))

In [24]:
cv2.imwrite('2d_image.jpg', img_2d)

True

In [25]:
img_2d.shape

(630, 1200)

In [26]:
# Resize the image
resized_img = cv2.resize(img_2d, (image_height, image_width))

In [27]:
resized_img.shape

(28, 28)

In [28]:
# img.shape

In [29]:
# img_resized = cv2.resize(img, (255, 255))

In [30]:
# img_resized.shape

In [31]:
# img_3d = np.stack((img_2d,)*3, axis=-1)

![image.png](attachment:image.png)

![image.png](attachment:image.png)![image-2.png](attachment:image-2.png)

In [32]:
#  Generates all possible 3x3 image regions using valid padding.
#     - image is a 2d numpy array.
h, w = resized_img.shape

In [33]:
h

28

In [34]:
w

28

In [35]:
# image: A 2D NumPy array representing the input image.
# Return Value:
# The function yields a tuple containing:
# im_region: A 3x3 NumPy array representing the current image region.
# i: The row index of the top-left corner of the current region.
# j: The column index of the top-left corner of the current region.
# Implementation:
# Get the height (h) and width (w) of the input image.
# Iterate over the rows of the image, excluding the last two rows (range(h - 2)).
# Iterate over the columns of the image, excluding the last two columns (range(w - 2)).
# Extract the 3x3 region from the image using NumPy array slicing (image[i:(i + 3), j:(j + 3)]).
# Yield the extracted region, along with its top-left corner coordinates (i, j).

In [36]:
im_region_dict = {}

In [37]:
for i in range(h - 2):
      row_dict = {}
      for j in range(w - 2):
        im_region = img_2d[i:(i + 3), j:(j + 3)]
        # print(f'Region {i}, {j}: {im_region}') 
        row_dict[j] = im_region
      im_region_dict[i] = row_dict
        # yield im_region, i, j  # we will use yield to get the values one by one on each iteration or funtion call
        

In [38]:
# for i, region in im_region_dict.items():
#     print(f"Region {i}: {region}")

In [39]:
next(iter(im_region_dict.items()))

(0,
 {0: array([[54.33333333, 55.33333333, 55.66666667],
         [54.33333333, 55.33333333, 55.66666667],
         [54.33333333, 55.33333333, 55.66666667]]),
  1: array([[55.33333333, 55.66666667, 55.66666667],
         [55.33333333, 55.66666667, 55.66666667],
         [55.33333333, 55.66666667, 55.66666667]]),
  2: array([[55.66666667, 55.66666667, 54.66666667],
         [55.66666667, 55.66666667, 54.66666667],
         [55.66666667, 55.66666667, 54.66666667]]),
  3: array([[55.66666667, 54.66666667, 53.66666667],
         [55.66666667, 54.66666667, 53.66666667],
         [55.66666667, 54.66666667, 54.66666667]]),
  4: array([[54.66666667, 53.66666667, 54.33333333],
         [54.66666667, 53.66666667, 54.33333333],
         [54.66666667, 54.66666667, 54.33333333]]),
  5: array([[53.66666667, 54.33333333, 54.33333333],
         [53.66666667, 54.33333333, 54.33333333],
         [54.66666667, 54.33333333, 54.33333333]]),
  6: array([[54.33333333, 54.33333333, 54.66666667],
         [54.

In [40]:
im_region # last region

array([[58.        , 58.        , 58.        ],
       [58.        , 58.        , 58.        ],
       [58.66666667, 58.66666667, 58.66666667]])

In [41]:
#  Performs a forward pass of the conv layer using the given input.
#     Returns a 3d numpy array with dimensions (h, w, num_filters).
#     - input is a 2d numpy array

In [42]:
# We transform the image from [0, 255] to [-0.5, 0.5] to make it easier
  # to work with. This is standard practice.

In [43]:
nor_img = (resized_img/255) - 0.5

In [44]:
nor_img

array([[-0.27138189, -0.26830065, -0.2695845 , -0.24591503, -0.23076564,
        -0.20849673, -0.16869748, -0.10866013, -0.11619981, -0.09551821,
        -0.07124183, -0.04367414, -0.01304855,  0.02348273,  0.05688609,
         0.09626517,  0.1369281 ,  0.17189542,  0.34180672,  0.31699346,
         0.36809057,  0.37815126,  0.43510738,  0.47352941,  0.49673203,
         0.48858543,  0.37684407,  0.35228758],
       [-0.26762372, -0.26234827, -0.21111111, -0.20081699, -0.21869748,
        -0.20534547, -0.18239963, -0.15478525, -0.12418301, -0.09995331,
        -0.0630719 , -0.0339169 , -0.00205415,  0.03202614,  0.07567694,
         0.10681606,  0.15490196,  0.4380719 ,  0.36526611,  0.33398693,
         0.35882353,  0.36788049,  0.43594771,  0.46862745,  0.49264706,
         0.47588702,  0.35833333,  0.36451914],
       [-0.26216153, -0.25644258, -0.24869281, -0.23316993, -0.21601307,
        -0.19033613, -0.16928105, -0.13802521, -0.11837068, -0.08559757,
        -0.05898693, -0.0312

In [45]:
nor_img.shape

(28, 28)

In [46]:
output = np.zeros((h - 2, w - 2, num_filters))

In [47]:
output

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [48]:
output.shape

(26, 26, 8)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [49]:
im_region_dict[0][0]

array([[54.33333333, 55.33333333, 55.66666667],
       [54.33333333, 55.33333333, 55.66666667],
       [54.33333333, 55.33333333, 55.66666667]])

In [50]:
filters[0]

array([[ 0.01267095,  0.20470475,  0.09748474],
       [ 0.05568934, -0.03303646, -0.01112282],
       [ 0.10758389,  0.02007382,  0.07619285]])

In [51]:
im_region_dict[0][0]*filters[0] # elements wise multiplication

array([[ 0.68845509, 11.32699643,  5.42665061],
       [ 3.02578743, -1.82801758, -0.61917022],
       [ 5.84539136,  1.11075145,  4.2414019 ]])

In [52]:
output.shape[0]

26

In [53]:
output.shape[1]

26

In [54]:
output.shape

(26, 26, 8)

In [55]:
filters.shape

(8, 3, 3)

In [56]:
im_region_dict[0][0].shape

(3, 3)

In [57]:
np.dot(im_region_dict[0][0], filters) # (8, 3, 3) x (3, 3) = (3, 8, 3) matrix multiplication

array([[[  9.75876841,  10.41171682,   8.92261024],
        [ -2.97330072,   0.1404875 ,  -5.72615004],
        [  3.14777775,  -4.59530188,   9.60935456],
        [ -8.14906157,   3.89794822,   5.64784071],
        [  3.7176401 ,  -8.58859092,  -0.47969771],
        [ -9.74361633,   2.44836552,   7.82649754],
        [-10.35644623,  13.16637234,  10.69235335],
        [-20.27782507,  13.4717445 , -20.21103894]],

       [[  9.75876841,  10.41171682,   8.92261024],
        [ -2.97330072,   0.1404875 ,  -5.72615004],
        [  3.14777775,  -4.59530188,   9.60935456],
        [ -8.14906157,   3.89794822,   5.64784071],
        [  3.7176401 ,  -8.58859092,  -0.47969771],
        [ -9.74361633,   2.44836552,   7.82649754],
        [-10.35644623,  13.16637234,  10.69235335],
        [-20.27782507,  13.4717445 , -20.21103894]],

       [[  9.75876841,  10.41171682,   8.92261024],
        [ -2.97330072,   0.1404875 ,  -5.72615004],
        [  3.14777775,  -4.59530188,   9.60935456],
        

In [58]:
np.dot(im_region_dict[0][0], filters).shape

(3, 8, 3)

In [59]:
np.dot(filters, im_region_dict[0][0]) # matrix multiplication

array([[[ 17.10741772,  17.42227817,  17.52723166],
        [  0.62646652,   0.63799658,   0.64183993],
        [ 11.07588042,  11.27973098,  11.34768117]],

       [[-11.7888631 , -12.00583604, -12.07816035],
        [ -4.71148287,  -4.79819728,  -4.82710208],
        [  7.83580737,   7.98002469,   8.02809712]],

       [[ -8.314734  ,  -8.46776592,  -8.51877656],
        [  2.66311677,   2.71213119,   2.72846933],
        [ 13.43474622,  13.68201148,  13.76443324]],

       [[-13.86453667, -14.11971219, -14.20477069],
        [  9.20697247,   9.37642595,   9.43291045],
        [  5.74388387,   5.84959952,   5.88483807]],

       [[ 11.02787637,  11.23084342,  11.29849911],
        [-15.54671397, -15.83284981, -15.92822842],
        [ -0.53260503,  -0.54240758,  -0.54567509]],

       [[ -3.22012955,  -3.27939574,  -3.29915113],
        [ -1.45503375,  -1.48181351,  -1.4907401 ],
        [  5.10784393,   5.20185333,   5.2331898 ]],

       [[  8.31738145,   8.47046209,   8.52148897],


In [60]:
np.dot(filters, im_region_dict[0][0]).shape

(8, 3, 3)

In [61]:
im_region_dict[0][0] * filters # elements to elementwise multiplication for 8 filters

array([[[  0.68845509,  11.32699643,   5.42665061],
        [  3.02578743,  -1.82801758,  -0.61917022],
        [  5.84539136,   1.11075145,   4.2414019 ]],

       [[ -4.6176359 ,  -0.20718239,  -7.13877776],
        [ -0.77667013,   0.45340963,  -4.48751358],
        [  2.37696931,  -0.10882841,   5.70228128]],

       [[ -5.61303547,   4.60351712,  -7.39924703],
        [ -2.4039226 ,  -3.68277291,   8.89634218],
        [ 10.94050123,  -5.40031796,   7.98830345]],

       [[ -1.02326577,  -7.59250526,  -5.51815086],
        [ -5.06262459,   3.98028864,  10.61550494],
        [ -1.92280858,   7.32880148,   0.48188125]],

       [[ -0.15423637,   6.87319361,   4.54192202],
        [ -1.95170273,  -9.54774259,  -4.32337204],
        [  5.71915279,  -5.75515798,  -0.61534759]],

       [[-11.78078756,   4.31969877,   4.42501446],
        [ -1.21957671,   5.58955433,  -5.86446147],
        [  3.20065056,  -7.33861222,   9.33681639]],

       [[ -0.83082193,   7.37121098,   1.9570833 ],


In [62]:
im_region_dict[0][0] * filters[0]

array([[ 0.68845509, 11.32699643,  5.42665061],
       [ 3.02578743, -1.82801758, -0.61917022],
       [ 5.84539136,  1.11075145,  4.2414019 ]])

In [63]:
(im_region_dict[0][0] * filters)[0] # first filter multiplication

array([[ 0.68845509, 11.32699643,  5.42665061],
       [ 3.02578743, -1.82801758, -0.61917022],
       [ 5.84539136,  1.11075145,  4.2414019 ]])

In [64]:
np.sum((im_region_dict[0][0] * filters)[0])

np.float64(29.218246471288303)

In [65]:
# output: A 2D array that stores the output of the convolution operation.
# im_region_dict: A dictionary that stores regions of the image, indexed by i and j.
# filters: A 3D array that stores the filters to be applied to the image regions.
# Operation:
# im_region_dict[i][j]: Retrieves the image region at position (i, j) from the dictionary.
# * filters: Performs element-wise multiplication between the image region and the filters. This is equivalent to a convolution operation without the sliding window.
# np.sum(..., axis=(1, 2)): Computes the sum of the multiplied values along the second and third axes (i.e., the spatial dimensions of the filters). This reduces the dimensionality of the output.

In [66]:
np.sum(im_region_dict[0][0] * filters, axis=(1, 2)) 

array([ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
        -5.21329086,   0.66829655,  13.87566865, -27.0219215 ])

In [67]:
for i in range(output.shape[0]):
    for j in range(output.shape[1]):
        output[i, j] = np.sum(im_region_dict[i][j] * filters, axis=(1, 2))

In [68]:
output[0]

array([[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
         -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
       [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
         -5.19757136,   0.50341951,  13.76903166, -27.30852857],
       [ 29.35419798,  -8.77074597,   7.80372963,   1.01275204,
         -5.16827631,   0.30142965,  13.51385232, -27.06290427],
       [ 29.07609394,  -8.56437355,   7.85779185,   0.85401561,
         -5.01985825,   0.28083062,  13.22775248, -27.11067673],
       [ 28.76065847,  -8.68667205,   7.75743949,   1.12485664,
         -5.03172182,   0.20896259,  13.24491405, -26.96549127],
       [ 28.80005243,  -8.58376816,   7.9486162 ,   1.1491968 ,
         -4.99051606,   0.6118302 ,  13.35954619, -26.68444706],
       [ 28.90732961,  -8.68546359,   8.0421391 ,   1.2526311 ,
         -5.112965  ,   0.52155536,  13.54808044, -26.99681951],
       [ 28.86511714,  -8.55224279,   8.03863949,   1.51040272,
         -5.42971008,   0.4364768

![image.png](attachment:image.png)

In [69]:
output.shape  # 8 chanel (n1) for 8 filters before max pooling

(26, 26, 8)

this is the output from first conv layer

max pooling layer

In [70]:
# A Max Pooling layer using a pool size of 2.
# A Max Pooling layer using a pool size of 2.

In [71]:
h, w, _ = output.shape

![image.png](attachment:image.png)

In [72]:
new_h = h // 2
new_w = w // 2

In [73]:
new_h

13

In [74]:
new_w

13

In [75]:
# for i in range(new_h):
#       for j in range(new_w):
#         im_region = output[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
#         # yield im_region, i, j

In [76]:
pool_dict = {}

![image.png](attachment:image.png)

In [77]:
output[(0 * 2):(0 * 2 + 2), (0 * 2):(0 * 2 + 2)]

array([[[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
          -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
          -5.19757136,   0.50341951,  13.76903166, -27.30852857]],

       [[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
          -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
          -5.19757136,   0.50341951,  13.76903166, -27.30852857]]])

In [78]:
for i in range(new_h):
      row_dict = {}
      for j in range(new_w):
        im_region = output[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
        # print(f'Region {i}, {j}: {im_region}') 
        row_dict[j] = im_region
      pool_dict[i] = row_dict
        # yield im_region, i, j  # we will use yield to get the values one by one on each iteration or funtion call

In [79]:
pool_dict[0][0]


array([[[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
          -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
          -5.19757136,   0.50341951,  13.76903166, -27.30852857]],

       [[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
          -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
          -5.19757136,   0.50341951,  13.76903166, -27.30852857]]])

In [80]:
h, w, num_filters = output.shape

In [81]:
pool_output = np.zeros((h // 2, w // 2, num_filters))

In [82]:
pool_output

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [83]:
pool_dict[0][0]

array([[[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
          -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
          -5.19757136,   0.50341951,  13.76903166, -27.30852857]],

       [[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
          -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
          -5.19757136,   0.50341951,  13.76903166, -27.30852857]]])

In [84]:
np.amax(pool_dict[0][0] , axis=(0,1))

array([ 29.45810469,  -8.80394794,   7.95619016,   1.28712125,
        -5.19757136,   0.66829655,  13.87566865, -27.0219215 ])

In [85]:
for i in range(pool_output.shape[0]):
    for j in range(pool_output.shape[1]):
        pool_output[i, j] = np.amax(pool_dict[i][j] , axis=(0,1))

In [86]:
# for im_region, i, j in iterate_regions(input):
#     output[i, j] = np.amax(im_region, axis=(0, 1))

In [87]:
pool_output

array([[[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.56437355,   8.01760638, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 28.93193462,  -8.49562651,   8.04787468, ...,   0.75711124,
          13.4783386 , -26.68444706],
        ...,
        [ 29.79838231,  -8.99264867,   8.00634401, ...,   0.44305956,
          13.80889296, -27.49329205],
        [ 29.69787241,  -8.84085012,   8.06171484, ...,   0.49371378,
          13.79346672, -27.2252018 ],
        [ 29.59496785,  -8.72512881,   8.30304015, ...,   0.61924783,
          13.90858687, -27.25205641]],

       [[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.77074597,   7.88468578, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 29.01800485,  -8.57978597,   8.07739643, ...,   0.55577272,
          13.61593257, -26.95360176],
        ...,


In [88]:
pool_output.shape

(13, 13, 8)

In [89]:
pool_output.shape[0]

13

neural networks and


activation layer (softmax)

In [90]:
input_len =pool_output.shape[0]*pool_output.shape[1]*pool_output.shape[2]                   # 13 * 13 * 8
nodes = 10 # output nodes or no of class

In [91]:
# A standard fully-connected layer with softmax activation.
weights = np.random.randn(input_len, nodes) / input_len

In [92]:
weights  # w

array([[ 1.16801968e-04,  8.32208635e-04, -3.97271508e-04, ...,
         3.60697411e-04, -1.14725422e-03,  5.55566533e-04],
       [ 8.17482205e-04,  9.26980579e-04, -6.48787154e-04, ...,
         2.01856332e-04, -8.86396526e-04,  2.58187400e-04],
       [-3.68312857e-04,  9.65764843e-05,  3.89297736e-04, ...,
        -1.50043842e-04, -1.21313113e-04, -2.70738818e-04],
       ...,
       [-1.14756344e-03,  3.45887060e-04,  9.14982351e-04, ...,
        -9.98570908e-04, -1.38305354e-04,  1.04748725e-03],
       [-1.50347413e-04, -1.37788495e-03, -1.07347304e-03, ...,
         6.20943979e-04,  4.60444868e-04,  1.94223279e-04],
       [ 1.22009613e-04,  7.58150542e-04, -1.75338953e-04, ...,
         4.97712034e-06,  7.15309044e-04, -1.72464515e-03]],
      shape=(1352, 10))

In [93]:
biases = np.zeros(nodes)

In [94]:
biases

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [95]:
# Performs a forward pass of the softmax layer using the given input.
#     Returns a 1d numpy array containing the respective probability values.
#     - input can be any array with any dimensions.

![image.png](attachment:image.png)

![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [96]:
last_input_shape = pool_output.shape

In [97]:
last_input_shape[0]

13

![image.png](attachment:image.png)

In [98]:
input = pool_output.flatten()


In [99]:
input

array([ 29.45810469,  -8.80394794,   7.95619016, ...,   0.52455287,
        14.34257294, -28.58086799], shape=(1352,))

In [100]:
input.shape

(1352,)

neural network layer  for one hidden layer

In [101]:
input_len, nodes = weights.shape

In [102]:
input_len

1352

In [103]:
nodes

10

In [104]:
pool_output

array([[[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.56437355,   8.01760638, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 28.93193462,  -8.49562651,   8.04787468, ...,   0.75711124,
          13.4783386 , -26.68444706],
        ...,
        [ 29.79838231,  -8.99264867,   8.00634401, ...,   0.44305956,
          13.80889296, -27.49329205],
        [ 29.69787241,  -8.84085012,   8.06171484, ...,   0.49371378,
          13.79346672, -27.2252018 ],
        [ 29.59496785,  -8.72512881,   8.30304015, ...,   0.61924783,
          13.90858687, -27.25205641]],

       [[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.77074597,   7.88468578, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 29.01800485,  -8.57978597,   8.07739643, ...,   0.55577272,
          13.61593257, -26.95360176],
        ...,


![image-2.png](attachment:image-2.png)

In [105]:
all_class_output = np.dot(input, weights) + biases 

In [106]:
all_class_output 

array([ 0.35333665,  0.40912535, -0.22820773,  0.35323881,  0.18017952,
       -1.38450016, -0.50026854, -0.59583091,  0.06343068,  0.16050995])

In [107]:
exp = np.exp(all_class_output) # = z probability values for each class

In [108]:
exp # probability values for each class 

array([1.42381039, 1.50550042, 0.7959589 , 1.42367109, 1.1974323 ,
       0.25044895, 0.6063678 , 0.55110446, 1.06548563, 1.17410945])

In [109]:
np.sum(exp, axis=0) # total probability is 1 

np.float64(9.99388940028729)

Softmax Function:

σ(z) = exp(z[i]) / Σ exp(z[i])

exp(z) is the exponential function applied element-wise to z

Σ exp(z) is the sum of the exponentials of z

In [110]:
soft_out = exp / np.sum(exp, axis=0)   # softmax probability

In [111]:
soft_out # sigma

array([0.1424681 , 0.15064209, 0.07964456, 0.14245416, 0.11981645,
       0.02506021, 0.06067386, 0.05514414, 0.10661371, 0.11748273])

In [112]:
pred = np.argmax(soft_out, axis=0)

In [113]:
pred

np.int64(1)

cross_entropy_loss and accuracy 

In [114]:
label = 0 # let true label is 0

In [115]:
soft_out[label] # this is the prediction probability for label 0

np.float64(0.14246809509733757)

In [116]:
# gradient[label] = -1 / out[label]
# This is the derivative of the log loss function (also known as cross-entropy loss) with respect to the output of the model.
# Log Loss Function:
# The log loss function is defined as:
# L(y, y') = -∑(y * log(y'))
# where y is the true label, y' is the predicted probability, and the sum is taken over all classes.
# Derivative:
# The derivative of the log loss function with respect to the output y' is:
# dL/dy' = -y / y'
# In this specific case, y is 1 for the true label and 0 for all other classes. Therefore, the derivative simplifies to:
# dL/dy' = -1 / y' for the true label
# This is exactly what the code calculates and stores in the gradient array.

log loss

In [117]:
loss = -1 * np.log(soft_out[label]) # loss of predition for label 0  L(y, y') = -∑(y * log(y')) 
# y =1 , y' = 0.08263477573609264

In [118]:
loss

np.float64(1.9486371983952187)

In [119]:
acc = 1 if pred == label else 0

In [120]:
acc

0

training layer

In [121]:
nodes # node in hidden layer

10

In [122]:
gradients = np.zeros(nodes)

In [123]:
gradients

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [124]:
# loss = - 1 * np.log(soft_out[label]) # loss of predition for label 0  L(y, y') = -∑(y * log(y'))

partial derivatives

In [125]:
-1/soft_out[label]   #  then dL/dy' = -y / y'  ; y=1 actual probability 1 for label 0 prediction and 
# y' = 0.08263477573609264 predicted probability  0.08263477573609264 for label 0 

np.float64(-7.019115397849437)

In [126]:
gradients[label] = -1 / soft_out[label]

In [127]:
gradients # calculated for only  class 0  or label 0 dL_dy' or dL/dy'

array([-7.0191154,  0.       ,  0.       ,  0.       ,  0.       ,
        0.       ,  0.       ,  0.       ,  0.       ,  0.       ])

back propagration start   : reverse direction : softmax > all_class_output > max_pool > convolution

forward propagration : convolution > max_pool > all_class__output > softmax

In [128]:
#  Performs a backward pass of the softmax layer.
#     Returns the loss gradient for this layer's inputs.
#     - d_L_d_out is the loss gradient for this layer's outputs.
#     - learn_rate is a float.

In [129]:
 # only 1 element of dL/dy' will be nonzero  or dl_dy' gradient list
for i, grad in enumerate(gradients):
    if grad == 0:
      continue
    print(i,":", grad)

0 : -7.019115397849437


In [130]:
# dL_dw = dL_dy' * dy'_dσ * dσ_dz * dz_dw   # weight update for neural network
# dL_db = dL_dy' *dy'_dσ * dσ_dz * dz_db

# dw = weight
# db = bias
# dL = loss
# dy' = predict = dout
# dσ = for softmax = dexpt
# dz = all_class_output
# dw = weight for neural network


In [131]:
# dy'_dσ = d/dσ(y') = 0 as y' onstant probability the this part will be neglected it makes all 0

In [132]:
# dL_dw = dL_dy' * dσ_dz * dz_dw 
# dL_db =dL_dy' * dσ_dz * dz_db

Softmax Function:

σ(z) = exp(z) / Σ exp(z)

In [133]:
dexp =  np.exp(all_class_output) # z = all_class_output 

In [134]:
dexp # exp(z)

array([1.42381039, 1.50550042, 0.7959589 , 1.42367109, 1.1974323 ,
       0.25044895, 0.6063678 , 0.55110446, 1.06548563, 1.17410945])

In [135]:
dexp[0] # for class 0

np.float64(1.4238103854724038)

In [136]:
sum_exp = np.sum(dexp)

In [137]:
sum_exp # Σ exp(z)

np.float64(9.99388940028729)

In [138]:
# σ(z) = exp(z) / Σ exp(z)
# where:
# σ(z) is the softmax function
# z is the input vector
# exp(z) is the exponential of the input z
# Σ exp(z) is the sum of the exponentials of all inputs z

In [139]:
# Calculates the derivative of the softmax output with respect to the all_class_output(z) for all classes except the i-th class.
# Derivative of Softmax Function:

# Step 1: Apply the Quotient Rule
# To find the derivative of the softmax function, we'll apply the quotient rule:

# σ(z) = exp(z) / Σ exp(z)

## σ(z) = exp(z) / Σ exp(z)  

# d(a/b)/dx = [b(da/dx) - a(da/dx)] / b^2

# partial derivatives # except variable term rest term are constant and derivatives will be 0

# (∂/∂z (∑exp(z))) = (∂/∂z (exp(x) + exp(y) + exp(z))) = (exp(z)) # x,y are constant w r t z 

# ∂σ/∂z        = [{(∂/∂z (exp(z))) * (∑exp(z))} - {(exp(z)) * (∂/∂z (∑exp(z)))}] / (∑exp(z))^2
#              = [{(exp(z)) * (∑exp(z))} - {(exp(z)) * (exp(z))}] / (∑exp(z))^2
#              = (exp(z) * [ (∑exp(z)) - exp(z) ] / (∑exp(z))^2
#∂σ[i]_dz[i] = dexp[i] * (sum_exp - dexp[i]) / (sum_exp ** 2)  # i == j



#∂σ[j]_dz[i] = -dexp[i] * dexp[j]) / (sum_exp ** 2)  # i != j   1st derivative  (∂/∂z[i] (exp(z[j])) will be 0 , 
# exp(z[j] is constant wrt ∂z[i]


# ∂σ/∂z = σ(z) * (1 - σ(z))

In [140]:
dexpt_dz = -dexp[0] * dexp / (sum_exp ** 2)

In [141]:
dexpt_dz

array([-0.02029716, -0.02146169, -0.01134681, -0.02029517, -0.01707002,
       -0.00357028, -0.00864409, -0.00785628, -0.01518905, -0.01673754])

In [142]:
# dy'_dexpt = 
dexpt_dz[0] = dexp[0] * (sum_exp - dexp[0]) / (sum_exp ** 2) # update value for 0 class

In [143]:
dexpt_dz[0]

np.float64(0.12217093697667357)

In [144]:
dexpt_dz

array([ 0.12217094, -0.02146169, -0.01134681, -0.02029517, -0.01707002,
       -0.00357028, -0.00864409, -0.00785628, -0.01518905, -0.01673754])

z = w * x + b

In [145]:
# dz_dw > derivative of all class output values (z) with respect to weights (w)
# z = w * x + b
# parttal derivative of all class output values (z) with respect to
# dz_dw = x  # input values x
# dz_db = 1 
# dz_dx = w # weights w

In [146]:
dz_dw = input

In [147]:
dz_dw

array([ 29.45810469,  -8.80394794,   7.95619016, ...,   0.52455287,
        14.34257294, -28.58086799], shape=(1352,))

In [148]:
dz_db = 1

In [149]:
dz_dx = weights

In [150]:
dz_dx

array([[ 1.16801968e-04,  8.32208635e-04, -3.97271508e-04, ...,
         3.60697411e-04, -1.14725422e-03,  5.55566533e-04],
       [ 8.17482205e-04,  9.26980579e-04, -6.48787154e-04, ...,
         2.01856332e-04, -8.86396526e-04,  2.58187400e-04],
       [-3.68312857e-04,  9.65764843e-05,  3.89297736e-04, ...,
        -1.50043842e-04, -1.21313113e-04, -2.70738818e-04],
       ...,
       [-1.14756344e-03,  3.45887060e-04,  9.14982351e-04, ...,
        -9.98570908e-04, -1.38305354e-04,  1.04748725e-03],
       [-1.50347413e-04, -1.37788495e-03, -1.07347304e-03, ...,
         6.20943979e-04,  4.60444868e-04,  1.94223279e-04],
       [ 1.22009613e-04,  7.58150542e-04, -1.75338953e-04, ...,
         4.97712034e-06,  7.15309044e-04, -1.72464515e-03]],
      shape=(1352, 10))

In [151]:
gradients

array([-7.0191154,  0.       ,  0.       ,  0.       ,  0.       ,
        0.       ,  0.       ,  0.       ,  0.       ,  0.       ])

In [152]:
dexpt_dz

array([ 0.12217094, -0.02146169, -0.01134681, -0.02029517, -0.01707002,
       -0.00357028, -0.00864409, -0.00785628, -0.01518905, -0.01673754])

In [153]:
dz_dw

array([ 29.45810469,  -8.80394794,   7.95619016, ...,   0.52455287,
        14.34257294, -28.58086799], shape=(1352,))

In [154]:
dz_dw[np.newaxis]

array([[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.52455287,
         14.34257294, -28.58086799]], shape=(1, 1352))

In [155]:
dz_dw[np.newaxis].T

array([[ 29.45810469],
       [ -8.80394794],
       [  7.95619016],
       ...,
       [  0.52455287],
       [ 14.34257294],
       [-28.58086799]], shape=(1352, 1))

In [156]:
gradients * dexpt_dz # element wise multiplication

array([-0.8575319, -0.       , -0.       , -0.       , -0.       ,
       -0.       , -0.       , -0.       , -0.       , -0.       ])

In [157]:
(gradients * dexpt_dz)[np.newaxis]

array([[-0.8575319, -0.       , -0.       , -0.       , -0.       ,
        -0.       , -0.       , -0.       , -0.       , -0.       ]])

In [158]:
dz_dw[np.newaxis].T.shape

(1352, 1)

In [159]:
(gradients * dexpt_dz)[np.newaxis].shape

(1, 10)

In [160]:
# dL_dw = dL_dy' * dσ_dz * dz_dw 
dL_dw = (gradients * dexpt_dz)[np.newaxis].T * dz_dw[np.newaxis] # (1352, 1) x (1, 10) = (1352, 10)

In [166]:
dL_dw

array([[-25.26126463,   7.54966625,  -6.8226869 , ...,  -0.44982082,
        -12.2992139 ,  24.50900617],
       [ -0.        ,   0.        ,  -0.        , ...,  -0.        ,
         -0.        ,   0.        ],
       [ -0.        ,   0.        ,  -0.        , ...,  -0.        ,
         -0.        ,   0.        ],
       ...,
       [ -0.        ,   0.        ,  -0.        , ...,  -0.        ,
         -0.        ,   0.        ],
       [ -0.        ,   0.        ,  -0.        , ...,  -0.        ,
         -0.        ,   0.        ],
       [ -0.        ,   0.        ,  -0.        , ...,  -0.        ,
         -0.        ,   0.        ]], shape=(10, 1352))

In [167]:
dL_dw.shape

(10, 1352)

In [161]:
# dL_db =dL_dy' * dσ_dz * dz_db
dL_db = (gradients * dexpt_dz) * dz_db

In [168]:
dL_db

array([-0.8575319, -0.       , -0.       , -0.       , -0.       ,
       -0.       , -0.       , -0.       , -0.       , -0.       ])

In [169]:
dL_db.shape

(10,)

In [162]:
# learning_rate = 
lr = 0.01

In [163]:
old_weights = weights.copy()

In [164]:
old_weights

array([[ 1.16801968e-04,  8.32208635e-04, -3.97271508e-04, ...,
         3.60697411e-04, -1.14725422e-03,  5.55566533e-04],
       [ 8.17482205e-04,  9.26980579e-04, -6.48787154e-04, ...,
         2.01856332e-04, -8.86396526e-04,  2.58187400e-04],
       [-3.68312857e-04,  9.65764843e-05,  3.89297736e-04, ...,
        -1.50043842e-04, -1.21313113e-04, -2.70738818e-04],
       ...,
       [-1.14756344e-03,  3.45887060e-04,  9.14982351e-04, ...,
        -9.98570908e-04, -1.38305354e-04,  1.04748725e-03],
       [-1.50347413e-04, -1.37788495e-03, -1.07347304e-03, ...,
         6.20943979e-04,  4.60444868e-04,  1.94223279e-04],
       [ 1.22009613e-04,  7.58150542e-04, -1.75338953e-04, ...,
         4.97712034e-06,  7.15309044e-04, -1.72464515e-03]],
      shape=(1352, 10))

In [170]:
lr * dL_dw

array([[-0.25261265,  0.07549666, -0.06822687, ..., -0.00449821,
        -0.12299214,  0.24509006],
       [-0.        ,  0.        , -0.        , ..., -0.        ,
        -0.        ,  0.        ],
       [-0.        ,  0.        , -0.        , ..., -0.        ,
        -0.        ,  0.        ],
       ...,
       [-0.        ,  0.        , -0.        , ..., -0.        ,
        -0.        ,  0.        ],
       [-0.        ,  0.        , -0.        , ..., -0.        ,
        -0.        ,  0.        ],
       [-0.        ,  0.        , -0.        , ..., -0.        ,
        -0.        ,  0.        ]], shape=(10, 1352))

In [171]:
(lr * dL_dw).shape

(10, 1352)

In [172]:

# Update weights / biases
weights -= (lr * dL_dw).T

In [173]:
weights

array([[ 2.52729448e-01,  8.32208635e-04, -3.97271508e-04, ...,
         3.60697411e-04, -1.14725422e-03,  5.55566533e-04],
       [-7.46791803e-02,  9.26980579e-04, -6.48787154e-04, ...,
         2.01856332e-04, -8.86396526e-04,  2.58187400e-04],
       [ 6.78585562e-02,  9.65764843e-05,  3.89297736e-04, ...,
        -1.50043842e-04, -1.21313113e-04, -2.70738818e-04],
       ...,
       [ 3.35064479e-03,  3.45887060e-04,  9.14982351e-04, ...,
        -9.98570908e-04, -1.38305354e-04,  1.04748725e-03],
       [ 1.22841792e-01, -1.37788495e-03, -1.07347304e-03, ...,
         6.20943979e-04,  4.60444868e-04,  1.94223279e-04],
       [-2.44968052e-01,  7.58150542e-04, -1.75338953e-04, ...,
         4.97712034e-06,  7.15309044e-04, -1.72464515e-03]],
      shape=(1352, 10))

In [174]:
biases -= lr * dL_db

In [175]:
biases

array([0.00857532, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])

back propagration in maxpooling layer  

z = w * x + b

In [184]:
# old weights
dz_dx


array([[ 2.52729448e-01,  8.32208635e-04, -3.97271508e-04, ...,
         3.60697411e-04, -1.14725422e-03,  5.55566533e-04],
       [-7.46791803e-02,  9.26980579e-04, -6.48787154e-04, ...,
         2.01856332e-04, -8.86396526e-04,  2.58187400e-04],
       [ 6.78585562e-02,  9.65764843e-05,  3.89297736e-04, ...,
        -1.50043842e-04, -1.21313113e-04, -2.70738818e-04],
       ...,
       [ 3.35064479e-03,  3.45887060e-04,  9.14982351e-04, ...,
        -9.98570908e-04, -1.38305354e-04,  1.04748725e-03],
       [ 1.22841792e-01, -1.37788495e-03, -1.07347304e-03, ...,
         6.20943979e-04,  4.60444868e-04,  1.94223279e-04],
       [-2.44968052e-01,  7.58150542e-04, -1.75338953e-04, ...,
         4.97712034e-06,  7.15309044e-04, -1.72464515e-03]],
      shape=(1352, 10))

In [None]:
# loss with respect to input (x) to neural network
# dL_dx = dL_dy' * dσ_dz * dz_dx

In [186]:
(gradients * dexpt_dz).shape

(10,)

In [187]:
dz_dx.shape

(1352, 10)

In [193]:
# flatten the output
input  # output of maxpool layer

array([ 29.45810469,  -8.80394794,   7.95619016, ...,   0.52455287,
        14.34257294, -28.58086799], shape=(1352,))

In [189]:
dL_dx = dz_dx @  (gradients * dexpt_dz) # matrix multiplication like dot

In [190]:
dL_dx # output gradients of maxpooling layer

array([-0.21672357,  0.06403978, -0.05819088, ..., -0.00287328,
       -0.10534076,  0.21006792], shape=(1352,))

In [None]:
#  Performs a backward pass of the maxpool layer.
#  Returns the loss gradient for this layer's inputs.
#  d_L_d_out is the loss gradient for this layer's outputs.

In [192]:
pool_output

array([[[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.56437355,   8.01760638, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 28.93193462,  -8.49562651,   8.04787468, ...,   0.75711124,
          13.4783386 , -26.68444706],
        ...,
        [ 29.79838231,  -8.99264867,   8.00634401, ...,   0.44305956,
          13.80889296, -27.49329205],
        [ 29.69787241,  -8.84085012,   8.06171484, ...,   0.49371378,
          13.79346672, -27.2252018 ],
        [ 29.59496785,  -8.72512881,   8.30304015, ...,   0.61924783,
          13.90858687, -27.25205641]],

       [[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.77074597,   7.88468578, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 29.01800485,  -8.57978597,   8.07739643, ...,   0.55577272,
          13.61593257, -26.95360176],
        ...,


reverse flattened_output gradient

In [217]:
dL_dxf = dL_dx.reshape(pool_output.shape)

In [218]:
dL_dxf

array([[[-0.21672357,  0.06403978, -0.05819088, ..., -0.00403915,
         -0.10318057,  0.19982735],
        [-0.21631459,  0.06304125, -0.05853247, ..., -0.002482  ,
         -0.10020062,  0.19923994],
        [-0.21110338,  0.06277346, -0.05890076, ..., -0.00452989,
         -0.09860979,  0.19678793],
        ...,
        [-0.2186728 ,  0.06757028, -0.0592361 , ..., -0.00346725,
         -0.10225121,  0.20283806],
        [-0.21868603,  0.06474749, -0.05835423, ..., -0.0041275 ,
         -0.10174925,  0.20103168],
        [-0.21749589,  0.06477065, -0.06091043, ..., -0.00423039,
         -0.10287437,  0.20104054]],

       [[-0.21744843,  0.06481306, -0.05898578, ..., -0.0047006 ,
         -0.10096549,  0.19951292],
        [-0.21477726,  0.06429584, -0.05780282, ..., -0.00213101,
         -0.09981037,  0.19949597],
        [-0.21238172,  0.06313045, -0.05922796, ..., -0.00348103,
         -0.10003597,  0.19870952],
        ...,
        [-0.21908979,  0.06521613, -0.05988829, ..., -

In [237]:
dL_dinput = np.zeros(output.shape) # input gradients of max pool layer output of conv layer

In [238]:
dL_dinput 

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [197]:
pool_output # out put of max pool layer

array([[[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.56437355,   8.01760638, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 28.93193462,  -8.49562651,   8.04787468, ...,   0.75711124,
          13.4783386 , -26.68444706],
        ...,
        [ 29.79838231,  -8.99264867,   8.00634401, ...,   0.44305956,
          13.80889296, -27.49329205],
        [ 29.69787241,  -8.84085012,   8.06171484, ...,   0.49371378,
          13.79346672, -27.2252018 ],
        [ 29.59496785,  -8.72512881,   8.30304015, ...,   0.61924783,
          13.90858687, -27.25205641]],

       [[ 29.45810469,  -8.80394794,   7.95619016, ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.35419798,  -8.77074597,   7.88468578, ...,   0.30142965,
          13.51385232, -27.06290427],
        [ 29.01800485,  -8.57978597,   8.07739643, ...,   0.55577272,
          13.61593257, -26.95360176],
        ...,


In [None]:
# for i in range(new_h):
#       row_dict = {}
#       for j in range(new_w):
#         im_region = output[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
#         # print(f'Region {i}, {j}: {im_region}') 
#         row_dict[j] = im_region
#       pool_dict[i] = row_dict
#         # yield im_region, i, j  # we will use yield to get the values one by one on each iteration or funtion call

In [198]:
output # output of convolution layer layer and input of max pool layer

array([[[ 29.21824647,  -8.80394794,   7.929368  , ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016, ...,   0.50341951,
          13.76903166, -27.30852857],
        [ 29.35419798,  -8.77074597,   7.80372963, ...,   0.30142965,
          13.51385232, -27.06290427],
        ...,
        [ 29.4547246 ,  -8.84085012,   7.96074512, ...,   0.49371378,
          13.66869241, -27.63521031],
        [ 29.55889265,  -8.74600373,   7.9384627 , ...,   0.61924783,
          13.90858687, -27.3960941 ],
        [ 29.59496785,  -8.72512881,   8.30304015, ...,   0.38418876,
          13.74225353, -27.48341691]],

       [[ 29.21824647,  -8.80394794,   7.929368  , ...,   0.66829655,
          13.87566865, -27.0219215 ],
        [ 29.45810469,  -8.85865404,   7.95619016, ...,   0.50341951,
          13.76903166, -27.30852857],
        [ 29.35419798,  -8.77074597,   7.80372963, ...,   0.30142965,
          13.51385232, -27.06290427],
        ...,


In [207]:
pool_dict

{0: {0: array([[[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
            -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
          [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
            -5.19757136,   0.50341951,  13.76903166, -27.30852857]],
  
         [[ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
            -5.21329086,   0.66829655,  13.87566865, -27.0219215 ],
          [ 29.45810469,  -8.85865404,   7.95619016,   1.16211095,
            -5.19757136,   0.50341951,  13.76903166, -27.30852857]]]),
  1: array([[[ 29.35419798,  -8.77074597,   7.80372963,   1.01275204,
            -5.16827631,   0.30142965,  13.51385232, -27.06290427],
          [ 29.07609394,  -8.56437355,   7.85779185,   0.85401561,
            -5.01985825,   0.28083062,  13.22775248, -27.11067673]],
  
         [[ 29.35419798,  -8.77074597,   7.80372963,   1.01275204,
            -5.16827631,   0.30142965,  13.51385232, -27.06290427],
          [ 29.06497112,  -8.644987

In [229]:
pool_dict[0][0][0][0] # only for first region

array([ 29.21824647,  -8.80394794,   7.929368  ,   1.28712125,
        -5.21329086,   0.66829655,  13.87566865, -27.0219215 ])

In [225]:
# for i in range(pool_output.shape[0]):
#     for j in range(pool_output.shape[1]):
#         amx = np.amax(pool_dict[i][j] , axis=(0,1))

In [243]:
# amx # last region max value

![image.png](attachment:image.png)

In [241]:
# The line amax = np.amax(im_region, axis=(0, 1)) finds the maximum values in each feature map of the current region.
amx = np.amax(pool_dict[0][0] , axis=(0,1)) # first region max value

In [242]:
amx

array([ 29.45810469,  -8.80394794,   7.95619016,   1.28712125,
        -5.19757136,   0.66829655,  13.87566865, -27.0219215 ])

In [214]:
pool_dict[0][0].shape

(2, 2, 8)

In [219]:
dL_dxf.shape

(13, 13, 8)

In [236]:
pool_output.shape

(13, 13, 8)

In [247]:
dL_dinput.shape

(26, 26, 8)

In [None]:
# Gradient Routing:
# This line of code is responsible for routing the gradient from the output of the max pooling layer back to the input pixels that contributed to the output.
# Condition:
# The condition im_region[i2, j2, f2] == amax[f2] checks whether the current pixel value im_region[i2, j2, f2] is equal to the maximum value amax[f2] in its feature map.
# Gradient Assignment:
# If the condition is true, the code assigns the gradient value d_L_d_out[i, j, f2] to the corresponding input pixel d_L_d_input[i * 2 + i2, j * 2 + j2, f2]

In [244]:
# only for first region
for i2 in range(2):
        for j2 in range(2):
          for f2 in range(8):
            # If this pixel was the max value, copy the gradient to it.
            if pool_dict[0][0][0][0][f2] == amx[f2]:
              print(amx[f2])
              dL_dinput[0 * 2 + i2, 0 * 2 + j2, f2] = dL_dxf[0, 0, f2]  # i = 0, j = 0
              # copy (13,13,8) dL_dxf to (14, 14, 8) dL_dinput

-8.803947940733803
1.2871212500108342
0.668296546502761
13.87566864572048
-27.021921497273198
-8.803947940733803
1.2871212500108342
0.668296546502761
13.87566864572048
-27.021921497273198
-8.803947940733803
1.2871212500108342
0.668296546502761
13.87566864572048
-27.021921497273198
-8.803947940733803
1.2871212500108342
0.668296546502761
13.87566864572048
-27.021921497273198


In [245]:
dL_dxf

array([[[-0.21672357,  0.06403978, -0.05819088, ..., -0.00403915,
         -0.10318057,  0.19982735],
        [-0.21631459,  0.06304125, -0.05853247, ..., -0.002482  ,
         -0.10020062,  0.19923994],
        [-0.21110338,  0.06277346, -0.05890076, ..., -0.00452989,
         -0.09860979,  0.19678793],
        ...,
        [-0.2186728 ,  0.06757028, -0.0592361 , ..., -0.00346725,
         -0.10225121,  0.20283806],
        [-0.21868603,  0.06474749, -0.05835423, ..., -0.0041275 ,
         -0.10174925,  0.20103168],
        [-0.21749589,  0.06477065, -0.06091043, ..., -0.00423039,
         -0.10287437,  0.20104054]],

       [[-0.21744843,  0.06481306, -0.05898578, ..., -0.0047006 ,
         -0.10096549,  0.19951292],
        [-0.21477726,  0.06429584, -0.05780282, ..., -0.00213101,
         -0.09981037,  0.19949597],
        [-0.21238172,  0.06313045, -0.05922796, ..., -0.00348103,
         -0.10003597,  0.19870952],
        ...,
        [-0.21908979,  0.06521613, -0.05988829, ..., -

In [246]:
dL_dinput # loss with respect to max pool inputs or output of conv layer due to first region

array([[[ 0.        ,  0.06403978,  0.        , ..., -0.00403915,
         -0.10318057,  0.19982735],
        [ 0.        ,  0.06403978,  0.        , ..., -0.00403915,
         -0.10318057,  0.19982735],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]],

       [[ 0.        ,  0.06403978,  0.        , ..., -0.00403915,
         -0.10318057,  0.19982735],
        [ 0.        ,  0.06403978,  0.        , ..., -0.00403915,
         -0.10318057,  0.19982735],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  

In [None]:
# Performs a backward pass of the conv layer.
#     - d_L_d_out is the loss gradient for this layer's outputs. which is dL_dinput in max pool layer
#     - learn_rate is a float.

# dL_dinput = dL_dout ( loss gradient w rt output of conv or input of max pool layer)

to update filters values

In [255]:
# dL_dfilter = dL_dinput * dinput_dfilter

dL_dfilter = np.zeros(filters.shape) # initialize

In [256]:
dL_dfilter

array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]])

In [252]:
resized_img.shape

(28, 28)

In [253]:
filters.shape

(8, 3, 3)

In [251]:
output.shape

(26, 26, 8)

In [254]:
im_region_dict

{0: {0: array([[54.33333333, 55.33333333, 55.66666667],
         [54.33333333, 55.33333333, 55.66666667],
         [54.33333333, 55.33333333, 55.66666667]]),
  1: array([[55.33333333, 55.66666667, 55.66666667],
         [55.33333333, 55.66666667, 55.66666667],
         [55.33333333, 55.66666667, 55.66666667]]),
  2: array([[55.66666667, 55.66666667, 54.66666667],
         [55.66666667, 55.66666667, 54.66666667],
         [55.66666667, 55.66666667, 54.66666667]]),
  3: array([[55.66666667, 54.66666667, 53.66666667],
         [55.66666667, 54.66666667, 53.66666667],
         [55.66666667, 54.66666667, 54.66666667]]),
  4: array([[54.66666667, 53.66666667, 54.33333333],
         [54.66666667, 53.66666667, 54.33333333],
         [54.66666667, 54.66666667, 54.33333333]]),
  5: array([[53.66666667, 54.33333333, 54.33333333],
         [53.66666667, 54.33333333, 54.33333333],
         [54.66666667, 54.33333333, 54.33333333]]),
  6: array([[54.33333333, 54.33333333, 54.66666667],
         [54.3

In [257]:
num_filters

8

In [261]:
im_region_dict[0][0] # first im_region

array([[54.33333333, 55.33333333, 55.66666667],
       [54.33333333, 55.33333333, 55.66666667],
       [54.33333333, 55.33333333, 55.66666667]])

In [265]:
dL_dinput[0][0][0].shape

()

In [264]:
im_region_dict[0][0].shape

(3, 3)

In [271]:
for f in range(num_filters):
        dL_dfilter[f] += dL_dinput[0, 0, f] * im_region_dict[0][0] # filter update only for first im_region through broadcast

In [272]:
dL_dfilter # loss wrt filter  only for first im_region through broadcast

array([[[ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       [[ 3.4794947 ,  3.54353448,  3.56488107],
        [ 3.4794947 ,  3.54353448,  3.56488107],
        [ 3.4794947 ,  3.54353448,  3.56488107]],

       [[ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       [[-0.51428367, -0.52374902, -0.52690413],
        [-0.51428367, -0.52374902, -0.52690413],
        [-0.51428367, -0.52374902, -0.52690413]],

       [[ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       [[-0.21946024, -0.22349938, -0.22484576],
        [-0.21946024, -0.22349938, -0.22484576],
        [-0.21946024, -0.22349938, -0.22484576]],

       [[-5.60614444, -5.70932501, -5.74371854],
        [-5.60614444, -5.70932501, -5.74371854],
        

In [273]:
old_filters = filters.copy()

In [274]:
old_filters

array([[[ 0.01267095,  0.20470475,  0.09748474],
        [ 0.05568934, -0.03303646, -0.01112282],
        [ 0.10758389,  0.02007382,  0.07619285]],

       [[-0.08498716, -0.00374426, -0.12824152],
        [-0.01429454,  0.00819415, -0.08061402],
        [ 0.0437479 , -0.00196678,  0.10243619]],

       [[-0.1033074 ,  0.08319609, -0.13292061],
        [-0.04424397, -0.06655614,  0.15981453],
        [ 0.20135892, -0.09759611,  0.14350246]],

       [[-0.01883311, -0.13721395, -0.09912846],
        [-0.09317714,  0.07193293,  0.19069769],
        [-0.03538911,  0.13244822,  0.00865655]],

       [[-0.00283871,  0.12421434,  0.08159141],
        [-0.03592091, -0.17254956, -0.07766537],
        [ 0.10526048, -0.10400888, -0.01105415]],

       [[-0.21682431,  0.07806685,  0.07949128],
        [-0.0224462 ,  0.10101604, -0.10534961],
        [ 0.05890768, -0.13262552,  0.16772724]],

       [[-0.0152912 ,  0.13321466,  0.03515719],
        [-0.02791577,  0.01815089,  0.01112153],
        

In [275]:
# updated_filters =
filters -= lr * dL_dfilter

In [276]:
filters

array([[[ 0.01267095,  0.20470475,  0.09748474],
        [ 0.05568934, -0.03303646, -0.01112282],
        [ 0.10758389,  0.02007382,  0.07619285]],

       [[-0.11978211, -0.0391796 , -0.16389033],
        [-0.04908949, -0.02724119, -0.11626283],
        [ 0.00895295, -0.03740212,  0.06678738]],

       [[-0.1033074 ,  0.08319609, -0.13292061],
        [-0.04424397, -0.06655614,  0.15981453],
        [ 0.20135892, -0.09759611,  0.14350246]],

       [[-0.01369028, -0.13197646, -0.09385942],
        [-0.0880343 ,  0.07717042,  0.19596673],
        [-0.03024628,  0.13768571,  0.01392559]],

       [[-0.00283871,  0.12421434,  0.08159141],
        [-0.03592091, -0.17254956, -0.07766537],
        [ 0.10526048, -0.10400888, -0.01105415]],

       [[-0.21462971,  0.08030184,  0.08173974],
        [-0.02025159,  0.10325104, -0.10310115],
        [ 0.06110228, -0.13039053,  0.1699757 ]],

       [[ 0.04077024,  0.19030791,  0.09259437],
        [ 0.02814568,  0.07524414,  0.06855872],
        

In [277]:
old_filters == filters # filters updated

array([[[ True,  True,  True],
        [ True,  True,  True],
        [ True,  True,  True]],

       [[False, False, False],
        [False, False, False],
        [False, False, False]],

       [[ True,  True,  True],
        [ True,  True,  True],
        [ True,  True,  True]],

       [[False, False, False],
        [False, False, False],
        [False, False, False]],

       [[ True,  True,  True],
        [ True,  True,  True],
        [ True,  True,  True]],

       [[False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False]],

       [[False, False, False],
        [False, False, False],
        [False, False, False]]])

In [278]:
old_weights == weights # weights updated

array([[False,  True,  True, ...,  True,  True,  True],
       [False,  True,  True, ...,  True,  True,  True],
       [False,  True,  True, ...,  True,  True,  True],
       ...,
       [False,  True,  True, ...,  True,  True,  True],
       [False,  True,  True, ...,  True,  True,  True],
       [False,  True,  True, ...,  True,  True,  True]], shape=(1352, 10))

In [279]:
biases # biases updated

array([0.00857532, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ])