In [2]:
import numpy as np
import tensorflow as tf

In [3]:
# Seed NumPy's global generator so that this sample -- and every other
# np.random draw made later in the notebook -- is identical after a
# Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Small random test volume of shape (3, 3, 2).
x = np.random.normal(size=(3, 3, 2))
x

array([[[-0.28563073, -0.16043339],
        [-0.74400044,  1.09532693],
        [-0.31780663, -1.89730885]],

       [[-0.1571318 , -1.23555716],
        [-0.63246621,  0.63302541],
        [-0.2513021 , -0.26742342]],

       [[ 0.86617707, -0.21632862],
        [ 0.55248821, -1.58236407],
        [-0.06013818, -0.79852044]]])

In [14]:
class ConvKernel:  # or filter
  def __init__(self, kernel_dim: int | tuple[int, int], depth=1, stride: int = 1):
    # depth or n_channels
    self.kernel_dim = (kernel_dim, kernel_dim, 1) if type(kernel_dim) is int else (*kernel_dim, depth)
    self.stride = stride
    self.kernel = np.random.normal(size=(kernel_dim))
    self.bias = np.random.normal() + 0.5

  @staticmethod
  def _reLU(x):
    return np.where(x < 0, 0, x)

  def __call__(self, input, train=False):
    if len(input.shape) == 2:
      if self.kernel_dim[2] != 1:
        raise Exception(f'Dimensions don\'t match: {input.shape}, {self.kernel_dim}')
      else:
        input = np.expand_dims(input, axis=-1)
    if len(input.shape) not in [2, 3]:
      raise Exception('Incorrect input shape')
    if len(input.shape) == 3 and input.shape[2] != self.kernel_dim[2]:
      raise Exception(f'Input and kernel depths don\'t match: {input.shape} vs {self.kernel_dim}')

    # Original formula is FLOOR((N + 2P - F) / S + 1), but input is already padded in the ConvLayer, so no need to worry about that here
    row_convolutions = np.floor((input.shape[0] - self.kernel_dim[0]) / self.stride + 1).astype(int)
    col_convolutions = np.floor((input.shape[1] - self.kernel_dim[1]) / self.stride + 1).astype(int)
    z = np.zeros((row_convolutions, col_convolutions))
    for i in range(0, row_convolutions):
      row_start = i * self.stride
      row_end = row_start + self.kernel_dim[0]
      for j in range(0, col_convolutions):
        col_start = j * self.stride
        col_end = col_start + self.kernel_dim[1]
        z_i = np.sum(input[row_start:row_end, col_start:col_end,:] * self.kernel) + self.bias
        z[i,j] = z_i

    a = ConvKernel._reLU(z)

    if train:
      cache = [input, self.kernel, self.bias, self.stride]
      return a, cache

    return a


In [5]:
# Two-channel 2x2 filter; `x` is expected to be the (3, 3, 2) volume sampled above.
k = ConvKernel(kernel_dim=(2, 2), depth=2)
k(x)

array([[2.44296051, 1.39042991],
       [3.91765131, 3.09709761]])

In [37]:
class ConvLayer:
  def __init__(self, kernel_dim: tuple[int, int] | tuple[int, int, int], num_kernels=1, stride=1, padding='valid'):
    self.kernel_dim = kernel_dim
    self.num_kernels = num_kernels
    self.stride=stride
    self.padding=padding
    self.kernels = []
    self.initialized = False
    self.input_dimensions = None
    self._padding=None

  def initialize(self, input):
    self.input_dimensions = input.shape # if len(input.shape) == 2 else input.shape[:-1]

    # calculate same padding
    # P = ceil(((S-1)*W-S+F)/2), with F = filter size, S = stride, W = input size
    if self.padding == 'same':
      col_padding = np.ceil(
          ((self.stride-1) * self.input_dimensions[1] - self.stride + self.kernel_dim[1]) / 2
      ).astype(np.int32)
      row_padding = np.ceil(
          ((self.stride-1) * self.input_dimensions[0] - self.stride + self.kernel_dim[0]) / 2
      ).astype(np.int32)

      self._padding = (row_padding, col_padding)

      self.kernels = [
          ConvKernel(
              kernel_dim = self.kernel_dim[:-1],
              depth = self.kernel_dim[-1],
              stride = self.stride
          ) for _ in range(self.num_kernels)
      ]


  def _pad(self, input):
    row_pad = np.zeros((input.shape[0], self._padding[0], input.shape[2]))
    col_pad = np.zeros((self._padding[1], input.shape[0] + self._padding[0]*2, input.shape[2]))  # as number of rows will increase after row padding

    input = np.hstack((row_pad, input, row_pad))
    input = np.vstack((col_pad, input, col_pad))

    print(input.shape)

    return input

  def __call__(self, input, train=False):
    if not self.initialized:
      self.initialize(input)
    input = self._pad(input)

    if train:
      results = np.array([
        kernel(input, train)[0] for kernel in self.kernels
      ])
      cache = np.array([
        kernel(input, train)[1] for kernel in self.kernels
      ])
    else:
      results = np.array([
        kernel(input, train) for kernel in self.kernels
      ])

    output = np.moveaxis(results, 0, -1)

    if train:
      cache = np.array([*cache, self._padding])
      return output, cache
    return output

  def backward(self, dZ, cache):
    (A_prev, W, b, stride, padding) = cache
    (m, n_H_prev, n_W_prev, n_C_prev) = a_prev.shape
    (f, f, n_C_prev, n_C) = W.shape
    (m, n_H, n_W, n_C) = dZ.shape

    dA_prev = np.zeros(A_prev.shape)
    dW = np.zeros(W.shape)
    db = np.zeros(b.shape) # b.shape = [1,1,1,n_C]

    A_prev_pad = np.pad(A_prev, (padding[0], padding[0]), (padding[1], padding[1]))
    dA_prev_pad = np.pad(dA_prev, (padding[0], padding[0]), (padding[1], padding[1]))

    for i in range(m):                       # loop over the training examples

        # select ith training example from A_prev_pad and dA_prev_pad
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]

        for h in range(n_H):                   # loop over vertical axis of the output volume
            for w in range(n_W):               # loop over horizontal axis of the output volume
                for c in range(n_C):           # loop over the channels of the output volume

                    # Find the corners of the current "slice"
                    vert_start = stride * h
                    vert_end = vert_start + f
                    horiz_start = stride * w
                    horiz_end = horiz_start + f

                    # Use the corners to define the slice from a_prev_pad
                    a_slice = a_prev_pad[vert_start:vert_end,horiz_start:horiz_end,:]

                    # Update gradients for the window and the filter's parameters using the code formulas given above
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:,:,:,c] * dZ[i, h, w, c]
                    dW[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    db[:,:,:,c] += dZ[i, h, w, c]

        # Set the ith training example's dA_prev to the unpadded da_prev_pad (Hint: use X[pad:-pad, pad:-pad, :])
        dA_prev[i, :, :, :] = da_prev_pad[padding[0]:-padding[0], padding[1]:-padding[1], :]

    return dA_prev, dW, db



In [39]:
num_filters = 3

# 'same'-padded layer of 3x3x3 filters, run in training mode on a random 5x5x3 volume.
k = ConvLayer(kernel_dim=(3, 3, 3), num_kernels=num_filters, padding='same')
x = np.random.normal(size=(5, 5, 3))
a, c = k(x, train=True)
print(a.shape)
print(c.shape)

# The spatial size must be unchanged and the last axis holds one map per filter.
expected_shape = (*x.shape[:-1], num_filters)
assert a.shape == expected_shape

(7, 7, 3)
(5, 5, 3)
(4,)


  cache = np.array([
  cache = np.array([*cache, self._padding])


In [23]:
class MaxPoolLayer:
  def __init__(self, kernel_dim: int | tuple[int, int], stride=1):
    self.kernel_dim = (kernel_dim, kernel_dim) if type(kernel_dim) is int else kernel_dim
    self.stride = stride

  def __call__(self, input):
    if len(input.shape) != 3:
      raise Exception('Input must have dimenshion 3: (rows, cols, layers)')

    rows, cols, layers = input.shape
    row_convolutions = np.floor((input.shape[0] - self.kernel_dim[0]) / self.stride + 1).astype(int)
    col_convolutions = np.floor((input.shape[1] - self.kernel_dim[1]) / self.stride + 1).astype(int)

    z = np.zeros((row_convolutions, col_convolutions, layers))
    for i in range(layers):
      layer = i
      for j in range(0, row_convolutions):
        row_start = j * self.stride
        row_end = row_start + self.kernel_dim[0]
        for k in range(0, col_convolutions):
          col_start = k * self.stride
          col_end = col_start + self.kernel_dim[1]
          z_i = np.max(input[row_start:row_end,col_start:col_end,layer])
          z[j,k,layer] = z_i

    return z

In [39]:
# 2x2 max pooling with stride 2 on a random single-channel 4x4 volume.
mp = MaxPoolLayer(kernel_dim=2, stride=2)
x = np.random.normal(size=(4, 4, 1))
pooled = mp(x)

print(x)
print('--MaxPooled----')
print(pooled)

[[[ 0.84785968]
  [-0.51184513]
  [ 0.25802967]
  [-0.63142137]]

 [[ 0.96113609]
  [ 0.94982028]
  [-0.72973484]
  [-1.08607309]]

 [[ 3.74372918]
  [ 0.69988795]
  [-0.68262831]
  [-1.17995028]]

 [[ 0.243664  ]
  [ 1.32086961]
  [-0.17208798]
  [-0.29396866]]]
--MaxPooled----
[[[ 0.96113609]
  [ 0.25802967]]

 [[ 3.74372918]
  [-0.17208798]]]
