# Implementation of CNN from scratch using OOP and numpy

In this notebook, I implement a CNN using only NumPy. Each layer is declared as a class with its own forward and backward methods, which is much more compact than writing the forward and backward equations as separate functions and takes far fewer lines.

This coding style is taken from the neural-networks-from-scratch playlist on the "The Independent Code" YouTube channel: https://www.youtube.com/@independentcode

## Base Layer

The base class declares two methods so that every layer shares the same interface:

* `self.forward()` - return layer output
* `self.backward()` - update the layer's parameters and return the input gradient. An optimizer could be passed in here, but for simplicity we pass only the learning rate and update the parameters with plain gradient descent.



In [36]:
# Scratch check of two element-wise NumPy expressions (they look like the
# forward value and the gradient mask of a ReLU -- TODO confirm intent).
# NumPy is imported here because this cell sits above the notebook's main
# import cell; without it the cell raises NameError on a fresh kernel.
import numpy as np

a = np.array([[1, 2, 3, -1, -2, 1], [-4, -3, 2, 13, -1, 5]])
# Negative entries clamped to zero. Not displayed: only the last expression
# of a cell is echoed.
np.maximum(a, 0)
# 1.0 where the entry is strictly positive, 0.0 elsewhere (this is the value
# the cell displays).
(a > 0).astype("float32")

array([[1., 1., 1., 0., 0., 1.],
       [0., 0., 1., 1., 0., 1.]], dtype=float32)

In [25]:
import numpy as np

class Layer:
    """Common interface shared by every layer in the network.

    Subclasses override :meth:`forward` and :meth:`backward`. The versions
    defined here are placeholders that do nothing and return ``None``.
    """

    def __init__(self):
        # Slots for the most recent input and output; both start empty.
        self.input = self.output = None

    def forward(self, input):
        """Placeholder forward pass; ignores ``input`` and returns ``None``."""
        # TODO: return output
        return None

    def backward(self, output_gradient, learning_rate):
        """Placeholder backward pass; ignores its arguments and returns ``None``."""
        # TODO: update parameters and return input gradient
        return None

### Dense Layer

Inherits from the `Layer` class and defines its own `self.forward()` and `self.backward()` methods.

This layer will behave like a Linear Layer (without activation)

In [26]:
class Dense(Layer):
    """Fully connected (linear) layer computing ``weights @ input + bias``.

    Inputs are column-oriented: ``input`` has shape ``(input_size, n)`` where
    each of the ``n`` columns is one sample (``n == 1`` for a single sample).

    Args:
        input_size: Number of input features.
        output_size: Number of output features.
    """

    def __init__(self, input_size, output_size):
        # Initialise the base-class ``input`` / ``output`` slots as well.
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Cache ``input`` and return ``weights @ input + bias``."""
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_gradient: Gradient of the loss w.r.t. this layer's output,
                shape ``(output_size, n)``.
            learning_rate: Step size for the parameter update.

        Returns:
            Gradient of the loss w.r.t. this layer's input, shape
            ``(input_size, n)``.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        # Computed before the weights are updated so it uses the same weights
        # as the forward pass.
        input_gradient = np.dot(self.weights.T, output_gradient)
        # The bias is shared by every column, so its gradient is the sum over
        # columns. For a single column this equals ``output_gradient`` itself;
        # for several columns it keeps the (output_size, 1) bias shape instead
        # of failing to broadcast in place.
        bias_gradient = np.sum(output_gradient, axis=1, keepdims=True)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * bias_gradient
        return input_gradient


### Activation Layer

Inherits from base Layer class. Applies non-linear activation on inputs. Also has forward and backward methods.

This cell also defines a `Tanh` activation, which applies the tanh function to its inputs.

In [27]:
class Activation(Layer):
    """Parameter-free layer that applies a function element-wise.

    Args:
        activation: Callable applied to the input in the forward pass.
        activation_prime: Callable giving the derivative of ``activation``,
            evaluated at the cached input during the backward pass.
    """

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        activated = self.activation(self.input)
        return activated

    def backward(self, output_gradient, learning_rate):
        """Return ``output_gradient`` scaled element-wise by the local derivative.

        ``learning_rate`` is accepted for interface compatibility and unused:
        this layer has no parameters to update.
        """
        local_slope = self.activation_prime(self.input)
        return np.multiply(output_gradient, local_slope)

class Tanh(Activation):
    """Hyperbolic-tangent activation layer."""

    def __init__(self):
        def tanh(x):
            return np.tanh(x)

        def tanh_prime(x):
            # d/dx tanh(x) = 1 - tanh(x)^2
            return 1 - np.tanh(x) ** 2

        super().__init__(tanh, tanh_prime)

In [28]:
from scipy import signal

#
class Convolutional(Layer):
    """Convolutional layer using "valid" 2-D cross-correlation.

    Args:
        input_shape: ``(input_depth, input_height, input_width)`` of one sample.
        kernel_size: Side length of the square kernels.
        depth: Number of output feature maps (one kernel stack per map).

    One bias value is stored per output element (``biases`` has the full
    output shape), not one per feature map.
    """

    def __init__(self, input_shape, kernel_size, depth):
        in_channels, in_rows, in_cols = input_shape

        self.depth = depth
        self.input_shape = input_shape
        self.input_depth = in_channels

        # "valid" correlation shrinks each spatial dimension by kernel_size - 1.
        out_rows = in_rows - kernel_size + 1
        out_cols = in_cols - kernel_size + 1
        self.output_shape = (depth, out_rows, out_cols)

        # One (kernel_size x kernel_size) kernel per (output map, input channel) pair.
        self.kernels_shape = (depth, in_channels, kernel_size, kernel_size)
        self.kernels = np.random.randn(*self.kernels_shape)
        self.biases = np.random.randn(*self.output_shape)

    def forward(self, input):
        """Cache ``input`` and return biases plus the summed channel correlations."""
        self.input = input
        # Start from a copy of the biases so the stored parameters are not mutated.
        self.output = np.array(self.biases, copy=True)
        for out_ch, in_ch in np.ndindex(self.depth, self.input_depth):
            self.output[out_ch] += signal.correlate2d(
                self.input[in_ch], self.kernels[out_ch, in_ch], "valid"
            )
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Update kernels and biases by gradient descent; return the input gradient.

        Args:
            output_gradient: Gradient of the loss w.r.t. the layer output,
                indexed by output map along its first axis.
            learning_rate: Step size for the parameter update.

        Returns:
            Array of shape ``input_shape`` holding the gradient w.r.t. the input.
        """
        kernels_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)

        for out_ch, in_ch in np.ndindex(self.depth, self.input_depth):
            grad_map = output_gradient[out_ch]
            # Kernel gradient: "valid" correlation of the input channel with the output gradient.
            kernels_gradient[out_ch, in_ch] = signal.correlate2d(
                self.input[in_ch], grad_map, "valid"
            )
            # Input gradient: "full" convolution of the output gradient with the kernel,
            # accumulated over every output map that read this input channel.
            input_gradient[in_ch] += signal.convolve2d(
                grad_map, self.kernels[out_ch, in_ch], "full"
            )

        # Parameters are updated only after all gradients used the old kernels.
        self.kernels -= learning_rate * kernels_gradient
        self.biases -= learning_rate * output_gradient
        return input_gradient

In [29]:
class Reshape(Layer):
    """Layer that only changes array shape between neighbouring layers.

    Args:
        input_shape: Shape restored on the backward pass.
        output_shape: Shape produced on the forward pass.
    """

    def __init__(self, input_shape, output_shape):
        self.input_shape, self.output_shape = input_shape, output_shape

    def forward(self, input):
        """Return ``input`` reshaped to ``output_shape``."""
        reshaped = np.reshape(input, self.output_shape)
        return reshaped

    def backward(self, output_gradient, learning_rate):
        """Return ``output_gradient`` reshaped to ``input_shape``; ``learning_rate`` is unused."""
        restored = np.reshape(output_gradient, self.input_shape)
        return restored

In [30]:
def binary_cross_entropy(y_true, y_pred, eps=1e-12):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Args:
        y_true: Array-like of target values in [0, 1].
        y_pred: Array-like of predicted probabilities, same shape as ``y_true``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` so a prediction of
            exactly 0 or 1 cannot produce ``log(0)`` (inf / nan loss).

    Returns:
        The loss averaged over all elements, as a NumPy float scalar.
    """
    # Work in float64 so that ``1 - eps`` stays strictly below 1.
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.clip(np.asarray(y_pred, dtype=np.float64), eps, 1.0 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred, eps=1e-12):
    """Gradient of the mean binary cross-entropy with respect to ``y_pred``.

    Args:
        y_true: Array-like of target values in [0, 1].
        y_pred: Array-like of predicted probabilities, same shape as ``y_true``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` so a prediction of
            exactly 0 or 1 cannot cause a division by zero (inf / nan gradient).

    Returns:
        Array with the shape of ``y_true`` holding the element-wise gradient,
        already divided by the number of elements.
    """
    # Work in float64 so that ``1 - eps`` stays strictly below 1.
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.clip(np.asarray(y_pred, dtype=np.float64), eps, 1.0 - eps)
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

In [24]:
class Sigmoid(Activation):
    """Logistic-sigmoid activation layer, ``1 / (1 + exp(-x))``."""

    def __init__(self):
        def sigmoid(x):
            # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))). logaddexp(0, -x)
            # evaluates log(exp(0) + exp(-x)) without overflowing for large
            # negative x, where a direct exp(-x) would overflow.
            return np.exp(-np.logaddexp(0, -x))

        def sigmoid_prime(x):
            # sigma'(x) = sigma(x) * (1 - sigma(x)); evaluate sigma only once.
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)

In [31]:
!pip install np_utils



In [32]:
from keras.datasets import mnist
from keras.utils import to_categorical

#
def preprocess_data(x, y, limit):
    """Build a shuffled two-class (digits 0 and 1) subset ready for the network.

    Args:
        x: Image array indexable along its first axis; each selected image
            must hold 28 * 28 values.
        y: Integer label array aligned with ``x``.
        limit: Maximum number of samples kept per class.

    Returns:
        ``(images, labels)``: images as float32 scaled by 1/255 with shape
        ``(n, 1, 28, 28)``, and one-hot labels with shape ``(n, 2, 1)``.
    """
    # First `limit` positions of each digit, zeros first, then shuffled together.
    picked = np.hstack([np.where(y == digit)[0][:limit] for digit in (0, 1)])
    picked = np.random.permutation(picked)

    images, labels = x[picked], y[picked]

    # Add a single-channel axis and scale pixel values by 1/255.
    images = images.reshape(len(images), 1, 28, 28).astype("float32") / 255

    # One-hot encode, then turn each label into a (2, 1) column vector.
    labels = to_categorical(labels)
    labels = labels.reshape(len(labels), 2, 1)
    return images, labels

# Load MNIST from the server, then keep at most 100 images per class
# (digits 0 and 1 only, see preprocess_data) from each split, since we're
# not training on a GPU.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train = preprocess_data(x_train, y_train, 100)
x_test, y_test = preprocess_data(x_test, y_test, 100)

# Neural network: layers are applied in list order on the forward pass and
# in reverse order on the backward pass.
network = [
    Convolutional((1, 28, 28), 3, 5),  # 3x3 kernels, depth 5: (1, 28, 28) -> (5, 26, 26)
    Sigmoid(),
    Reshape((5, 26, 26), (5 * 26 * 26, 1)),  # flatten the feature maps into one column vector
    Dense(5 * 26 * 26, 100),
    Sigmoid(),
    Dense(100, 2),  # two outputs, matching the (2, 1) one-hot labels
    Sigmoid()
]
# Training hyper-parameters.
epochs = 20
learning_rate = 0.1
#
# Train: per-sample gradient descent (the parameters are updated after every
# single example, not per batch).
for e in range(epochs):
    # Running sum of the per-sample loss for this epoch.
    error = 0
    for x, y in zip(x_train, y_train):
        # forward: feed the sample through every layer in order
        output = x
        for layer in network:
            output = layer.forward(output)

        # error: accumulate the loss of this sample
        error += binary_cross_entropy(y, output)

        # backward: propagate the loss gradient from the last layer to the
        # first; each layer updates its own parameters inside backward()
        grad = binary_cross_entropy_prime(y, output)
        for layer in reversed(network):
            grad = layer.backward(grad, learning_rate)

    # Report the mean loss over the training samples.
    error /= len(x_train)
    print(f"{e + 1}/{epochs}, error={error}")
#
# Test: forward pass only; print the predicted and true class index for
# every test sample.
for x, y in zip(x_test, y_test):
    output = x
    for layer in network:
        output = layer.forward(output)
    # argmax over the two outputs / the one-hot label gives the class index.
    print(f"pred: {np.argmax(output)}, true: {np.argmax(y)}")

1/20, error=0.7172536206640002
2/20, error=0.22676229512963794
3/20, error=0.09435768043383909
4/20, error=0.07071678348019511
5/20, error=0.04303018625549971
6/20, error=0.03128314947147966
7/20, error=0.02333603634568812
8/20, error=0.01640286146176707
9/20, error=0.012327391875588954
10/20, error=0.009713587990739753
11/20, error=0.008478706182171728
12/20, error=0.007239537103126336
13/20, error=0.0063792193595302886
14/20, error=0.005709957481155583
15/20, error=0.005168291911871626
16/20, error=0.004725594338428851
17/20, error=0.004353750460964956
18/20, error=0.004032603321003528
19/20, error=0.003749673042518586
20/20, error=0.0035011335383866517
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 0, true: 0
pred: 1, true: 1
pred: 1, true: 1
pred: 0, true