# MNIST Fashion Dataset: A ConvNet Classifier

In [34]:
import gzip
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
from typing import *

## Dataset

## Training Functions

## Network

### Activation Functions

In [35]:
def sigmoid(z: np.ndarray) -> np.ndarray:
    """
    Applies the logistic sigmoid function to the given values.

    The result is evaluated as exp(-log(1 + exp(-z))) through ``np.logaddexp``
    instead of the naive ``1 / (1 + exp(-z))``: the naive form overflows inside
    ``exp`` for strongly negative inputs and emits a RuntimeWarning.
    :param z: The values to transform
    :return: The element-wise sigmoid of the given values
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), computed stably
    return np.exp(-np.logaddexp(0, -z))

def sigmoid_g(z: np.ndarray) -> np.ndarray:
    """
    Computes the gradients of the sigmoid function.
    :param z: The values at which the derivative is evaluated
    :return: The element-wise derivative s(z) * (1 - s(z))
    """
    activation: np.ndarray = sigmoid(z)
    return activation * (1 - activation)

def tanh(z: np.ndarray) -> np.ndarray:
    """
    Applies the TanH activation function to the given values.
    :param z: The values to transform
    :return: The element-wise hyperbolic tangent of the given values
    """
    activated: np.ndarray = np.tanh(z)
    return activated

def tanh_g(z: np.ndarray) -> np.ndarray:
    """
    Computes the gradients of the TanH function.
    :param z: The values at which the derivative is evaluated
    :return: The element-wise derivative 1 - tanh(z)^2
    """
    activation: np.ndarray = tanh(z)
    return 1 - activation ** 2

def relu(z: np.ndarray) -> np.ndarray:
    """
    Applies the ReLU activation function to the given values.
    :param z: The values to transform
    :return: The given values with everything below zero replaced by zero
    """
    return np.clip(z, 0, None)

def relu_g(z: np.ndarray) -> np.ndarray:
    """
    Computes the gradients of the ReLU function.
    :param z: The values at which the derivative is evaluated
    :return: An array of z's shape holding 1 where z is positive and 0 elsewhere
    """
    return np.where(z > 0, 1.0, 0.0)

### Weight Initialization Functions

In [36]:
def uniform_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Draws weights of the requested shape uniformly from [-1, 1).

    The fan-in / fan-out arguments are accepted so that every initializer
    shares one signature; this initializer does not use them.
    :param inp: The number of inputs (unused)
    :param out: The number of outputs (unused)
    :param shape: The shape of the weights to be initialized
    :return: The uniformly randomly initialized weights
    """
    return np.random.uniform(low=-1, high=1, size=shape)

def standard_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Draws weights uniformly from [-1, 1) and scales them by sqrt(1 / inp).
    :param inp: The number of inputs
    :param out: The number of outputs (unused)
    :param shape: The shape of the weights to be initialized
    :return: The "Standard"-initialized weights
    """
    samples: np.ndarray = np.random.uniform(low=-1, high=1, size=shape)
    scale = np.sqrt(1 / inp)
    return samples * scale

def xavier_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Draws weights uniformly from [-1, 1) and scales them by sqrt(6 / (inp + out)).
    :param inp: The number of inputs
    :param out: The number of outputs
    :param shape: The shape of the weights to be initialized
    :return: The "Xavier"-initialized weights
    """
    samples: np.ndarray = np.random.uniform(low=-1, high=1, size=shape)
    scale = np.sqrt(6 / (inp + out))
    return samples * scale

def he_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Draws weights uniformly from [-1, 1) and scales them by sqrt(2 / inp).

    NOTE(review): the uniform variant of He initialization is usually stated
    with the bound sqrt(6 / inp), while sqrt(2 / inp) is the standard deviation
    of the normal variant - confirm that this scale is the intended one.
    :param inp: The number of inputs
    :param out: The number of outputs (unused)
    :param shape: The shape of the weights to be initialized
    :return: The "He"-initialized weights
    """
    samples: np.ndarray = np.random.uniform(low=-1, high=1, size=shape)
    scale = np.sqrt(2 / inp)
    return samples * scale

### Layer Classes

In [37]:
class Layer(object):
    """
    Represents a layer in a deep neural network.

    This is the common base of all layers: it stores the activation function,
    its derivative, the weight initializer and the (initially unset) weights.
    The propagation methods have to be provided by the subclasses.
    """

    def __init__(self, actf: Callable[[np.ndarray], np.ndarray] = relu, actf_g: Callable[[np.ndarray], np.ndarray] = relu_g,
                 initializer: Callable[[int, int, List[int]], np.ndarray] = he_init):
        """
        Create a new layer.
        :param actf: The activation function to use
        :param actf_g: The activation function's derivative
        :param initializer: The weight initializer to use
        """
        self.actf: Callable[[np.ndarray], np.ndarray] = actf
        self.actf_g: Callable[[np.ndarray], np.ndarray] = actf_g
        self.initializer: Callable[[int, int, List[int]], np.ndarray] = initializer
        # Stays None until a subclass' initialize() creates the weights
        self.weights: Optional[np.ndarray] = None

    def initialize(self) -> None:
        """
        Initializes the weights.
        :raises NotImplementedError: Always - subclasses have to override this method
        """
        raise NotImplementedError()

    def forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation + return the results.
        :param inp: The layer's input
        :raises NotImplementedError: Always - subclasses have to override this method
        """
        raise NotImplementedError()

    def backward(self, grad: np.ndarray, *args: Any) -> np.ndarray:
        """
        Perform backward propagation + return the results.
        :param grad: The gradient of the next layer
        :param args: Further layer-specific arguments; accepted here so that the
                     signature agrees with the subclasses' overrides, which take
                     additional values besides the gradient
        :raises NotImplementedError: Always - subclasses have to override this method
        """
        raise NotImplementedError()

    def __call__(self, inp: np.ndarray) -> np.ndarray:
        """Perform forward propagation + return the results (shorthand for forward)."""
        return self.forward(inp)

In [38]:
class FCLayer(Layer):
    """
    Represents a fully-connected layer in a deep neural network.

    The weights have the shape (#inputs + 1, #neurons); the additional last row
    holds the bias, which is applied by appending a row of ones to the input.
    Inputs and outputs are 2-D arrays with one column per sample.
    """

    def __init__(self, neurons: int, actf: Callable[[np.ndarray], np.ndarray] = relu, actf_g: Callable[[np.ndarray], np.ndarray] = relu_g,
                 initializer: Callable[[int, int, List[int]], np.ndarray] = he_init, inp_shape: Optional[Tuple[int]] = None):
        """
        Create a new fully-connected layer.
        :param neurons: The number of neurons in the layer
        :param actf: The activation function to use
        :param actf_g: The activation function's derivative
        :param initializer: The weight initializer to use
        :param inp_shape: The shape of the layer's input - format (#neurons)
        :raises TypeError: If inp_shape is given but does not have exactly one entry
        """
        self.neurons: int = neurons
        super().__init__(actf, actf_g, initializer)
        # Assigned last: __setattr__ creates the weights as soon as the neuron
        # count, the initializer and the input shape are all known.
        self.inp_shape: Optional[Tuple[int]] = inp_shape

    @property
    def out_shape(self) -> Tuple[int]:
        """The shape of the layer's output - format (#neurons)"""
        return ( self.neurons, )

    def initialize(self) -> None:
        """Initializes the weights (does nothing while the input shape is unknown)"""
        if self.inp_shape is None:
            return
        rows: int = self.inp_shape[0] + 1  # + 1 for the bias row
        self.weights = self.initializer(rows, self.neurons, [ rows, self.neurons, ])

    def forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation + return the results.
        :param inp: The layer's input - shape (#inputs, #samples)
        :return: The layer's activation - shape (#neurons, #samples)
        """
        return self.actf(self._forward(inp))

    def _forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation - without applying the activation function.
        :param inp: The layer's input - shape (#inputs, #samples)
        :return: The layer's value - shape (#neurons, #samples)
        """
        assert len(inp.shape) == 2
        assert inp.shape[0] == self.inp_shape[0]
        # The appended row of ones is multiplied with the bias row of the weights
        return self.weights.T @ np.vstack((inp, np.ones((1, inp.shape[1]))))

    def backward(self, grad: np.ndarray, val: np.ndarray) -> np.ndarray:
        """
        Perform backward propagation + return the results.
        :param grad: The gradient of the next layer
        :param val: This layer's value (= before application of activation function)
        :return: This layer's gradient - shape (#inputs, #samples)
        """
        assert len(grad.shape) == 2
        assert grad.shape == val.shape
        assert grad.shape[0] == self.neurons
        # The bias row is left out: it has no counterpart in the layer's input
        return self.weights[:-1, :] @ (grad * self.actf_g(val))

    def _backward(self, grad: np.ndarray, val: np.ndarray, p_act: np.ndarray, lam: float) -> np.ndarray:
        """
        Perform backward propagation - but return the weight's gradients.
        :param grad: The gradient of the next layer
        :param val: This layer's value (= before application of activation function)
        :param p_act: The previous layer's activation
        :param lam: The regularization parameter lambda - for weight regularization
        :return: The gradient of this layer's weights - same shape as the weights
        """
        assert len(grad.shape) == 2
        assert grad.shape == val.shape
        assert grad.shape[0] == self.neurons
        assert p_act.shape[1] == val.shape[1]
        assert p_act.shape[0] == self.inp_shape[0]
        samples: int = val.shape[1]
        delta: np.ndarray = grad * self.actf_g(val)
        biased_act: np.ndarray = np.vstack((p_act, np.ones((1, samples))))
        # The regularization term covers all weights, including the bias row
        return biased_act @ delta.T + (lam / samples) * self.weights

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Set an attribute - and (re-)initialize the weights whenever the neuron
        count, the input shape or the initializer changes.
        :raises TypeError: If a non-empty inp_shape does not have exactly one entry
        """
        if name == 'inp_shape' and value and len(value) != 1:
            raise TypeError('inp_shape of a fully-connected layer must have exactly one entry')
        if name in ('neurons', 'inp_shape', 'initializer'):
            self.__dict__[name] = value
            # Only (re-)initialize once everything initialize() reads is present.
            # (Changing the initializer used to test for a 'shape' attribute which
            # this class never has - so the weights were never re-initialized.)
            if all(key in self.__dict__ for key in ('neurons', 'inp_shape', 'initializer')):
                self.initialize()
            return
        super().__setattr__(name, value)

In [39]:
class ConvLayer(Layer):
    """
    Represents a convolutional layer in a deep neural network.

    Inputs and outputs are laid out as (#inputs, height, width, #channels); the
    weights (= filters) are stored as (#filters, height, width, #channels).
    """

    def __init__(self, shape: Tuple[int, int, int], stride: int, pad: int = 0, 
                 actf: Callable[[np.ndarray], np.ndarray] = relu, actf_g: Callable[[np.ndarray], np.ndarray] = relu_g,
                 initializer: Callable[[int, int, List[int]], np.ndarray] = he_init, inp_shape: Optional[Tuple[int, int, int]] = None):
        """
        Create a new convolutional layer.
        :param shape: The shape of the layer's filters - format (#filters x height x width)
        :param stride: The filters' stride
        :param pad: The zero-padding added on all sides of the input - default: 0
        :param actf: The activation function to use
        :param actf_g: The activation function's derivative
        :param initializer: The weight initializer to use
        :param inp_shape: The shape of the layer's input - format (height x width x #channels)
        :raises TypeError: If shape or inp_shape is given but does not have exactly three entries
        """
        self.shape: Tuple[int, int, int] = shape
        self.stride: int = stride
        self.pad: int = pad
        super().__init__(actf, actf_g, initializer)
        # Assigned last: __setattr__ creates the weights as soon as the filter
        # shape, the initializer and the input shape are all known.
        self.inp_shape: Optional[Tuple[int, int, int]] = inp_shape

    @classmethod
    def _convolve(cls, x: np.ndarray, f: np.ndarray, p: Union[int, Tuple[int, int]] = 0, s: int = 1) -> np.ndarray:
        """
        Perform convolution + return result.
        :param x: The base array to convolve on - shape: ( #inputs, height, width, #channels )
        :param f: The filter(s) to use - shape: ( #filters, height, width, #channels )
        :param p: The zero-padding (added on all sides) - either one value for both
                  axes or a pair (vertical, horizontal) - default: 0
        :param s: The stride - default: 1
        :return: The result - shape: ( #inputs, out height, out width, #filters )
        """
        assert len(x.shape) == 4
        pad_h, pad_w = (p, p) if np.ndim(p) == 0 else p
        padded: np.ndarray = np.pad(x, [(0, 0), (pad_h, pad_h), (pad_w, pad_w), (0, 0)])
        # One window spans all inputs and all channels, so only the two spatial
        # axes slide: shape (1, out_h, out_w, 1, #inputs, f_h, f_w, #channels)
        windows: np.ndarray = np.lib.stride_tricks.sliding_window_view(padded, (x.shape[0], *f.shape[1:]))
        # Drop the two unit axes, apply the stride and add an axis for the filters:
        # shape (out_h, out_w, #inputs, 1, f_h, f_w, #channels)
        windows = windows[0, ::s, ::s, 0][:, :, :, np.newaxis]
        # Broadcasting against the filters pairs every window with every filter,
        # no explicit repetition of the windows is needed
        products: np.ndarray = np.moveaxis(windows * f, 2, 0)
        # Sum over filter height, filter width and channels
        return np.sum(products.reshape(*products.shape[:4], -1), axis=4)

    @classmethod
    def _full_convolve(cls, x: np.ndarray, f: np.ndarray) -> np.ndarray:
        """
        Perform "full" convolution + return result.

        The input is padded by (filter height - 1) vertically and by
        (filter width - 1) horizontally, so non-square filters are covered as well.
        :param x: The base array to convolve on - shape: ( #inputs, height, width, #channels )
        :param f: The filter(s) to use - shape: ( #filters, height, width, #channels )
        :return: The result - shape: ( #inputs, out height, out width, #filters )
        """
        return cls._convolve(x, f, (f.shape[1] - 1, f.shape[2] - 1))

    @property
    def out_shape(self) -> Tuple[int, int, int]:
        """The shape of the layer's output - format (height x width x #filters)"""
        out_h: int = 1 + (self.inp_shape[0] + 2 * self.pad - self.shape[1]) // self.stride
        out_w: int = 1 + (self.inp_shape[1] + 2 * self.pad - self.shape[2]) // self.stride
        return ( out_h, out_w, self.shape[0], )

    def initialize(self) -> None:
        """
        Initializes the weights.
        :raises TypeError: If the layer's input shape is not known yet
        """
        if self.inp_shape is None:
            raise TypeError()
        n_filters, f_h, f_w = self.shape
        filters_size: int = n_filters * f_h * f_w
        channels: int = self.inp_shape[2]
        self.weights = self.initializer(filters_size * channels, filters_size, [ *self.shape, channels, ])

    def forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation + return the results.
        :param inp: The layer's input - shape ( #inputs, height, width, #channels )
        :return: The layer's activation - shape ( #inputs, *out_shape )
        """
        return self.actf(self._forward(inp))

    def _forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation - without applying the activation function.
        :param inp: The layer's input - shape ( #inputs, height, width, #channels )
        :return: The layer's value - shape ( #inputs, *out_shape )
        """
        # tuple(): the input shape may have been given as a list
        assert inp.shape[1:] == tuple(self.inp_shape)
        return self._convolve(inp, self.weights, self.pad, self.stride)

    def backward(self, grad: np.ndarray, val: np.ndarray) -> np.ndarray:
        """
        Perform backward propagation + return the results.

        The layer's stride and padding are not taken into account here, so the
        result only has the shape of the layer's input for stride 1 and pad 0.
        :param grad: The gradient of the next layer
        :param val: This layer's value (= before application of activation function)
        :return: This layer's gradient
        """
        assert grad.shape[1:] == self.out_shape
        assert val.shape[1:] == self.out_shape
        # Spatially flipped filters with the roles of filters and channels swapped:
        # shape (#channels, height, width, #filters)
        flipped: np.ndarray = np.swapaxes(np.flip(self.weights, axis=(1, 2)), 0, 3)
        return self._full_convolve(grad * self.actf_g(val), flipped)

    def _backward(self, grad: np.ndarray, val: np.ndarray, p_act: np.ndarray, lam: float) -> np.ndarray:
        """
        Perform backward propagation - but return the weight's gradients.

        The layer's stride and padding are not taken into account here, so the
        result only has the shape of the weights for stride 1 and pad 0.
        :param grad: The gradient of the next layer
        :param val: This layer's value (= before application of activation function)
        :param p_act: The previous layer's activation
        :param lam: The regularization parameter lambda - for weight regularization
        :return: The gradient of this layer's weights - same shape as the weights
        """
        assert grad.shape[1:] == self.out_shape
        assert val.shape[1:] == self.out_shape
        delta: np.ndarray = grad * self.actf_g(val)
        # Swapping the first and the last axis turns the channels into "inputs"
        # and the samples into "channels", so the convolution sums over the samples
        w_grad: np.ndarray = self._convolve(np.swapaxes(p_act, 0, 3), np.swapaxes(delta, 0, 3)).swapaxes(0, 3)
        # The samples are on axis 0 here (axis 1 is the height), so lambda has to
        # be divided by val.shape[0]
        return w_grad + (lam / val.shape[0]) * self.weights

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Set an attribute - and (re-)initialize the weights whenever the filter
        shape, the input shape or the initializer changes. While the filter shape
        or the input shape is still unset, the initialization is deferred.
        :raises TypeError: If a non-empty shape / inp_shape does not have exactly three entries
        """
        if name in ('shape', 'inp_shape') and value and len(value) != 3:
            raise TypeError(f'{name} of a convolutional layer must have exactly three entries')
        if name in ('shape', 'inp_shape', 'initializer'):
            self.__dict__[name] = value
            # initialize() raises without an input shape - so wait until
            # everything it reads is actually available
            if self.__dict__.get('shape') and self.__dict__.get('inp_shape') and 'initializer' in self.__dict__:
                self.initialize()
            return
        super().__setattr__(name, value)

In [40]:
class FlattenLayer(Layer):
    """
    Flattens the output of a previous layer in a neural network.

    Converts between the layout ( #inputs, height, width, #channels ) and the
    2-D layout ( #values, #inputs ) with one column per sample.
    """

    def __init__(self, inp_shape: Optional[Tuple[int, int, int]] = None):
        """
        Create a new flatten layer.
        :param inp_shape: The shape of the layer's input - format (height x width x #channels)
        """
        # Provides the attributes every layer has (most notably weights = None);
        # the activation function set by the base class is not used by this layer
        super().__init__()
        self.inp_shape: Optional[Tuple[int, int, int]] = inp_shape

    @property
    def out_shape(self) -> Tuple[int]:
        """The shape of the layer's output - format (#values), the product of the input's dimensions"""
        return ( reduce(lambda a,b: a*b, self.inp_shape), )

    def initialize(self) -> None:
        """Dummy function - layer has no weights"""
        pass

    def forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation + return the results.
        :param inp: The layer's input - shape ( #inputs, *inp_shape )
        :return: The flattened input - shape ( #values, #inputs )
        """
        return inp.reshape(-1, *self.out_shape).T

    def backward(self, grad: np.ndarray, *args: List[Any]) -> np.ndarray:
        """
        Perform backward propagation + return the results.
        :param grad: The gradient of the next layer - shape ( #values, #inputs )
        :param args: Ignored - accepted for compatibility with the other layers
        :return: This layer's gradient (= the reshaped gradient) - shape ( #inputs, *inp_shape )
        """
        # Exact inverse of forward(): undo the transposition first, then restore
        # the input's dimensions. (Reshaping before transposing mixes the values
        # of different samples and leaves the axes in reversed order.)
        return grad.T.reshape(-1, *self.inp_shape)

    def __call__(self, inp: np.ndarray) -> np.ndarray:
        """Perform forward propagation + return the results."""
        return self.forward(inp)

### Network Class

In [45]:
class Network(object):
    """
    Represents a deep neural network.

    The network is an ordered collection of layers. Whenever the collection is
    changed, the input shapes of the affected layers are derived from the output
    shapes of their predecessors and all weights are re-initialized.
    """

    def __init__(self, initializer: Callable[[int, int, List[int]], np.ndarray] = he_init, layers: Optional[List[Layer]] = None):
        """
        Create a new deep neural network.
        :param initializer: The weight initializer to use
        :param layers: The network's layers - default: no layers
        """
        self.__initializer: Callable[[int, int, List[int]], np.ndarray] = initializer
        # A fresh list per network: a shared default list (or the caller's list)
        # would be modified by append / insert / += of every network using it
        self.layers: List[Layer] = [] if layers is None else list(layers)

    @property
    def inp_shape(self) -> Optional[Tuple[int, ...]]:
        """The input shape of the first layer - None for an empty network"""
        return self.layers[0].inp_shape if self.layers else None

    @property
    def out_shape(self) -> Optional[Tuple[int, ...]]:
        """The output shape of the last layer - None for an empty network"""
        return self.layers[-1].out_shape if self.layers else None

    def initialize(self) -> None:
        """Re-initialize all weights"""
        for l in self.layers:
            l.initialize()

    def __prepare_layer(self, layer: Layer, pos: int) -> None:
        """
        Prepares a layer for being placed at the given position.
        :param layer: The layer to prepare
        :param pos: The (non-negative) index the layer is going to have
        """
        # The input shape has to be set before the initializer: assigning the
        # initializer may already create the layer's weights
        if pos > 0:
            layer.inp_shape = self.layers[pos - 1].out_shape
        layer.initializer = self.__initializer

    def __relink(self, start: int = 1) -> None:
        """
        Re-derives the input shapes of the layers from the given index onwards
        from the output shapes of their respective predecessors.
        :param start: The index of the first layer to update
        """
        for i in range(max(start, 1), len(self.layers)):
            self.layers[i].inp_shape = self.layers[i - 1].out_shape

    def append(self, layer: Layer) -> None:
        """Appends a new layer to the network"""
        self.__prepare_layer(layer, len(self.layers))
        self.layers.append(layer)
        self.initialize()

    def insert(self, idx: int, layer: Layer) -> None:
        """
        Inserts the given layer at the given index.
        :param idx: The index to insert at - interpreted like list.insert does
                    (negative values count from the end, out-of-range values are clipped)
        :param layer: The layer to insert
        """
        size: int = len(self.layers)
        pos: int = max(size + idx, 0) if idx < 0 else min(idx, size)
        # The new layer's input comes from the layer right before it - not from
        # the network's last layer
        self.__prepare_layer(layer, pos)
        self.layers.insert(pos, layer)
        # The layers behind the new one now receive a different input
        self.__relink(pos + 1)
        self.initialize()

    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation using the given input.
        :param x: The network's input
        :return: The output of the last layer (the unchanged input for an empty network)
        """
        out: np.ndarray = x
        for l in self.layers:
            out = l(out)
        return out

    def __call__(self, inp: np.ndarray) -> np.ndarray:
        """Perform forward propagation + return the results."""
        return self.forward(inp)

    def __add__(self, other: Any) -> Any:
        """
        Creates a new network consisting of this network's layers + the given layer.
        The layer objects are shared between both networks.
        :raises ArithmeticError: If the given object is not a layer
        """
        if not isinstance(other, Layer):
            raise ArithmeticError()
        self.__prepare_layer(other, len(self.layers))
        return Network(self.__initializer, [ *self.layers, other, ])

    def __iadd__(self, other: Any) -> 'Network':
        """
        Appends the given layer to this network.
        :raises ArithmeticError: If the given object is not a layer
        """
        if not isinstance(other, Layer):
            raise ArithmeticError()
        self.append(other)
        return self

    def __len__(self) -> int:
        """The number of layers"""
        return len(self.layers)

    def __getitem__(self, key: int) -> Layer:
        return self.layers[key]

    def __setitem__(self, key: int, value: Layer) -> None:
        """
        Replaces the layer at the given index.
        :raises IndexError: If the index is out of range
        """
        size: int = len(self.layers)
        if not -size <= key < size:
            raise IndexError('layer index out of range')
        pos: int = key + size if key < 0 else key
        self.__prepare_layer(value, pos)
        self.layers[pos] = value
        # The layers behind the new one may now receive a different input
        self.__relink(pos + 1)
        self.initialize()

    def __delitem__(self, key: int) -> None:
        """Removes the layer at the given index"""
        del self.layers[key]
        # The layer following the removed one now has a different predecessor
        self.__relink()
        self.initialize()

    def __str__(self) -> str:
        return 'DNN{ ' + ' --> '.join(f'{l.__class__.__name__}{l.out_shape}' for l in self.layers) + ' }'

## Training

In [46]:
# Build the classifier: two convolutional layers followed by two fully-connected ones.
# Only the first layer needs an explicit input shape - the network derives the
# input shape of every further layer from its predecessor's output shape.
n: Network = Network(initializer=he_init)

# NOTE(review): Fashion-MNIST images are usually single-channel (grayscale), yet
# three input channels are declared here - confirm against the dataset loader.
n.append(ConvLayer((8, 5, 5), 1, inp_shape=(28, 28, 3)))
n.append(ConvLayer((16, 3, 3), 1))
n.append(FlattenLayer())
n.append(FCLayer(150))
n.append(FCLayer(10))

print(n)

DNN{ ConvLayer(24, 24, 8) --> ConvLayer(22, 22, 16) --> FlattenLayer(7744,) --> FCLayer(150,) --> FCLayer(10,) }


## Evaluation

## Experiments