# MNIST Fashion Dataset: A ConvNet Classifier

In [25]:
import gzip
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
from typing import *

## Dataset

## Training Functions

## Network

### Activation Functions

In [9]:
def sigmoid(z: np.ndarray) -> np.ndarray:
    """
    Applies the logistic sigmoid function to the given values.

    Only exp(-|z|) is evaluated, which always lies in (0, 1]. The naive form
    1 / (1 + exp(-z)) overflows for inputs of large negative magnitude.
    :param z: The values to apply the function to
    :return: The element-wise sigmoid of z
    """
    decay: np.ndarray = np.exp(-np.abs(z))
    # Both branches are finite for every input, so np.where may safely evaluate both
    return np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

def sigmoid_g(z: np.ndarray) -> np.ndarray:
    """
    Computes the gradients of the sigmoid function
    :param z: The values at which the derivative is evaluated
    :return: sigmoid(z) * (1 - sigmoid(z)), element-wise
    """
    activation: np.ndarray = sigmoid(z)
    return activation * (1 - activation)

def tanh(z: np.ndarray) -> np.ndarray:
    """
    Applies the TanH activation function to the given values
    :param z: The values to apply the function to
    :return: The element-wise hyperbolic tangent of z
    """
    squashed: np.ndarray = np.tanh(z)
    return squashed

def tanh_g(z: np.ndarray) -> np.ndarray:
    """
    Computes the gradients of the TanH function
    :param z: The values at which the derivative is evaluated
    :return: 1 - tanh(z)^2, element-wise
    """
    squashed: np.ndarray = tanh(z)
    return 1 - squashed ** 2

def relu(z: np.ndarray) -> np.ndarray:
    """
    Applies the ReLU activation function to the given values
    :param z: The values to apply the function to
    :return: A copy of z in which every negative entry is replaced by 0
    """
    return np.clip(z, 0, None)

def relu_g(z: np.ndarray) -> np.ndarray:
    """
    Computes the gradients of the ReLU function
    :param z: The values at which the derivative is evaluated
    :return: An array of z's shape holding 1 where z > 0 and 0 everywhere else
    """
    return np.where(z > 0, 1.0, 0.0)

### Weight Initialization Functions

In [21]:
def uniform_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Generates initialized weights of a given shape.

    The weights are drawn uniformly from [-1, 1); `inp` and `out` are only accepted so that
    all initializers share one signature - they do not influence the result.
    :param inp: The number of inputs (unused)
    :param out: The number of outputs (unused)
    :param shape: The shape of the weights to be initialized
    :return: The uniformly randomly initialized weights
    """
    return np.random.uniform(low=-1, high=1, size=shape)

def standard_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Generates initialized weights of a given shape.

    The weights are drawn uniformly from [-1, 1) and then scaled by sqrt(1 / inp).
    :param inp: The number of inputs
    :param out: The number of outputs (unused)
    :param shape: The shape of the weights to be initialized
    :return: The "Standard"-initialized weights
    """
    samples: np.ndarray = np.random.uniform(low=-1, high=1, size=shape)
    scale: float = np.sqrt(1 / inp)
    return samples * scale

def xavier_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Generates initialized weights of a given shape.

    The weights are drawn uniformly from [-1, 1) and then scaled by sqrt(6 / (inp + out)).
    :param inp: The number of inputs
    :param out: The number of outputs
    :param shape: The shape of the weights to be initialized
    :return: The "Xavier"-initialized weights
    """
    samples: np.ndarray = np.random.uniform(low=-1, high=1, size=shape)
    limit: float = np.sqrt(6 / (inp + out))
    return samples * limit

def he_init(inp: int, out: int, shape: List[int]) -> np.ndarray:
    """
    Generates initialized weights of a given shape.

    He initialization asks for weights with variance 2 / inp. A uniform distribution on
    [-a, a) has variance a^2 / 3, so the matching bound is a = sqrt(6 / inp). Using
    sqrt(2 / inp) as the bound would give only a third of the intended variance.
    :param inp: The number of inputs
    :param out: The number of outputs (unused)
    :param shape: The shape of the weights to be initialized
    :return: The "He"-initialized weights
    """
    return np.random.uniform(-1, 1, shape) * np.sqrt(6 / inp)

### Layer Classes

In [27]:
class Layer:
    """
    Base class for the layers of a deep neural network.

    It only stores the configuration shared by all layers. Weight initialization as well as
    forward and backward propagation have to be provided by the subclasses.
    """

    def __init__(self, actf: Callable[[np.ndarray], np.ndarray] = relu, actf_g: Callable[[np.ndarray], np.ndarray] = relu_g,
                 initializer: Callable[[int, int, List[int]], np.ndarray] = he_init, inp_shape: Optional[List[int]] = None):
        """
        Create a new layer.
        :param actf: The activation function to use
        :param actf_g: The derivative of the activation function
        :param initializer: The weight initializer to use - called as initializer(#inputs, #outputs, shape)
        :param inp_shape: The shape of the layer's input
        """
        self.actf = actf
        self.actf_g = actf_g
        self.initializer = initializer
        self.weights: Optional[np.ndarray] = None
        # Keep this assignment last: the subclasses in this file hook attribute assignment so that
        # setting `inp_shape` calls initialize(), which reads `initializer` and overwrites `weights`.
        self.inp_shape = inp_shape

    def initialize(self) -> None:
        """
        Initializes the weights
        :raises NotImplementedError: Always - subclasses have to override this method
        """
        raise NotImplementedError

    def forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation + return the results.
        :param inp: The layer's input
        :raises NotImplementedError: Always - subclasses have to override this method
        """
        raise NotImplementedError

    def backward(self, grad: np.ndarray) -> np.ndarray:
        """
        Perform backward propagation + return the results.
        :param grad: The gradient handed back by the next layer
        :raises NotImplementedError: Always - subclasses have to override this method
        """
        raise NotImplementedError

In [30]:
class FCLayer(Layer):
    """
    Represents a fully-connected layer in a deep neural network.

    The weights form one (#inputs + 1) x #neurons matrix whose last row acts as the bias. They are
    (re-)initialized automatically whenever `neurons` or `inp_shape` is assigned and both are known.
    """

    def __init__(self, neurons: int, actf: Callable[[np.ndarray], np.ndarray] = relu, actf_g: Callable[[np.ndarray], np.ndarray] = relu_g,
                 initializer: Callable[[int, int, List[int]], np.ndarray] = he_init, inp_shape: Optional[List[int]] = None):
        """
        Create a new fully-connected layer.
        :param neurons: The number of neurons in the layer
        :param actf: The activation function to use
        :param actf_g: The derivative of the activation function
        :param initializer: The weight initializer to use
        :param inp_shape: The shape of the layer's input - format (#neurons). May be left as None and
                          assigned later; the weights stay None until it is known.
        """
        # `neurons` has to exist before the base constructor assigns `inp_shape`,
        # because that assignment triggers initialize() (see __setattr__)
        self.neurons: int = neurons
        super().__init__(actf, actf_g, initializer, inp_shape)

    def initialize(self) -> None:
        """
        Initializes the weights
        :raises TypeError: If the input shape is not known yet
        """
        if self.inp_shape is None:
            raise TypeError('cannot initialize the weights without an input shape')
        rows: int = self.inp_shape[0] + 1  # one extra row for the bias
        self.weights = self.initializer(rows, self.neurons, [rows, self.neurons])

    def forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation + return the results.
        :param inp: The layer's input - format (#inputs x #samples)
        :return: The activated output - format (#neurons x #samples)
        """
        return self.actf(self._forward(inp))

    def _forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation - without applying the activation function.
        :param inp: The layer's input - format (#inputs x #samples)
        :return: The layer's value before activation - format (#neurons x #samples)
        """
        assert len(inp.shape) == 2
        # The input has one row per *input*, not per neuron of this layer
        assert inp.shape[0] == self.inp_shape[0]
        bias_row: np.ndarray = np.ones((1, inp.shape[1]))
        return self.weights.T @ np.vstack((inp, bias_row))

    def backward(self, grad: np.ndarray, val: np.ndarray) -> np.ndarray:
        """
        Perform backward propagation + return the results.

        NOTE: unlike Layer.backward this method needs the additional argument `val`.
        :param grad: The gradient of the next layer - format (#neurons x #samples)
        :param val: This layer's value (= before application of activation function)
        :return: The gradient w.r.t. this layer's input - format (#inputs x #samples)
        """
        assert len(grad.shape) == 2
        assert grad.shape == val.shape
        assert grad.shape[0] == self.neurons
        # The bias row is dropped: the constant 1 fed into it has no gradient to pass on
        return self.weights[:-1, :] @ (grad * self.actf_g(val))

    def _backward(self, grad: np.ndarray, val: np.ndarray, p_val: np.ndarray, lam: float) -> np.ndarray:
        """
        Perform backward propagation - but return the weight's gradients.
        :param grad: The gradient of the next layer - format (#neurons x #samples)
        :param val: This layer's value (= before application of activation function)
        :param p_val: The previous layer's value - format (#inputs x #samples).
                      NOTE(review): the result is only the gradient of this layer's weights if these are
                      the values that were actually fed into this layer - confirm against the caller.
        :param lam: The regularization parameter lambda - for weight regularization
        :return: The gradient w.r.t. this layer's weights (including the bias row) - same shape as the weights
        """
        assert len(grad.shape) == 2
        assert grad.shape == val.shape
        assert grad.shape[0] == self.neurons
        assert p_val.shape[1] == val.shape[1]
        assert p_val.shape[0] == self.inp_shape[0]
        bias_row: np.ndarray = np.ones((1, p_val.shape[1]))
        delta: np.ndarray = grad * self.actf_g(val)
        # The regularization term is scaled by the number of samples and also covers the bias row
        return np.vstack((p_val, bias_row)) @ delta.T + (lam / val.shape[1]) * self.weights

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Validates `inp_shape` and (re-)initializes the weights once both `neurons` and `inp_shape` are known.
        :raises TypeError: If `inp_shape` is neither None nor a sequence with exactly one entry
        """
        # None means "not known yet" (the constructor's default) and must not be passed to len()
        if name == 'inp_shape' and value is not None and len(value) != 1:
            raise TypeError('inp_shape must have exactly one entry - format (#neurons)')
        if name in ('neurons', 'inp_shape'):
            self.__dict__[name] = value
            if 'neurons' in self.__dict__ and self.__dict__.get('inp_shape') is not None:
                self.initialize()
            return
        super().__setattr__(name, value)

In [31]:
class ConvLayer(Layer):
    """
    Represents a convolutional layer in a deep neural network.

    The filters are stored as one (#filters x height x width x #channels) array. They are (re-)initialized
    automatically whenever `shape` or `inp_shape` is assigned and both are known. The layer applies
    no padding and has no bias term.
    """

    def __init__(self, shape: List[int], stride: int, actf: Callable[[np.ndarray], np.ndarray] = relu,
                 actf_g: Callable[[np.ndarray], np.ndarray] = relu_g,
                 initializer: Callable[[int, int, List[int]], np.ndarray] = he_init, inp_shape: Optional[List[int]] = None):
        """
        Create a new convolutional layer.
        :param shape: The shape of the layer's filters - format (#filters x height x width)
        :param stride: The filters' stride
        :param actf: The activation function to use
        :param actf_g: The derivative of the activation function
        :param initializer: The weight initializer to use
        :param inp_shape: The shape of the layer's input - format (height x width x #channels). May be left
                          as None and assigned later; the weights stay None until it is known.
        """
        # `shape` has to exist before the base constructor assigns `inp_shape`,
        # because that assignment triggers initialize() (see __setattr__)
        self.shape: List[int] = shape
        self.stride: int = stride
        super().__init__(actf, actf_g, initializer, inp_shape)

    def initialize(self) -> None:
        """
        Initializes the weights
        :raises TypeError: If the input shape is not known yet
        """
        if self.inp_shape is None:
            raise TypeError('cannot initialize the weights without an input shape')
        channels: int = self.inp_shape[2]
        filters_size: int = reduce(lambda x, y: x * y, self.shape)
        # NOTE(review): the fan-in handed to the initializer includes the number of filters;
        # confirm that this (rather than height * width * #channels) is intended.
        self.weights = self.initializer(filters_size * channels, filters_size, [*self.shape, channels])

    def forward(self, inp: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation + return the results.

        NOTE(review): unlike FCLayer.forward, the activation function is not applied here.
        :param inp: The layer's input - format (#samples x height x width x #channels)
        :return: The filter responses - format (#samples x out_height x out_width x #filters)
        """
        assert len(inp.shape) == 4
        assert inp.shape[1:] == tuple(self.inp_shape)
        f_height, f_width = self.weights.shape[1:3]
        # View of format (#samples x out_height x out_width x #channels x f_height x f_width)
        windows: np.ndarray = np.lib.stride_tricks.sliding_window_view(inp, (f_height, f_width), axis=(1, 2))
        windows = windows[:, ::self.stride, ::self.stride]
        # Multiply every window with every filter and sum over (#channels, f_height, f_width). This avoids
        # materializing the full (#samples x out_h x out_w x #filters x f_h x f_w x #channels) product.
        return np.tensordot(windows, self.weights, axes=((3, 4, 5), (3, 1, 2)))

    def backward(self, grad: np.ndarray) -> np.ndarray:
        """
        Perform backward propagation + return the results.
        :param grad: The gradient of the next layer - format (#samples x out_height x out_width x #filters)
        :raises NotImplementedError: Always (for valid input) - the call is forwarded to Layer.backward
        """
        assert len(grad.shape) == 4
        # Expected spatial size of forward()'s output: one position every `stride` steps, no padding
        out_height: int = (self.inp_shape[0] - self.shape[1]) // self.stride + 1
        out_width: int = (self.inp_shape[1] - self.shape[2]) // self.stride + 1
        assert grad.shape[1:] == (out_height, out_width, self.shape[0])
        return super().backward(grad)

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Validates `shape` / `inp_shape` and (re-)initializes the weights once both are known.
        :raises TypeError: If `shape` does not have exactly three entries, or if `inp_shape` is neither
                           None nor a sequence with exactly three entries
        """
        if name in ('shape', 'inp_shape'):
            # None means "input shape not known yet" (the constructor's default) and must not be passed to len()
            if not (name == 'inp_shape' and value is None) and len(value) != 3:
                raise TypeError(name + ' must have exactly three entries')
            self.__dict__[name] = value
            if 'shape' in self.__dict__ and self.__dict__.get('inp_shape') is not None:
                self.initialize()
            return
        super().__setattr__(name, value)

### Network Class

In [17]:
class Network(object):
    """
    Represents a deep neural network.

    The network keeps a list of layers and a list of weights. The weights are re-created through
    the initializer every time the list of layers changes.
    """

    def __init__(self, initializer: Callable[[List[int]], List[np.ndarray]], layers: Optional[List['Layer']] = None):
        """
        Create a new deep neural network.
        :param initializer: The weight initializer to use - receives the layers' neuron counts
        :param layers: The network's layers (stored by reference); an empty network is created if omitted
        """
        self.initializer: Callable[[List[int]], List[np.ndarray]] = initializer
        # A fresh list per instance - a mutable default argument would be shared by all networks
        self.layers: List['Layer'] = [] if layers is None else layers
        self.__initialize_weights()

    def __initialize_weights(self) -> None:
        """
        Initializes the network's weights

        NOTE(review): every layer is expected to have a `neurons` attribute, which ConvLayer lacks.
        """
        self.weights: List[np.ndarray] = self.initializer([layer.neurons for layer in self.layers])

    def append(self, layer: 'Layer') -> None:
        """Appends a new layer to the network"""
        self.layers.append(layer)
        self.__initialize_weights()

    def insert(self, idx: int, layer: 'Layer') -> None:
        """Inserts the given layer at the given index"""
        self.layers.insert(idx, layer)
        self.__initialize_weights()

    def forward(self, inp: np.ndarray) -> Optional[np.ndarray]:
        """
        Perform forward propagation using the given input

        Not implemented yet: the input is ignored and None is returned.
        """
        return None

    def __add__(self, other: Any) -> Any:
        """
        Create a new network consisting of this network's layers followed by the given layer.
        :raises ArithmeticError: If `other` is not a layer
        """
        # isinstance() also accepts the Layer subclasses, which are what is actually added in practice
        if not isinstance(other, Layer):
            raise ArithmeticError('only layers can be added to a network')
        return Network(self.initializer, [*self.layers, other])

    def __iadd__(self, other: Any) -> 'Network':
        """
        Append the given layer to this network in place.
        :raises ArithmeticError: If `other` is not a layer
        """
        if not isinstance(other, Layer):
            raise ArithmeticError('only layers can be added to a network')
        self.layers.append(other)
        self.__initialize_weights()
        # `net += layer` rebinds `net` to the returned value, so the network itself must be returned
        return self

    def __len__(self) -> int:
        """Returns the number of layers"""
        return len(self.layers)

    def __getitem__(self, key: int) -> 'Layer':
        """Returns the layer at the given index"""
        return self.layers[key]

    def __setitem__(self, key: int, value: 'Layer') -> None:
        """Replaces the layer at the given index"""
        self.layers[key] = value
        self.__initialize_weights()

    def __delitem__(self, key: int) -> None:
        """Removes the layer at the given index"""
        del self.layers[key]
        self.__initialize_weights()

## Training

## Evaluation

## Experiments