In [None]:
# TODO: extend the MLP to accept 2D input
# Flattening is required: add a flatten layer (a reshape or something similar)
# Conv2D layer parameters: number_of_kernels, padding, type_of_padding (zeros, ones, average, half of the pixels in the kernel, same as the border), stride, ..., input_shape, kernel_size, random_init_kernel (BEWARE OF VALUES CLOSE TO 0)
# Implement the full forward pass up to 7.11
# Add something like a unit test (compare against PyTorch) using assert
import numpy as np

Change of schedule:
  * generative adversarial networks will be rescheduled to the next semester
  * autoencoders will not be covered in DLF
  * style(?) transfer has been added to the lecture

In [None]:
class Layer:
    """Abstract building block of the network.

    Concrete layers override `forward` and `backward`; the base versions
    only signal that the operation has not been implemented.
    """

    def __init__(self):
        # Slots for the most recent input and output of the layer.
        self.input = self.output = None

    def forward(self, input):
        """Compute the layer output for `input` (must be overridden)."""
        raise NotImplementedError('Not done')

    def backward(self, output_gradient, learning_rate):
        """Propagate `output_gradient` back through the layer (must be overridden)."""
        raise NotImplementedError('Not done')

In [None]:
class Activation(Layer):
    """Element-wise activation layer built from a function and its derivative.

    Args:
        activation: callable applied to the input in the forward pass.
        activation_prime: callable returning the derivative of `activation`
            evaluated at the stored input.
    """

    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward(self, input):
        """Remember `input` and return the activation applied to it."""
        self.input = input
        return self.activation(input)

    def backward(self, output_gradient, learning_rate):
        """Chain rule: upstream gradient times the local derivative.

        `learning_rate` is accepted for interface compatibility and unused,
        because the layer has no trainable parameters.
        """
        local_slope = self.activation_prime(self.input)
        return np.multiply(output_gradient, local_slope)

In [None]:
class ReLU(Activation):
    """Rectified linear unit: passes positive values, zeroes the rest."""

    def __init__(self):
        super().__init__(
            # Forward: keep entries that are > 0, replace the others with 0.
            lambda values: np.where(values > 0, values, 0),
            # Derivative: 1 where the input was > 0, otherwise 0.
            lambda values: np.where(values > 0, 1, 0),
        )

In [None]:
class Sigmoid(Activation):
    """Logistic sigmoid activation, 1 / (1 + exp(-x))."""

    def __init__(self):
        def logistic(values):
            return 1 / (1 + np.exp(-values))

        def logistic_slope(values):
            # The derivative is expressed through the sigmoid itself: s * (1 - s).
            squashed = logistic(values)
            return squashed * (1 - squashed)

        super().__init__(logistic, logistic_slope)

In [None]:
# TODO: Test this
class TanH(Activation):
    """Hyperbolic tangent activation."""

    def __init__(self):
        def hyperbolic_tangent(values):
            return np.tanh(values)

        def hyperbolic_tangent_slope(values):
            # d/dx tanh(x) = 1 - tanh(x)^2
            squashed = hyperbolic_tangent(values)
            return 1 - squashed**2

        super().__init__(hyperbolic_tangent, hyperbolic_tangent_slope)

In [None]:
# # TODO: Test that
# class Softmax(Activation):
#   def __init__(self):
#     def softmax(x):
#       return np.exp(x) / sum(np.exp(x))
#     def softmax_prim(y):
#       softmax = self.input.reshape(-1, 1)
#       d_softmax = softmax - y
#       return d_softmax

#     super().__init__(softmax, softmax_prim)

In [None]:
class Softmax(Layer):
    """Softmax layer turning scores into a probability distribution.

    Args:
        axis: axis holding the class scores. With the default ``None`` the
            axis is inferred from the input: axis 0 for 1-D inputs and for
            ``(n, 1)`` column vectors (the layout produced by ``Dense``),
            axis 1 otherwise (``(batch, classes)`` layout).
    """

    def __init__(self, axis=None):
        super().__init__()
        self.axis = axis

    def _class_axis(self, values):
        """Return the axis along which the probabilities must sum to one."""
        if self.axis is not None:
            return self.axis
        # Normalising an (n, 1) column over axis 1 would divide every entry by
        # itself and yield all ones, so columns are normalised over axis 0.
        if values.ndim == 1 or (values.ndim == 2 and values.shape[1] == 1):
            return 0
        return 1

    def forward(self, input):
        """Return softmax(input), computed in a numerically stable way."""
        self.input = input
        scores = np.asarray(input)
        axis = self._class_axis(scores)

        # Subtracting the maximum leaves the result unchanged mathematically
        # but prevents overflow in exp; the largest term becomes exp(0) = 1,
        # so the denominator is never zero.
        exponentials = np.exp(scores - np.max(scores, axis=axis, keepdims=True))
        self.output = exponentials / np.sum(exponentials, axis=axis, keepdims=True)
        return self.output

    def backward(self, d_out, learning_rate):
        """Return the loss gradient with respect to the layer input.

        Applies the softmax Jacobian to ``d_out`` without building the matrix:
        ``dL/dx = y * (d_out - sum(d_out * y))``, with the sum taken along the
        class axis. ``learning_rate`` is unused (no trainable parameters).
        """
        y = self.output
        axis = self._class_axis(y)
        weighted_sum = np.sum(d_out * y, axis=axis, keepdims=True)
        return y * (d_out - weighted_sum)


In [None]:
class Conv2D:
    """2-D convolution layer (implemented as cross-correlation) with stride 1.

    Every (filter, input channel) pair produces its own feature map, so the
    output has shape ``(num_filters, input_depth, out_height, out_width)``.
    The bias has the same shape as the output (one bias per output element).

    Args:
        image_shape: ``(depth, height, width)`` of the expected input.
        num_filters: number of filters.
        filter_size: side length of the square kernels.
        stride: only ``(1, 1)`` is supported.
        padding_type: ``'valid'`` (no padding), ``'same'`` (output keeps the
            input size) or ``'full'`` (output grows by ``filter_size - 1``).

    Raises:
        ValueError: if ``padding_type`` is not one of the supported values.
    """

    def __init__(self, image_shape, num_filters, filter_size, stride=(1, 1), padding_type='valid'):

        assert stride == (1, 1), 'Other strides not yet implemented'

        # image info
        self.input_depth, self.input_height, self.input_width = image_shape

        # filter info
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.stride = stride
        self.padding_type = padding_type
        self.input = None

        # kernels
        self.kernels = np.random.randn(self.num_filters, self.input_depth, self.filter_size, self.filter_size)

        # output: spatial size is input + total padding - filter_size + 1
        before, after = self._pad_amounts(self.padding_type, self.filter_size)
        growth = before + after - self.filter_size + 1
        self.output = np.zeros((self.num_filters,
                                self.input_depth,
                                self.input_height + growth,
                                self.input_width + growth))

        # biases
        self.bias = np.random.randn(*self.output.shape)

    @staticmethod
    def _pad_amounts(padding_type, size):
        """Return ``(before, after)`` zero-padding for a filter side of ``size``."""
        if padding_type == 'valid':
            return 0, 0
        if padding_type == 'same':
            # Total padding is size - 1; for even sizes the extra row/column
            # goes after the data so the output keeps the input size.
            before = (size - 1) // 2
            return before, size - 1 - before
        if padding_type == 'full':
            return size - 1, size - 1
        raise ValueError(f"Unknown padding type: {padding_type!r}")

    def convolve2d(self, image, filter, stride, padding_type):
        """Cross-correlate a 2-D ``image`` with a 2-D ``filter``.

        The image is zero-padded according to ``padding_type`` and the shape
        of ``filter``; the filter is then slid over it with the given stride.

        Returns:
            2-D array with one entry per filter position.
        """
        filter_height, filter_width = filter.shape
        top, bottom = self._pad_amounts(padding_type, filter_height)
        left, right = self._pad_amounts(padding_type, filter_width)
        padded = np.pad(image, [(top, bottom), (left, right)], 'constant')

        output_height = max((padded.shape[0] - filter_height) // stride[0] + 1, 0)
        output_width = max((padded.shape[1] - filter_width) // stride[1] + 1, 0)
        output = np.zeros((output_height, output_width))

        for i in range(output_height):
            row = i * stride[0]
            for j in range(output_width):
                col = j * stride[1]
                window = padded[row:row + filter_height, col:col + filter_width]
                output[i, j] = np.sum(window * filter)
        return output

    def padding(self, x, padding_type):
        """Zero-pad the spatial axes of a 2-D or 3-D array for this layer's kernel.

        Raises:
            ValueError: if ``x`` is not 2-D/3-D or ``padding_type`` is unknown.
        """
        if x.ndim not in (2, 3):
            raise ValueError("Input must have either 2 or 3 dimensions")

        before, after = self._pad_amounts(padding_type, self.filter_size)
        if padding_type == 'valid':
            return x

        pad_width = [(before, after), (before, after)]
        if x.ndim == 3:
            # The leading (channel) axis is never padded.
            pad_width = [(0, 0)] + pad_width
        return np.pad(x, pad_width, 'constant')

    def forward(self, x):
        """Convolve ``x`` with every kernel and add the bias.

        Args:
            x: 2-D image (reused for every input channel) or 3-D array
                indexed as ``(channel, height, width)``.

        Returns:
            Array of shape ``(num_filters, input_depth, out_height, out_width)``.
        """
        if x.ndim not in (2, 3):
            raise ValueError("Input must have either 2 or 3 dimensions")
        self.input = x

        # Start from a fresh buffer on every call so that results from
        # previous forward passes are not accumulated into the new output.
        feature_maps = np.zeros(self.bias.shape)
        for i in range(self.num_filters):
            for j in range(self.input_depth):
                channel = x if x.ndim == 2 else x[j]
                feature_maps[i, j] = self.convolve2d(channel, self.kernels[i, j], self.stride, padding_type=self.padding_type)

        # add bias and return
        self.output = feature_maps + self.bias
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Update kernels and bias by gradient descent; return the input gradient.

        Args:
            output_gradient: gradient of the loss w.r.t. the layer output,
                with the same shape as the output.
            learning_rate: step size of the parameter update.

        Returns:
            Gradient of the loss w.r.t. the input passed to the last
            ``forward`` call (same shape as that input).
        """
        kernels_gradient = np.zeros(self.kernels.shape)
        input_gradient = np.zeros(self.input.shape)

        single_channel = self.input.ndim == 2
        height, width = self.input.shape[-2:]
        offset, _ = self._pad_amounts(self.padding_type, self.filter_size)
        padded_input = self.padding(self.input, self.padding_type)

        for i in range(self.num_filters):
            for j in range(self.input_depth):
                grad_map = output_gradient[i][j]
                channel = padded_input if single_channel else padded_input[j]

                # dL/dK: correlate the padded input with the output gradient.
                kernels_gradient[i, j] = self.convolve2d(channel, grad_map, self.stride, 'valid')

                # dL/dX: full correlation with the 180-degree rotated kernel
                # gives the gradient of the padded input; cropping the forward
                # padding leaves the gradient of the actual input.
                rotated_kernel = np.rot90(self.kernels[i, j], 2)
                padded_gradient = self.convolve2d(grad_map, rotated_kernel, self.stride, 'full')
                cropped = padded_gradient[offset:offset + height, offset:offset + width]

                if single_channel:
                    input_gradient += cropped
                else:
                    input_gradient[j] += cropped

        # Parameters are updated only after the input gradient has been
        # computed with the kernels used in the forward pass.
        self.kernels -= learning_rate * kernels_gradient
        self.bias -= learning_rate * output_gradient

        return input_gradient

In [None]:
class Reshape(Layer):
    """Layer that only changes the shape of the data flowing through it.

    Args:
        input_shape: shape restored on the way back (gradient shape).
        output_shape: shape produced on the way forward.
    """

    def __init__(self, input_shape, output_shape):
        self.input_shape, self.output_shape = input_shape, output_shape

    def forward(self, input):
        """Return `input` rearranged to `output_shape`."""
        target_shape = self.output_shape
        return np.reshape(input, target_shape)

    def backward(self, output_gradient, learning_rate):
        """Return the gradient rearranged to `input_shape`; `learning_rate` is unused."""
        target_shape = self.input_shape
        return np.reshape(output_gradient, target_shape)

In [None]:
class Dense(Layer):
    """Fully connected layer computing ``weights . input + bias``.

    Weights have shape ``(output_size, input_size)`` and the bias is an
    ``(output_size, 1)`` column; both are drawn from a standard normal
    distribution.
    """

    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Store `input` and return the affine transformation of it."""
        self.input = input
        projected = np.dot(self.weights, input)
        return projected + self.bias

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient."""
        # Both gradients are computed with the weights used in the forward
        # pass, i.e. before the update below modifies them.
        input_gradient = np.dot(self.weights.T, output_gradient)
        weights_gradient = np.dot(output_gradient, self.input.T)

        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient

In [None]:
# TODO: Test in network
class Dropout(Layer):
  """Inverted dropout layer.

  During training every input element is zeroed with probability `dropout`
  and the surviving elements are scaled by ``1 / (1 - dropout)`` so that the
  expected activation is unchanged. The mask drawn in `forward` is reused in
  `backward`, so gradients flow only through the elements that were kept.

  Args:
    dropout: probability of dropping an element, in the range [0, 1).

  Attributes:
    training: set to False to turn the layer into an identity (inference).

  Raises:
    ValueError: if `dropout` is outside [0, 1).
  """

  def __init__(self, dropout):
    super().__init__()
    if not 0 <= dropout < 1:
      raise ValueError(f"dropout must be in the range [0, 1), got {dropout}")
    self.dropout = dropout
    self.training = True
    self.mask = None

  def forward(self, input):
    """Return `input` with a freshly drawn dropout mask applied."""
    self.input = input

    if not self.training or self.dropout == 0:
      # Identity: nothing is dropped, so backward must pass gradients through.
      self.mask = None
      self.output = self.input
      return self.output

    keep_probability = 1 - self.dropout
    kept = np.random.random(np.shape(input)) < keep_probability
    # Kept elements carry the rescaling factor, dropped elements carry 0.
    self.mask = kept / keep_probability
    self.output = np.multiply(input, self.mask)
    return self.output

  def backward(self, output, learning_rate=None):
    """Return the gradient `output` masked like the last forward pass.

    `learning_rate` is accepted so the layer can be driven by the same
    training loop as the other layers; it is unused (no parameters).
    """
    if self.mask is None:
      return output

    self.dropped = np.multiply(output, self.mask)

    return self.dropped

In [None]:
# loss
def mse(y_true, y_pred):
    """Mean squared error between targets and predictions."""
    residual = y_true - y_pred
    return np.average(residual ** 2)

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to `y_pred`."""
    residual = y_pred - y_true
    element_count = np.size(y_true)
    return 2 * residual / element_count

def binary_cross_entropy(y_true, y_pred, eps = 1e-10):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Args:
        y_true: target values in [0, 1].
        y_pred: predicted probabilities.
        eps: predictions are clipped to ``[eps, 1 - eps]`` so that the
            logarithms stay finite when a prediction is exactly 0 or 1.

    Returns:
        The scalar mean loss.
    """
    y_true = np.asarray(y_true, dtype=float)
    # Work in float64 so that 1 - eps is representable and the clip is effective.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)

    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred, eps = 1e-10):
    """Gradient of the mean binary cross-entropy with respect to `y_pred`.

    Args:
        y_true: target values in [0, 1].
        y_pred: predicted probabilities.
        eps: predictions are clipped to ``[eps, 1 - eps]`` to avoid division
            by zero when a prediction is exactly 0 or 1.

    Returns:
        Array with the shape of the broadcast inputs.
    """
    y_true = np.asarray(y_true, dtype=float)
    # Work in float64 so that 1 - eps is representable and the clip is effective.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)

    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)


def categorical_crossentropy(y_true, y_pred, eps = 1e-10):
    """Categorical cross-entropy summed over all elements.

    Predictions are clipped to ``[eps, 1 - eps]`` before the logarithm so
    that a probability of exactly 0 does not produce an infinite loss.
    """
    bounded = np.clip(y_pred, eps, 1 - eps)
    weighted_log = y_true * np.log(bounded)
    return -np.sum(weighted_log)

def categorical_crossentropy_prime(y_true, y_pred, eps = 1e-10):
    """Gradient of the categorical cross-entropy with respect to `y_pred`.

    The predictions are clipped to ``[eps, 1 - eps]`` exactly as in the loss,
    which already keeps the denominator away from zero; the gradient is
    therefore ``-y_true / y_pred`` evaluated at the clipped predictions.
    """
    y_pred = np.clip(y_pred, eps, 1 - eps)

    return -y_true / y_pred

In [None]:
def predict(network, input):
    """Run `input` through every layer of `network` in order.

    Each layer's `forward` result becomes the next layer's input; the value
    produced by the last layer is returned (or `input` itself when the
    network is empty).
    """
    signal = input
    for stage in network:
        signal = stage.forward(signal)
    return signal

def train(network, loss, loss_prime, x_train, y_train, epochs = 100, learning_rate = 0.01, info = True):
    """Train `network` with per-sample (stochastic) gradient descent.

    Args:
        network: sequence of layers exposing `forward` and
            `backward(gradient, learning_rate)`.
        loss: callable ``loss(y_true, y_pred)`` returning the sample error.
        loss_prime: callable returning the gradient of `loss` w.r.t. `y_pred`.
        x_train: training inputs, one entry per sample.
        y_train: training targets, one entry per sample.
        epochs: number of passes over the training data.
        learning_rate: step size handed to every layer's `backward`.
        info: when true, print the mean error after every epoch.

    Returns:
        List with the mean training error of every epoch.

    Raises:
        ValueError: if `x_train` is empty or its length differs from
            `y_train` (zip would otherwise silently drop the extra entries).
    """
    sample_count = len(x_train)
    if sample_count != len(y_train):
        raise ValueError(
            f"x_train and y_train must have the same length, got {sample_count} and {len(y_train)}"
        )
    if sample_count == 0:
        raise ValueError("x_train must contain at least one sample")

    history = []
    for e in range(epochs):
        error = 0
        for x, y in zip(x_train, y_train):
            # forward
            output = predict(network, x)
            # error
            error += loss(y, output)

            # backward: push the loss gradient through the layers in reverse
            grad = loss_prime(y, output)
            for layer in reversed(network):
                grad = layer.backward(grad, learning_rate)

        error /= sample_count
        history.append(error)
        if info:
            print(f"{e + 1}/{epochs}, error={error}")

    return history

In [None]:
from keras.datasets import mnist
from keras.utils import np_utils, to_categorical
from keras.datasets.mnist import load_data as load_data_MNIST

# Load the data, already split into train and test sets.

(x_train, y_train), (x_test, y_test) = load_data_MNIST() # MNIST
# (x_train, y_train), (x_test, y_test) = load_data_Fashion_MNIST() # or Fashion MNIST

## Flattening from 2D [28x28] to 1D (784-element vector) is disabled here,
## so the images keep their 2D shape.

# x_train = x_train.reshape(60000, 784)
# x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

## Scale the values by 1/255 (presumably 8-bit pixel intensities, giving [0.0 - 1.0] — TODO confirm)
x_train /= 255
x_test /= 255
print(x_train.shape, 'train samples')
print(x_test.shape, 'test samples')


## One-hot encoding of the labels into 10 classes
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

print(y_train.shape, y_test.shape)

(60000, 28, 28) train samples
(10000, 28, 28) test samples
(60000, 10) (10000, 10)


In [None]:
# Keep only the first 500 training and the first 100 test samples.
# NOTE(review): this overwrites the full arrays in place, so the cell is not
# reversible without reloading the data.
x_train, y_train, x_test, y_test = x_train[:500], y_train[:500], x_test[:100], y_test[:100]
# Display the shape of one image and of its label viewed as a column vector.
x_train[0].shape, y_train[0].reshape(-1, 1).shape

((28, 28), (10, 1))

In [None]:
# Turn every one-hot label into its own (num_classes, 1) column vector so it
# matches the column-vector output of the network. Reshaping with (-1, 1)
# would instead flatten the whole label matrix into single scalars and pair
# each image with one scalar. Re-running this cell leaves the shape unchanged.
y_train = y_train.reshape(len(y_train), -1, 1)

In [None]:
# Layer stack: convolution -> ReLU -> flatten -> dense -> ReLU -> dense -> softmax.
network = [
    #Convolutional((1, 28, 28), (3, 3), 5, padding = (1,1)),
    Conv2D(image_shape = (1, 28, 28), num_filters = 5, filter_size = 3, stride = (1, 1), padding_type = 'same'),
    ReLU(),
    # With 'same' padding the 28x28 spatial size is kept.
    # Flatten the (filters, depth, height, width) feature maps into a column vector.
    Reshape((5, 1, 28, 28), (5 * 1 * 28 * 28, 1)),
    Dense(5 * 1 *28 * 28, 100),
    ReLU(),
    Dense(100, 10),
    #ReLU()
    Softmax()
]

Forward pass test

In [None]:
# Random array of shape (3, 28, 28).
# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
test_input = np.random.randn(3, 28, 28)

In [None]:
# Smoke test of the forward pass: display the output shape for the first training image.
predict(network, x_train[0]).shape


(10, 1)

In [None]:
from sklearn.metrics import accuracy_score

# Train the network with categorical cross-entropy for 10 epochs.
train(
    network,
    categorical_crossentropy,
    categorical_crossentropy_prime,
    x_train,
    y_train,
    epochs=10,
    learning_rate=0.1
)

In [None]:
# Evaluate on the test samples: the predicted class is the index of the
# largest network output, the true class the index of the largest label entry.
class_pairs = [
    (np.argmax(predict(network, x)), np.argmax(y))
    for x, y in zip(x_test, y_test)
]
y_pred = [predicted for predicted, _ in class_pairs]
y_true = [expected for _, expected in class_pairs]

# accuracy score
print('Accuracy of the neural network is equal to: ', accuracy_score(y_true, y_pred))

Accuracy of the neural network is equal to:  0.08
