The abstract class Layer

In [None]:
# Base class
class Layer:
    """Common interface implemented by every layer of the network.

    Both propagation methods are placeholders here: calling them on the
    base class raises NotImplementedError.
    """

    def __init__(self):
        # No data has flowed through the layer yet.
        self.input = self.output = None

    def forward_propagation(self, input):
        """Compute the output Y of the layer for a given input X."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX for a given dE/dY (and update parameters if any)."""
        raise NotImplementedError()

Fully Connected Layer


In [None]:
import numpy as np

# fully Connected Layer
# inherit from base class Layer
class FCLayer(Layer):
    """Fully connected layer computing ``Y = X . W + B``.

    input_size is the number of input neurons and output_size the number of
    output neurons. Weights have shape (input_size, output_size) and the bias
    has shape (1, output_size); both are drawn uniformly from [-0.5, 0.5).
    """

    def __init__(self, input_size, output_size):
        # Initialise input/output to None so they exist before the first pass.
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Return the layer output for input_data and remember the input.

        input_data is expected to be 2-D with input_size columns (the demos
        in this file pass a single row); the bias row broadcasts over rows.
        """
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient step and return input_error = dE/dX.

        output_error is dE/dY with the same shape as the last output.
        """
        # dE/dX must use the weights as they were during the forward pass,
        # so compute it before the parameters are updated.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # The bias is shared by every row of the input, so its gradient is the
        # sum over rows. For a single row this equals output_error itself.
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

Activation Layer

In [None]:
# Activation Layer
# inherit from base class Layer
class ActivationLayer(Layer):
    """Layer that applies an activation function and has no parameters.

    activation is the callable applied in the forward pass and
    activation_prime is the callable giving its derivative, evaluated at the
    stored input during the backward pass.
    """

    def __init__(self, activation, activation_prime):
        # Initialise input/output to None so they exist before the first pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Return the activated input and remember the raw input."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return input_error = dE/dX for a given output_error = dE/dY.

        learning_rate is not used because there are no learnable parameters.
        """
        return self.activation_prime(self.input) * output_error

Convolution Layer

In [None]:
from scipy import signal
import numpy as np

# convolutional layer
# inherit from base class Layer
# This convolutional layer is always with stride 1
class ConvLayer(Layer):
    """2-D convolutional layer with stride 1 and no padding.

    input_shape  = (i, j, d): height, width and depth of the input
    kernel_shape = (m, n):    height and width of each kernel
    layer_depth:              number of output channels

    Weights have shape (m, n, d, layer_depth) and there is one bias per
    output channel; both are drawn uniformly from [-0.5, 0.5).
    """

    def __init__(self, input_shape, kernel_shape, layer_depth):
        # Initialise input/output to None so they exist before the first pass.
        super().__init__()
        self.input_shape = input_shape
        self.input_depth = input_shape[2]
        self.kernel_shape = kernel_shape
        self.layer_depth = layer_depth
        # 'valid' correlation shrinks each spatial dimension by kernel size - 1.
        self.output_shape = (
            input_shape[0] - kernel_shape[0] + 1,
            input_shape[1] - kernel_shape[1] + 1,
            layer_depth,
        )
        self.weights = np.random.rand(kernel_shape[0], kernel_shape[1], self.input_depth, layer_depth) - 0.5
        self.bias = np.random.rand(layer_depth) - 0.5

    def forward_propagation(self, input):
        """Return the layer output for an input of shape input_shape.

        Output channel k is the sum over input channels d of the 'valid'
        cross-correlation of input[:, :, d] with weights[:, :, d, k], plus
        bias[k].
        """
        self.input = input
        self.output = np.zeros(self.output_shape)

        for k in range(self.layer_depth):
            for d in range(self.input_depth):
                self.output[:, :, k] += signal.correlate2d(self.input[:, :, d], self.weights[:, :, d, k], 'valid')
            # Add the bias once per output channel, not once per input channel.
            self.output[:, :, k] += self.bias[k]

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient step and return in_error = dE/dX.

        output_error is dE/dY with shape output_shape.
        """
        in_error = np.zeros(self.input_shape)
        weights_error = np.zeros((self.kernel_shape[0], self.kernel_shape[1], self.input_depth, self.layer_depth))
        bias_error = np.zeros(self.layer_depth)

        for k in range(self.layer_depth):
            for d in range(self.input_depth):
                # The forward pass is a 'valid' correlation, so the gradient
                # w.r.t. the input is a 'full' convolution with the same kernel.
                in_error[:, :, d] += signal.convolve2d(output_error[:, :, k], self.weights[:, :, d, k], 'full')
                weights_error[:, :, d, k] = signal.correlate2d(self.input[:, :, d], output_error[:, :, k], 'valid')
            # bias[k] is added once to every element of output channel k, so
            # its gradient is the plain sum of that channel's error.
            bias_error[k] = np.sum(output_error[:, :, k])

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return in_error

Flatten layer

In [None]:
# flatten layer
# inherit from base class Layer
class FlattenLayer(Layer):
    """Layer that turns its input into a single row vector."""

    def forward_propagation(self, input_data):
        """Remember the input and return a flattened copy of shape (1, N)."""
        self.input = input_data
        flat = input_data.flatten()
        self.output = flat.reshape((1, flat.size))
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return input_error = dE/dX by reshaping dE/dY to the input's shape.

        learning_rate is not used because there are no learnable parameters.
        """
        original_shape = self.input.shape
        return output_error.reshape(original_shape)

Activation Functions

In [None]:
# activation function and its derivative
def tanh(x):
    """Hyperbolic tangent activation, computed with np.tanh."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh: 1 - tanh(x)**2."""
    t = np.tanh(x)
    return 1 - t ** 2

Loss Function


In [None]:
# loss function and its derivative
def mse(y_true, y_pred):
    """Mean squared error between the targets and the predictions."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to y_pred."""
    element_count = y_true.size
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / element_count

Network Class


In [None]:
#network class
class Network:
    """Sequential stack of layers trained one sample at a time.

    Layers are applied in the order they were added. Each layer must provide
    forward_propagation(input) and
    backward_propagation(output_error, learning_rate).
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append a layer to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative used by fit().

        Both are called as f(y_true, y_pred).
        """
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer in order and return the output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for each sample.

        input_data is indexed by sample along its first dimension.
        """
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network, updating the layers after every sample.

        x_train and y_train are indexed by sample along their first dimension.
        After each epoch the average loss over all samples is printed.

        Raises:
            RuntimeError: if use() has not been called to set the loss.
            ValueError: if x_train is empty or y_train has fewer entries than
                x_train.
        """
        # Validate up front so a bad call fails before any weight is changed.
        if self.loss is None or self.loss_prime is None:
            raise RuntimeError("no loss configured: call use(loss, loss_prime) before fit()")
        samples = len(x_train)
        if samples == 0:
            raise ValueError("x_train must contain at least one sample")
        if len(y_train) < samples:
            raise ValueError("y_train has fewer entries than x_train")

        # training loop
        for epoch in range(epochs):
            err = 0
            # zip stops at the end of x_train; extra targets are ignored.
            for sample, target in zip(x_train, y_train):
                # forward propagation
                output = self._forward(sample)

                # compute loss (for display purpose only)
                err += self.loss(target, output)

                # backward propagation, from the last layer to the first
                error = self.loss_prime(target, output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (epoch + 1, epochs, err))

Building Neural Networks: Solve XOR

In [None]:

# XOR truth table: four (1, 2) input rows with their (1, 1) targets
x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).reshape(4, 1, 2)
y_train = np.array([0, 1, 1, 0]).reshape(4, 1, 1)

# 2 -> 3 -> 1 fully connected network with tanh after each dense layer
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_prime))

# train with the mean squared error loss
net.use(mse, mse_prime)
net.fit(x_train, y_train, learning_rate=0.1, epochs=1000)

# show the predictions for the four training inputs
out = net.predict(x_train)
print(out)

epoch 1/1000   error=0.600156
epoch 2/1000   error=0.319042
epoch 3/1000   error=0.298089
epoch 4/1000   error=0.294215
epoch 5/1000   error=0.292620
epoch 6/1000   error=0.291532
epoch 7/1000   error=0.290590
epoch 8/1000   error=0.289695
epoch 9/1000   error=0.288810
epoch 10/1000   error=0.287922
epoch 11/1000   error=0.287024
epoch 12/1000   error=0.286112
epoch 13/1000   error=0.285184
epoch 14/1000   error=0.284239
epoch 15/1000   error=0.283275
epoch 16/1000   error=0.282292
epoch 17/1000   error=0.281288
epoch 18/1000   error=0.280262
epoch 19/1000   error=0.279215
epoch 20/1000   error=0.278145
epoch 21/1000   error=0.277052
epoch 22/1000   error=0.275936
epoch 23/1000   error=0.274797
epoch 24/1000   error=0.273636
epoch 25/1000   error=0.272452
epoch 26/1000   error=0.271246
epoch 27/1000   error=0.270019
epoch 28/1000   error=0.268772
epoch 29/1000   error=0.267507
epoch 30/1000   error=0.266224
epoch 31/1000   error=0.264924
epoch 32/1000   error=0.263611
epoch 33/1000   e

Solve MNIST: the MNIST dataset consists of images of handwritten digits from 0 to 9, each of shape 28x28x1. The goal is to predict which digit is drawn in each image.

In [None]:
from keras.datasets import mnist
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# one (1, 28*28) float32 row per image, pixel values divided by 255
x_train = x_train.reshape(x_train.shape[0], 1, 28*28).astype('float32') / 255
# one-hot encode the labels, which are numbers in range [0,9]
# e.g. number 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# test data : 10000 samples, prepared the same way
x_test = x_test.reshape(x_test.shape[0], 1, 28*28).astype('float32') / 255
y_test = to_categorical(y_test)

# Network: 784 -> 100 -> 50 -> 10, tanh after every dense layer
net = Network()
net.add(FCLayer(28*28, 100))                # (1, 28*28) -> (1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 50))                   # (1, 100)   -> (1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(50, 10))                    # (1, 50)    -> (1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on the first 1000 samples only
# mini-batch gradient descent is not implemented, so the parameters are updated
# after every sample and training on all 60000 samples would be pretty slow
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], learning_rate=0.1, epochs=35)

# compare predictions and labels for the first 3 test samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out)
print("true values : ")
print(y_test[0:3])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
epoch 1/35   error=0.245495
epoch 2/35   error=0.086990
epoch 3/35   error=0.065384
epoch 4/35   error=0.054654
epoch 5/35   error=0.047554
epoch 6/35   error=0.041959
epoch 7/35   error=0.037461
epoch 8/35   error=0.033554
epoch 9/35   error=0.030408
epoch 10/35   error=0.027812
epoch 11/35   error=0.025673
epoch 12/35   error=0.023606
epoch 13/35   error=0.021920
epoch 14/35   error=0.020332
epoch 15/35   error=0.018969
epoch 16/35   error=0.017734
epoch 17/35   error=0.016599
epoch 18/35   error=0.015526
epoch 19/35   error=0.014649
epoch 20/35   error=0.013755
epoch 21/35   error=0.013003
epoch 22/35   error=0.012389
epoch 23/35   error=0.011767
epoch 24/35   error=0.011314
epoch 25/35   error=0.010786
epoch 26/35   error=0.010369
epoch 27/35   error=0.009982
epoch 28/35   error=0.009459
epoch 29/35   error=0.009144
epoch 30/35   error=0.008809
epoch 31/35   error=0.008575
epoch 32/35   erro

Convolutional neural network

In [None]:
# a single random 10x10x1 input with a random 4x4x2 target
x_train = [np.random.rand(10, 10, 1)]
y_train = [np.random.rand(4, 4, 2)]

# three stacked 3x3 convolutions: (10,10,1) -> (8,8,1) -> (6,6,1) -> (4,4,2)
net = Network()
net.add(ConvLayer((10, 10, 1), (3, 3), 1))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(ConvLayer((8, 8, 1), (3, 3), 1))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(ConvLayer((6, 6, 1), (3, 3), 2))
net.add(ActivationLayer(tanh, tanh_prime))

# train with the mean squared error loss
net.use(mse, mse_prime)
net.fit(x_train, y_train, learning_rate=0.3, epochs=1000)

# compare the network output with the target it was trained on
out = net.predict(x_train)
print("predicted = ", out)
print("expected = ", y_train)

epoch 1/1000   error=0.454200
epoch 2/1000   error=0.127497
epoch 3/1000   error=0.118073
epoch 4/1000   error=0.112574
epoch 5/1000   error=0.108730
epoch 6/1000   error=0.105919
epoch 7/1000   error=0.103698
epoch 8/1000   error=0.101832
epoch 9/1000   error=0.100210
epoch 10/1000   error=0.098778
epoch 11/1000   error=0.097501
epoch 12/1000   error=0.096354
epoch 13/1000   error=0.095318
epoch 14/1000   error=0.094377
epoch 15/1000   error=0.093518
epoch 16/1000   error=0.092728
epoch 17/1000   error=0.092000
epoch 18/1000   error=0.091326
epoch 19/1000   error=0.090698
epoch 20/1000   error=0.090111
epoch 21/1000   error=0.089560
epoch 22/1000   error=0.089042
epoch 23/1000   error=0.088553
epoch 24/1000   error=0.088089
epoch 25/1000   error=0.087648
epoch 26/1000   error=0.087229
epoch 27/1000   error=0.086828
epoch 28/1000   error=0.086444
epoch 29/1000   error=0.086076
epoch 30/1000   error=0.085722
epoch 31/1000   error=0.085381
epoch 32/1000   error=0.085051
epoch 33/1000   e

MNIST CNN with convolutional layer

In [None]:
from keras.datasets import mnist
from keras.utils import to_categorical

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# one (28, 28, 1) float32 image per sample, pixel values divided by 255
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255
# one-hot encode the labels, which are numbers in range [0,9]
# e.g. number 3 becomes [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)

# test data : 10000 samples, prepared the same way
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype('float32') / 255
y_test = to_categorical(y_test)

# Network: one 3x3 convolution, then flatten and two dense layers
net = Network()
net.add(ConvLayer((28, 28, 1), (3, 3), 1))  # (28, 28, 1)  -> (26, 26, 1)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FlattenLayer())                     # (26, 26, 1)  -> (1, 26*26*1)
net.add(FCLayer(26*26*1, 100))              # (1, 26*26*1) -> (1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 10))                   # (1, 100)     -> (1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on the first 1000 samples only
# mini-batch gradient descent is not implemented, so the parameters are updated
# after every sample and training on all 60000 samples would be pretty slow
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], learning_rate=0.1, epochs=100)

# compare predictions and labels for the first 3 test samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out)
print("true values : ")
print(y_test[0:3])

epoch 1/100   error=0.426540
epoch 2/100   error=0.383293
epoch 3/100   error=0.307230
epoch 4/100   error=0.255941
epoch 5/100   error=0.222664
epoch 6/100   error=0.190180
epoch 7/100   error=0.168173
epoch 8/100   error=0.154650
epoch 9/100   error=0.143282
epoch 10/100   error=0.134546
epoch 11/100   error=0.126548
epoch 12/100   error=0.119706
epoch 13/100   error=0.114029
epoch 14/100   error=0.107573
epoch 15/100   error=0.100795
epoch 16/100   error=0.097413
epoch 17/100   error=0.098138
epoch 18/100   error=0.091389
epoch 19/100   error=0.089367
epoch 20/100   error=0.088758
epoch 21/100   error=0.084618
epoch 22/100   error=0.085138
epoch 23/100   error=0.080809
epoch 24/100   error=0.077833
epoch 25/100   error=0.076243
epoch 26/100   error=0.073473
epoch 27/100   error=0.072359
epoch 28/100   error=0.072848
epoch 29/100   error=0.070169
epoch 30/100   error=0.067985
epoch 31/100   error=0.068178
epoch 32/100   error=0.066212
epoch 33/100   error=0.064928
epoch 34/100   erro