In [0]:
import numpy as np

Création de la classe de base `Layer` (couche), qui possède une entrée et une sortie.


In [0]:
# Base class
class Layer:
    """Abstract building block of the network.

    Concrete layers override both propagation methods; the base versions
    only raise ``NotImplementedError``.
    """

    def __init__(self):
        # Slots for the layer's input and output; both start out empty.
        self.input, self.output = None, None

    def forward_propagation(self, input):
        """Compute the output Y of the layer for a given input X."""
        raise NotImplementedError

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX for a given dE/dY (and update parameters if any)."""
        raise NotImplementedError

Définition de la fonction forward_propagation :
Comme son nom l'indique, elle achemine les données d'entrée vers l'avant à travers le réseau. Chaque couche cachée reçoit les données d'entrée, les traite selon sa fonction d'activation et transmet le résultat à la couche suivante.

Définition de la fonction backward_propagation :
Méthode pour calculer le gradient de l'erreur pour chaque neurone d'un réseau de neurones, de la dernière couche vers la première.


In [0]:

# inherit from base class Layer
class FCLayer(Layer):
    """Fully connected (dense) layer computing ``Y = X . W + B``.

    Parameters
    ----------
    input_size : int
        Number of input neurons (rows of the weight matrix).
    output_size : int
        Number of output neurons (columns of the weight matrix).
    """

    def __init__(self, input_size, output_size):
        # Run the base initialiser so `input` / `output` exist (as None)
        # even before the first forward pass.
        super().__init__()
        # np.random.rand draws from [0, 1); shifting by 0.5 centres the
        # initial parameters around zero.
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Return ``input_data . weights + bias`` and cache input and output.

        ``input_data`` must be 2-D with ``input_size`` columns, e.g. a single
        sample of shape ``(1, input_size)``.
        """
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return ``dE/dX``.

        ``output_error`` is ``dE/dY`` for the input cached by the last call
        to ``forward_propagation``. Weights and bias are updated in place.
        """
        # dE/dX must be computed with the weights *before* they are updated.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dE/dB is dE/dY summed over the sample axis. For a single-row error
        # this equals output_error itself; for several rows it keeps the
        # (1, output_size) bias shape instead of failing on the in-place update.
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

Création de la classe ActivationLayer, qui applique une fonction d'activation aux données.

In [0]:

# inherit from base class Layer
class ActivationLayer(Layer):
    """Layer that applies an element-wise activation function.

    Parameters
    ----------
    activation : callable
        Function applied to the input during the forward pass.
    activation_prime : callable
        Derivative of ``activation``, evaluated on the cached input during
        the backward pass.
    """

    def __init__(self, activation, activation_prime):
        # Run the base initialiser so `input` / `output` exist (as None)
        # even before the first forward pass.
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        """Return ``activation(input_data)`` and cache input and output."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Return ``dE/dX = activation_prime(input) * output_error``.

        ``learning_rate`` is accepted for interface compatibility but unused:
        this layer has no learnable parameters.
        """
        return self.activation_prime(self.input) * output_error

Définition de la fonction tangente hyperbolique et de sa dérivée



In [0]:



# activation function and its derivative
def tanh(x):
    """Hyperbolic tangent activation, delegated to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of the hyperbolic tangent: ``1 - tanh(x)**2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2

Définition de la fonction de perte (loss function) et de sa dérivée.
Cette fonction sert de critère pour déterminer la meilleure solution à un problème d'optimisation.

In [0]:


# loss function and its derivative
def mse(y_true, y_pred):
    """Mean squared error: the mean of ``(y_true - y_pred)**2`` over all elements."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def mse_prime(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    Computed as ``2 * (y_pred - y_true) / y_true.size``.
    """
    residual = y_pred - y_true
    return 2 * residual / y_true.size

Création de la classe réseau (`Network`),
comprenant une liste de couches,
la fonction de perte (et sa dérivée),
et la fonction d'entraînement

In [0]:
class Network:
    """Sequential feed-forward network trained one sample at a time.

    Layers are applied in the order they were added. A loss function and its
    derivative must be registered with ``use`` before calling ``fit``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Register the loss function and its derivative.

        Both are called as ``f(y_true, y_pred)``.
        """
        self.loss = loss
        self.loss_prime = loss_prime

    def _forward(self, sample):
        """Run one sample through every layer, in order, and return the result."""
        output = sample
        for layer in self.layers:
            output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list holding the network output for each sample.

        ``input_data`` is iterated along its first dimension (one sample per
        item).
        """
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate):
        """Train the network and print the mean loss after every epoch.

        Parameters
        ----------
        x_train, y_train : sequences indexed by sample (sample dimension first)
        epochs : int
            Number of passes over the training set.
        learning_rate : float
            Step size forwarded to each layer's ``backward_propagation``.

        Raises
        ------
        ValueError
            If training is requested (``epochs > 0``) on an empty training set.
        """
        # sample dimension first
        samples = len(x_train)
        # Fail early with a clear message instead of a ZeroDivisionError when
        # averaging the epoch error. With no epochs to run the call stays a no-op.
        if epochs > 0 and samples == 0:
            raise ValueError("x_train is empty: cannot train on zero samples")

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = self._forward(x_train[j])

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation: each layer receives dE/dY, updates its
                # own parameters and hands dE/dX to the layer before it
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d   error=%f' % (i+1, epochs, err))

Entraînement sur le problème du XOR, à deux classes (0 et 1), avec pour données d'entraînement x_train = [[[0,0]], [[0,1]], [[1,0]], [[1,1]]]
et pour sorties attendues y_train = [[[0]], [[1]], [[1]], [[0]]]

Création d'un réseau comprenant une première couche à deux entrées et trois sorties,
puis une deuxième couche à trois entrées et une sortie, chacune suivie de la fonction d'activation tangente hyperbolique.

La cellule affiche l'erreur moyenne à chaque époque d'apprentissage, puis les prédictions du réseau sur les données d'entraînement.

In [21]:


# Seed NumPy's global RNG so the random weight initialisation in FCLayer
# (np.random.rand) gives the same run after "Restart & Run All".
np.random.seed(0)

# training data: the four XOR input pairs, one sample per row of shape (1, 2),
# with targets of shape (1, 1)
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

# network: 2 inputs -> 3 hidden units -> 1 output, tanh after each dense layer
net = Network()
net.add(FCLayer(2, 3))
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(3, 1))
net.add(ActivationLayer(tanh, tanh_prime))

# train
net.use(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

# test (on the training inputs themselves)
out = net.predict(x_train)
print(out)

epoch 1/1000   error=0.452993
epoch 2/1000   error=0.321128
epoch 3/1000   error=0.300476
epoch 4/1000   error=0.294942
epoch 5/1000   error=0.292791
epoch 6/1000   error=0.291682
epoch 7/1000   error=0.290972
epoch 8/1000   error=0.290443
epoch 9/1000   error=0.290006
epoch 10/1000   error=0.289624
epoch 11/1000   error=0.289277
epoch 12/1000   error=0.288954
epoch 13/1000   error=0.288651
epoch 14/1000   error=0.288362
epoch 15/1000   error=0.288087
epoch 16/1000   error=0.287824
epoch 17/1000   error=0.287571
epoch 18/1000   error=0.287328
epoch 19/1000   error=0.287095
epoch 20/1000   error=0.286870
epoch 21/1000   error=0.286653
epoch 22/1000   error=0.286444
epoch 23/1000   error=0.286243
epoch 24/1000   error=0.286049
epoch 25/1000   error=0.285862
epoch 26/1000   error=0.285682
epoch 27/1000   error=0.285508
epoch 28/1000   error=0.285340
epoch 29/1000   error=0.285178
epoch 30/1000   error=0.285022
epoch 31/1000   error=0.284871
epoch 32/1000   error=0.284726
epoch 33/1000   e

Même principe que la cellule précédente, mais sur la base MNIST.
La base de données MNIST de chiffres manuscrits, disponible sur cette page, comprend un ensemble d'entraînement de 60 000 exemples et un ensemble de test de 10 000 exemples. Il s'agit d'un sous-ensemble d'un ensemble plus grand disponible auprès du NIST. Les chiffres ont été normalisés en taille et centrés dans une image de taille fixe.

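Le modèle proposé aurait un taux de succès de 99 % ; ce chiffre n'est toutefois pas mesuré dans ce notebook, qui n'entraîne le réseau que sur 1 000 exemples et n'affiche que 3 prédictions.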


In [22]:
from keras.datasets import mnist
# `to_categorical` is exported directly by `keras.utils`; the old
# `keras.utils.np_utils` module is no longer importable in recent Keras releases.
from keras.utils import to_categorical

# Seed NumPy's global RNG so the random weight initialisation in FCLayer
# (np.random.rand) gives the same run after "Restart & Run All".
np.random.seed(0)

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape each image to one (1, 784) row and scale pixel values by 1/255
x_train = x_train.reshape(x_train.shape[0], 1, 28*28).astype('float32') / 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
# num_classes is explicit so the width never depends on which labels are present
y_train = to_categorical(y_train, num_classes=10)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28).astype('float32') / 255
y_test = to_categorical(y_test, num_classes=10)

# Network
net = Network()
net.add(FCLayer(28*28, 100))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# Network.fit updates the weights after every single sample (no mini-batches),
# so training on all 60000 samples would be slow
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=35, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])

epoch 1/35   error=0.221844
epoch 2/35   error=0.111662
epoch 3/35   error=0.090781
epoch 4/35   error=0.078323
epoch 5/35   error=0.069379
epoch 6/35   error=0.061977
epoch 7/35   error=0.055968
epoch 8/35   error=0.051152
epoch 9/35   error=0.046736
epoch 10/35   error=0.042697
epoch 11/35   error=0.038958
epoch 12/35   error=0.035733
epoch 13/35   error=0.032976
epoch 14/35   error=0.030686
epoch 15/35   error=0.028638
epoch 16/35   error=0.026704
epoch 17/35   error=0.024822
epoch 18/35   error=0.023091
epoch 19/35   error=0.021539
epoch 20/35   error=0.020102
epoch 21/35   error=0.018829
epoch 22/35   error=0.017755
epoch 23/35   error=0.016813
epoch 24/35   error=0.016036
epoch 25/35   error=0.015348
epoch 26/35   error=0.014703
epoch 27/35   error=0.014078
epoch 28/35   error=0.013535
epoch 29/35   error=0.013044
epoch 30/35   error=0.012565
epoch 31/35   error=0.012086
epoch 32/35   error=0.011629
epoch 33/35   error=0.011206
epoch 34/35   error=0.010798
epoch 35/35   error=0.0