In [2]:

import numpy as np
import time
import matplotlib.pyplot


In [3]:
# open the CSV file and read its contents into a list
train_file = open("/content/drive/MyDrive/MNIST/train.csv", 'r')
train_list = train_file.readlines()
train_file.close()

In [4]:
# open the CSV file and read its contents into a list
test_file = open("/content/drive/MyDrive/MNIST/test.csv", 'r')
test_list = test_file.readlines()
test_file.close()



In [5]:

class DNN:
  def __init__(self, sizes, epochs, lr):
    # taille des couches , nb d iterations epoch , taux d apprentissage lr
    self.sizes = sizes
    self.epochs = epochs
    self.lr = lr

    # number of nodes in each layer couche
    input_layer=self.sizes[0]
    hidden_1=self.sizes[1]
    hidden_2=self.sizes[2]
    output_layer=self.sizes[3]
     # Les fonctions d'activation utilisées sont la fonction sigmoïde pour les couches cachées et la fonction softmax pour la couche de sortie.
    self.params = {
        'W1':np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
        'W2':np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
        'W3':np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
    }
  def sigmoid(self, x, derivative=False):
      if derivative:
          return (np.exp(-x))/((np.exp(-x)+1)**2)
      return 1/(1 + np.exp(-x))

  def softmax(self, x, derivative=False):
      # Numerically stable with large exponentials
      exps = np.exp(x - x.max())
      if derivative:
          return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
      return exps / np.sum(exps, axis=0)

      # une propagation avant à travers le réseau pour calculer les activations de chaque couche en fonction des poids actuels.
  def forward_pass(self, x_train):
      params = self.params

      # input layer activations becomes sample
      params['A0'] = x_train

      # input layer to hidden layer 1
      params['Z1'] = np.dot(params["W1"], params['A0'])
      params['A1'] = self.sigmoid(params['Z1'])

      # hidden layer 1 to hidden layer 2
      params['Z2'] = np.dot(params["W2"], params['A1'])
      params['A2'] = self.sigmoid(params['Z2'])

      # hidden layer 2 to output layer
      params['Z3'] = np.dot(params["W3"], params['A2'])
      params['A3'] = self.softmax(params['Z3'])

      return params['A3']

#une rétropropagation de l'erreur à travers le réseau pour calculer les changements nécessaires aux poids afin de minimiser l'erreur
  def backward_pass(self, y_train, output):
      '''
          This is the backpropagation algorithm, for calculating the updates
          of the neural network's parameters.

          Note: There is a stability issue that causes warnings. This is
                caused  by the dot and multiply operations on the huge arrays.

                RuntimeWarning: invalid value encountered in true_divide
                RuntimeWarning: overflow encountered in exp
                RuntimeWarning: overflow encountered in square
      '''
      params = self.params
      change_w = {}

      # Calculate W3 update
      error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z3'], derivative=True)
      change_w['W3'] = np.outer(error, params['A2'])

      # Calculate W2 update
      error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
      change_w['W2'] = np.outer(error, params['A1'])

      # Calculate W1 update
      error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
      change_w['W1'] = np.outer(error, params['A0'])

      return change_w
#mise à jour les poids du réseau en utilisant la règle de descente de gradient stochastique.
  def update_network_parameters(self, changes_to_w):
      '''
          Update network parameters according to update rule from
          Stochastic Gradient Descent.

          θ = θ - η * ∇J(x, y),
              theta θ:            a network parameter (e.g. a weight w)
              eta η:              the learning rate
              gradient ∇J(x, y):  the gradient of the objective function,
                                  i.e. the change for a specific theta θ
      '''

      for key, value in changes_to_w.items():
          self.params[key] -= self.lr * value

#compute_accuracy calcule la précision du réseau sur un ensemble de données de test en effectuant une propagation
# avant et en comparant les prédictions avec les étiquettes réelles.
  def compute_accuracy(self, test_data, output_nodes):
      '''
          This function does a forward pass of x, then checks if the indices
          of the maximum value in the output equals the indices in the label
          y. Then it sums over each prediction and calculates the accuracy.
      '''
      predictions = []

      for x in train_list:
          all_values = x.split(',')
          # scale and shift the inputs
          inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
          # create the target output values (all 0.01, except the desired label which is 0.99)
          targets = np.zeros(output_nodes) + 0.01
          # all_values[0] is the target label for this record
          targets[int(all_values[0])] = 0.99
          output = self.forward_pass(inputs)
          pred = np.argmax(output)
          predictions.append(pred == np.argmax(targets))

      return np.mean(predictions)

  def train(self, train_list, test_list, output_nodes):
      start_time = time.time()
      for iteration in range(self.epochs):
          for x in train_list:
              all_values = x.split(',')
              # scale and shift the inputs
              inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
              # create the target output values (all 0.01, except the desired label which is 0.99)
              targets = np.zeros(output_nodes) + 0.01
              # all_values[0] is the target label for this record
              targets[int(all_values[0])] = 0.99
              output = self.forward_pass(inputs)
              changes_to_w = self.backward_pass(targets, output)
              self.update_network_parameters(changes_to_w)

          accuracy = self.compute_accuracy(test_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy * 100
          ))


In [6]:
dnn = DNN(sizes=[784, 128, 64, 10], epochs=20, lr=0.001)
dnn.train(train_list, test_list, 10)

Epoch: 1, Time Spent: 48.14s, Accuracy: 27.77%
Epoch: 2, Time Spent: 97.76s, Accuracy: 40.54%
Epoch: 3, Time Spent: 148.57s, Accuracy: 43.24%
Epoch: 4, Time Spent: 195.91s, Accuracy: 44.16%
Epoch: 5, Time Spent: 244.86s, Accuracy: 44.68%
Epoch: 6, Time Spent: 296.25s, Accuracy: 45.43%
Epoch: 7, Time Spent: 344.49s, Accuracy: 46.61%
Epoch: 8, Time Spent: 392.85s, Accuracy: 47.70%
Epoch: 9, Time Spent: 442.35s, Accuracy: 48.65%
Epoch: 10, Time Spent: 493.16s, Accuracy: 49.48%
Epoch: 11, Time Spent: 541.39s, Accuracy: 50.28%
Epoch: 12, Time Spent: 590.58s, Accuracy: 51.08%
Epoch: 13, Time Spent: 641.45s, Accuracy: 51.85%
Epoch: 14, Time Spent: 690.98s, Accuracy: 52.68%
Epoch: 15, Time Spent: 739.42s, Accuracy: 53.52%
Epoch: 16, Time Spent: 788.22s, Accuracy: 54.40%
Epoch: 17, Time Spent: 839.59s, Accuracy: 55.32%
Epoch: 18, Time Spent: 889.23s, Accuracy: 56.36%
Epoch: 19, Time Spent: 938.26s, Accuracy: 57.37%
Epoch: 20, Time Spent: 988.01s, Accuracy: 58.45%


In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
