In [None]:
from random import *
import math
import numpy as np
from matplotlib import pyplot as plt
import keras
from keras.datasets import mnist
from tqdm import tqdm, trange
from sklearn.model_selection import train_test_split

# Load the MNIST data set through keras.
(x_train_total, y_train_total), (x_test, y_test) = mnist.load_data()

# Flatten every image into a single row and scale the pixel values by 1/255.
# reshape(n, -1) infers the row length instead of hard-coding the sample
# counts and the 784 pixels per image.
x_train_total = x_train_total.reshape(x_train_total.shape[0], -1) / 255
x_test = x_test.reshape(x_test.shape[0], -1) / 255

# Hold out 10% of the training data for validation. The fixed random_state
# makes the split identical on every "Restart & Run All".
SPLIT_SEED = 42
x_train, x_val, y_train, y_val = train_test_split(
    x_train_total, y_train_total, test_size=0.1, shuffle=True, random_state=SPLIT_SEED
)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# One-hot encode the labels: row k of the 10x10 identity matrix encodes label k.
y_train_OH = np.eye(10)[y_train]
y_test_OH = np.eye(10)[y_test]
y_val_OH = np.eye(10)[y_val]



In [None]:

# Number of neurons in the hidden layer and in the output layer.
nb_neurons_h = 500
nb_neurons_o = 10
# Number of input features per sample (length of one flattened image row).
nb_inputs = 784

# Seeded generator so that the initial parameters (and every later use of
# `rng`, e.g. shuffling) are reproducible from a fresh kernel.
INIT_SEED = 42
rng = np.random.default_rng(INIT_SEED)

# Draw every weight and bias uniformly from [-1, 1): rng.random() samples
# [0, 1), which is then stretched by 2 and shifted by -1.
weights_h = -1 + 2*rng.random(size=(nb_neurons_h, nb_inputs))
w_o = -1 + 2*rng.random(size=(nb_neurons_o, nb_neurons_h))

bias_h = -1 + 2*rng.random(size=(nb_neurons_h))
bias_o = -1 + 2*rng.random(size=(nb_neurons_o))



def sigmoid(y):
  """Logistic function 1 / (1 + e^(-y)), evaluated without overflow.

  Args:
    y: scalar or array-like of pre-activations.

  Returns:
    The element-wise sigmoid of ``y``; a scalar for scalar input, otherwise
    an array of the same shape.
  """
  y = np.asarray(y)
  # exp(-|y|) always lies in (0, 1], so it cannot overflow for any finite y.
  z = np.exp(-np.abs(y))
  # For y >= 0 use 1/(1+e^-y); for y < 0 use the algebraically equal
  # e^y/(1+e^y). Both are written in terms of z = e^-|y|.
  result = np.where(y >= 0, 1 / (1 + z), z / (1 + z))
  # Indexing with () turns a 0-d result back into a scalar and returns
  # arrays of higher rank unchanged.
  return result[()]

def deriveesigmoid(y):
  """Derivative of the sigmoid, e^(-y) / (1 + e^(-y))^2, without overflow.

  The derivative is an even function of ``y``, so it is evaluated at -|y|.
  That keeps the exponential in (0, 1] and avoids the inf/inf = NaN result
  the direct formula gives for very negative ``y``.

  Args:
    y: scalar or array-like of pre-activations.

  Returns:
    The element-wise derivative, same shape as ``y``.
  """
  z = np.exp(-np.abs(y))
  return z / (1 + z)**2

def softmax(y):
  """Softmax over the last axis of ``y``.

  For a 1-D vector or a single-row matrix this matches normalizing over the
  whole array. For a batch of rows, each row is normalized independently so
  that every row sums to 1.

  Args:
    y: scalar or array-like of scores; classes are along the last axis.

  Returns:
    Array of the same shape as ``y`` with non-negative entries that sum to 1
    along the last axis.
  """
  y = np.asarray(y)
  # A 0-d input has no last axis; fall back to a global reduction there.
  axis = -1 if y.ndim else None
  # Subtracting the maximum leaves the result unchanged mathematically but
  # keeps every exponent <= 0, so np.exp cannot overflow.
  exp_y = np.exp(y - np.max(y, axis=axis, keepdims=True))
  return exp_y / np.sum(exp_y, axis=axis, keepdims=True)

#categorical cross entropy loss
def categorical_cross_entropy(target, z_o):
  """Categorical cross-entropy between ``target`` and the probabilities ``z_o``.

  Args:
    target: array of per-class weights (the notebook passes one-hot rows).
    z_o: array of predicted probabilities, broadcastable against ``target``.

  Returns:
    The sum over all elements of ``-log(z_o + epsilon) * target``.
  """
  # Tiny offset so that a probability of exactly 0 does not produce log(0).
  epsilon = 10**-100
  neg_log_probs = -np.log(z_o + epsilon)
  return np.sum(neg_log_probs * target)



def forwardprop(x, weights_h, bias_h, w_o, bias_o):
  """Forward pass through the hidden layer and the output layer.

  Args:
    x: input sample(s); the last axis is multiplied with ``weights_h.T``.
    weights_h: hidden-layer weight matrix (one row per hidden neuron).
    bias_h: hidden-layer bias vector.
    w_o: output-layer weight matrix (one row per output neuron).
    bias_o: output-layer bias vector.

  Returns:
    Tuple ``(y_h, z_h, y_o, z_o)``: hidden pre-activation, hidden activation
    (sigmoid), output pre-activation and output activation (softmax).
  """
  # Hidden layer: affine map followed by the sigmoid.
  hidden_pre = np.matmul(x, weights_h.T) + bias_h
  hidden_act = sigmoid(hidden_pre)
  # Output layer: affine map followed by the softmax.
  output_pre = np.matmul(hidden_act, w_o.T) + bias_o
  output_act = softmax(output_pre)
  return hidden_pre, hidden_act, output_pre, output_act

def backprop(y_h, z_h, y_o, z_o, weights_h, w_o, x, target):
  """Gradients of the softmax cross-entropy loss for the two-layer network.

  Works for a single sample (1-D vectors or one-row matrices) and for a batch
  of rows; for a batch the gradients are summed over the samples.

  Args:
    y_h: hidden pre-activation (unused; kept for interface compatibility).
    z_h: hidden activation, i.e. sigmoid of ``y_h``.
    y_o: output pre-activation (unused; kept for interface compatibility).
    z_o: output probabilities produced by the softmax.
    weights_h: hidden weights (unused; kept for interface compatibility).
    w_o: output-layer weight matrix, one row per output neuron.
    x: input sample(s) that produced the activations.
    target: expected output, broadcastable against ``z_o``.

  Returns:
    Tuple ``(d_weights_h, d_bias_h, d_w_o, d_bias_o)`` with the same shapes
    as the corresponding parameters.
  """
  # Promote 1-D single-sample inputs to one-row matrices so that the
  # transposed products below are proper (features x samples) @ (samples x ...).
  x = np.atleast_2d(x)
  z_h = np.atleast_2d(z_h)

  # For a softmax output trained with cross-entropy, the gradient with
  # respect to the output pre-activation is simply (prediction - target).
  d_y_o = np.atleast_2d(z_o - target)
  # Summing over the sample axis equals squeeze() for one sample and stays
  # correct for several.
  d_bias_o = d_y_o.sum(axis=0)
  d_w_o = d_y_o.T @ z_h

  # sigmoid'(y_h) == z_h * (1 - z_h): reuses the stored activation and cannot
  # overflow, unlike re-evaluating exponentials of y_h.
  d_y_h = (d_y_o @ w_o) * (z_h * (1 - z_h))
  d_bias_h = d_y_h.sum(axis=0)
  d_weights_h = d_y_h.T @ x

  return d_weights_h, d_bias_h, d_w_o, d_bias_o

def update(weights_h, bias_h, w_o, bias_o, d_weights_h, d_bias_h, d_w_o, d_bias_o, l_r=0.01):
  """Apply one gradient-descent step to all parameters, in place.

  The parameter arrays are modified with ``-=``, so the caller's arrays
  (including module-level ones) change; the same objects are also returned.

  Args:
    weights_h, bias_h, w_o, bias_o: parameter arrays to update.
    d_weights_h, d_bias_h, d_w_o, d_bias_o: matching gradients.
    l_r: learning rate (step size); defaults to the previous fixed 0.01.

  Returns:
    Tuple ``(weights_h, bias_h, w_o, bias_o)`` of the updated arrays.
  """
  bias_h -= l_r*d_bias_h
  bias_o -= l_r*d_bias_o
  weights_h -= l_r*d_weights_h
  w_o -= l_r*d_w_o
  return weights_h, bias_h, w_o, bias_o




def train(weights_h, bias_h, w_o, bias_o, x, target, nb_it=5):
  """Train the network with per-sample stochastic gradient descent.

  Each epoch visits every row of ``x`` once in a freshly shuffled order,
  updating the parameters after each sample. After every epoch the network
  is evaluated on the module-level validation set (``x_val``, ``y_val``,
  ``y_val_OH``) and the validation loss and accuracy are printed. At the end
  the per-epoch training loss is plotted.

  The parameter arrays are modified in place (``update`` uses ``-=``), which
  is how the trained weights become visible to later cells; nothing is
  returned.

  Args:
    weights_h, bias_h, w_o, bias_o: network parameters, updated in place.
    x: training inputs, one sample per row.
    target: training labels, either a 1-D array of integer class indices or
      a 2-D array of one-hot rows.
    nb_it: number of epochs (passes over ``x``).
  """
  x = np.asarray(x)
  target = np.asarray(target)
  # Accept integer labels (what the notebook passes) as well as one-hot rows;
  # the number of classes is taken from the output bias.
  if target.ndim == 1:
    target_OH = np.eye(bias_o.shape[0])[target]
  else:
    target_OH = target
  nb_samples = x.shape[0]

  loss = 0  # defined up front so the final report also works for nb_it == 0
  loss_list = []
  for epoch in trange(nb_it):
    loss = 0

    # Visit the samples in a new random order every epoch.
    sample_indices = np.arange(nb_samples)
    rng.shuffle(sample_indices)
    for sample in sample_indices:
      current_x = x[sample].reshape(1, -1)

      y_h, z_h, y_o, z_o = forwardprop(current_x, weights_h, bias_h, w_o, bias_o)
      d_weights_h, d_bias_h, d_w_o, d_bias_o = backprop(y_h, z_h, y_o, z_o, weights_h, w_o, current_x, target_OH[sample])
      weights_h, bias_h, w_o, bias_o = update(weights_h, bias_h, w_o, bias_o, d_weights_h, d_bias_h, d_w_o, d_bias_o)

      # Running mean of the per-sample loss over the epoch.
      loss += categorical_cross_entropy(target_OH[sample], z_o.squeeze()) / nb_samples

    loss_list.append(loss)

    # Evaluate on the held-out validation set, one sample at a time.
    nb_correct = 0
    val_loss = 0
    nb_val = x_val.shape[0]
    for val_sample in range(nb_val):
      _, _, _, z_o = forwardprop(x_val[val_sample], weights_h, bias_h, w_o, bias_o)
      val_loss += categorical_cross_entropy(y_val_OH[val_sample], z_o.squeeze()) / nb_val

      if z_o.argmax() == y_val[val_sample]:
        nb_correct += 1

    accuracy = nb_correct / nb_val
    print("validation loss", val_loss)
    print("accuracy", accuracy)

  print("train loss",loss,"after",nb_it,"iterations")
  axe_x = np.arange(len(loss_list))
  plt.plot(axe_x, loss_list)
  plt.show()


# Train on the training split. update() modifies the arrays with -=, so the
# module-level weights and biases hold the trained values afterwards.
train(
    weights_h=weights_h,
    bias_h=bias_h,
    w_o=w_o,
    bias_o=bias_o,
    x=x_train,
    target=y_train,
)



In [None]:
# Test section
# Reshape the flattened test rows into 28x28 images for display; -1 lets
# numpy infer the number of images instead of hard-coding 10000.
x_display = x_test.reshape(-1, 28, 28)


def test(x_test, y_test):
  """Classify every row of ``x_test`` and report the accuracy.

  Uses the module-level parameters ``weights_h``, ``bias_h``, ``w_o`` and
  ``bias_o``. The first few misclassified samples are printed and shown as
  28x28 images.

  Args:
    x_test: inputs, one flattened 28x28 image per row.
    y_test: expected class label for each row.

  Returns:
    Float array holding the predicted class index for each row.
  """
  max_errors_shown = 10
  nb_errors_shown = 0
  predictions = np.zeros(shape=(y_test.shape[0]))
  for i in range(x_test.shape[0]):
    _, _, _, z_o = forwardprop(x_test[i], weights_h, bias_h, w_o, bias_o)
    # The predicted class is the one with the highest output probability.
    predictions[i] = z_o.argmax()

    # Display only the first `max_errors_shown` mistakes.
    if predictions[i] != y_test[i] and nb_errors_shown < max_errors_shown:
      print("")
      print("Attention : erreur")
      print("Prediction: ", predictions[i])
      print("Label: ", y_test[i])
      plt.gray()
      # Take the image from the argument itself rather than from a global
      # array, so the picture always matches the sample being classified.
      plt.imshow(x_test[i].reshape(28, 28), interpolation='nearest')
      plt.show()
      nb_errors_shown += 1

  accuracy = np.mean(predictions == y_test)
  print("")
  print("Accuracy", accuracy)
  return predictions

predictions = test(x_test, y_test)


# Show the first 100 test images together with the predicted and true label.
for predicted, label, image in zip(predictions[:100], y_test[:100], x_display[:100]):
  print("")
  print("Prediction: ", predicted)
  print("Label: ", label)
  plt.gray()
  plt.imshow(image, interpolation='nearest')
  plt.show()

def validate():
  """Run another training pass, then evaluate on the test set 20 times.

  Uses the module-level parameters and data sets. ``train`` continues from
  the current (already updated) parameter values.
  """
  train(weights_h, bias_h, w_o, bias_o, x_train, y_train)

  # test() requires the inputs and labels; calling it without arguments
  # raised a TypeError.
  # NOTE(review): the parameters do not change between these calls, so each
  # repetition presumably prints the same result - confirm 20 runs are intended.
  for _ in range(20):
    test(x_test, y_test)


# Run the validation routine: it calls train() again on the current
# module-level parameters and then test().
validate()



