# Réseau de neurones MNIST en utilisant la base de Kaggle
(Conversion au format .csv des bases MNIST de Yann LeCun)

In [None]:
import numpy as np
import random
import pandas as pd
import matplotlib as plt

# Read the training CSV into a DataFrame; later cells convert it to a NumPy
# array and treat the first column as the label.
train_csv_path = 'train.csv'
data = pd.read_csv(train_csv_path)

In [None]:
# Turn the DataFrame into a 2-D array (one row per sample), then transpose so
# that every column holds one sample.
data = np.array(data)
m = data.shape[0]  # number of rows (samples)
n = data.shape[1]  # number of columns (label + features)

data_train = data[:m].T
Y_train = data_train[0]  # first row after the transpose: the labels
# Remaining rows are the features, divided by 255 (presumably mapping 0-255
# pixel intensities into [0, 1] -- confirm against the CSV).
X_train = data_train[1:n] / 255.
m_train = X_train.shape[1]  # number of training samples (columns)

In [None]:
def init_params(n_input=784, n_hidden=100, n_output=10):
    """Create randomly initialised parameters for the two-layer network.

    Every entry is drawn uniformly from [-0.5, 0.5). The arrays are drawn in
    the order W1, b1, W2, b2, so a seeded run with the default sizes yields
    the same values as before the sizes became configurable.

    Args:
        n_input: Number of input features per sample (default 784).
        n_hidden: Number of hidden units (default 100).
        n_output: Number of output classes (default 10).

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_input)``,
        ``(n_hidden, 1)``, ``(n_output, n_hidden)`` and ``(n_output, 1)``.
    """
    W1 = np.random.rand(n_hidden, n_input) - 0.5
    b1 = np.random.rand(n_hidden, 1) - 0.5
    W2 = np.random.rand(n_output, n_hidden) - 0.5
    b2 = np.random.rand(n_output, 1) - 0.5
    return W1, b1, W2, b2

def ReLU(Z):
    """Apply the rectified linear unit element-wise (values below 0 become 0)."""
    return np.clip(Z, 0, None)

def softmax(Z):
    """Compute the softmax of ``Z`` along axis 0 (one distribution per column).

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps ``np.exp`` from overflowing
    to ``inf`` (and the quotient from becoming NaN) when the logits are large.

    Args:
        Z: Array of logits; for a 2-D array each column is one sample.

    Returns:
        Array of the same shape as ``Z`` whose entries along axis 0 sum to 1.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)
    
def forward_prop(W1, b1, W2, b2, X):
    """Run one forward pass through the two-layer network.

    Args:
        W1, b1: Weights and bias of the hidden layer.
        W2, b2: Weights and bias of the output layer.
        X: Input matrix with one sample per column.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden ReLU
        activation, output pre-activation and output softmax activation.
    """
    hidden_pre = np.dot(W1, X) + b1
    hidden_act = ReLU(hidden_pre)
    output_pre = np.dot(W2, hidden_act) + b2
    output_act = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_act

def ReLU_deriv(Z):
    """Return a boolean mask that is True where ``Z`` is strictly positive.

    Used as the ReLU derivative: multiplying by the mask keeps values where
    ``Z > 0`` and zeroes the rest.
    """
    return np.greater(Z, 0)

def one_hot(Y):
    """One-hot encode an array of integer labels.

    Args:
        Y: Array of non-negative integer class labels.

    Returns:
        Float array of shape ``(Y.max() + 1, Y.size)``; column ``j`` contains
        a single 1 in row ``Y[j]`` and zeros elsewhere.
    """
    sample_count = Y.size
    class_count = Y.max() + 1
    encoded = np.zeros((sample_count, class_count))
    sample_rows = np.arange(sample_count)
    encoded[sample_rows, Y] = 1
    # Transpose so that each column (not each row) describes one sample.
    return encoded.T

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute the parameter gradients for one batch.

    The output error is ``A2 - one_hot(Y)`` (presumably the gradient of a
    softmax/cross-entropy loss -- the loss itself is not defined in this
    file). All gradients are averaged over the batch.

    Args:
        Z1: Hidden-layer pre-activations from the forward pass.
        A1: Hidden-layer activations from the forward pass.
        Z2: Output-layer pre-activations (unused, kept for the interface).
        A2: Output-layer activations from the forward pass.
        W1: Hidden-layer weights (unused, kept for the interface).
        W2: Output-layer weights.
        X: Input matrix with one sample per column.
        Y: Integer labels, one per sample.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``. ``dW1``/``dW2`` have the shapes of
        ``W1``/``W2``; ``db1``/``db2`` are column vectors with one entry per
        unit of the corresponding layer.
    """
    # Use the size of this batch rather than a module-level sample count, so
    # the averaging stays correct for any batch passed in.
    m = Y.size
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = dZ2.dot(A1.T) / m
    # Sum over the sample axis only: each unit gets its own bias gradient.
    # Summing over every axis would give all biases one shared scalar update.
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = dZ1.dot(X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    """Apply one gradient-descent step to every parameter.

    Each parameter ``p`` with gradient ``g`` becomes ``p - alpha * g``.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` holding the updated parameters.
    """
    parameters = (W1, b1, W2, b2)
    gradients = (dW1, db1, dW2, db2)
    W1, b1, W2, b2 = (
        param - alpha * grad for param, grad in zip(parameters, gradients)
    )
    return W1, b1, W2, b2

In [None]:
def get_predictions(A2):
    """Return, for each column of ``A2``, the row index of its largest value."""
    predicted_classes = np.argmax(A2, axis=0)
    return predicted_classes

def get_accuracy(predictions, Y):
    """Print both arrays and return the fraction of predictions equal to ``Y``.

    Args:
        predictions: Predicted labels.
        Y: Reference labels (NumPy array; its ``size`` is the denominator).

    Returns:
        Number of matching entries divided by ``Y.size``.
    """
    print(predictions, Y)
    match_count = np.sum(np.equal(predictions, Y))
    return match_count / Y.size

def gradient_descent(X, Y, alpha, iterations):
    """Train the two-layer network with full-batch gradient descent.

    Each iteration runs a forward pass, a backward pass and a parameter
    update. The accuracy of the predictions made *before* that update is
    computed, and ``100 - accuracy * 100`` is appended to
    "neuralNetIterations.dat" as a tab-separated ``iteration<TAB>error``
    line. The accuracy is also printed every 10 iterations.

    Args:
        X: Input matrix with one sample per column.
        Y: Integer labels, one per sample.
        alpha: Learning rate passed to ``update_params``.
        iterations: Number of gradient-descent steps to run.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with the trained parameters.
    """
    W1, b1, W2, b2 = init_params()
    # Context manager so the log is flushed and closed even if an iteration
    # raises part-way through training.
    with open("neuralNetIterations.dat", "w") as log_file:
        for i in range(iterations):
            Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
            dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
            W1, b1, W2, b2 = update_params(
                W1, b1, W2, b2, dW1, db1, dW2, db2, alpha
            )
            predictions = get_predictions(A2)
            acc = get_accuracy(predictions, Y)
            error_percent = 100 - acc * 100
            log_file.write(f"{i}\t{error_percent!s}\n")
            if i % 10 == 0:
                print("Iteration: ", i)
                print(acc)
    return W1, b1, W2, b2

In [None]:

# Train on the full training set: learning rate 0.10 for 500 iterations.
W1, b1, W2, b2 = gradient_descent(
    X_train,
    Y_train,
    alpha=0.10,
    iterations=500,
)

On obtient environ 90 % de précision après 500 itérations. Attention : cette précision est mesurée sur la base d'entraînement (`X_train`, `Y_train`), et non sur la base de test.
J'ai choisi de limiter l'apprentissage à 500 itérations, plutôt que de m'arrêter sur un seuil de taux d'erreur, afin de ne pas perdre trop de temps entre deux exécutions complètes, compte tenu de la faible puissance de notre machine.

In [None]:
def make_predictions(X, W1, b1, W2, b2):
    """Predict a class index for every column of ``X`` with the given parameters.

    Runs a forward pass and returns the arg-max of the output activations.
    """
    output_act = forward_prop(W1, b1, W2, b2, X)[3]
    return get_predictions(output_act)

def test_prediction(totalError, index, W1, b1, W2, b2):
    """Check one training sample and return the updated error count.

    Reads column ``index`` of the module-level ``X_train`` (kept as a column
    vector) and the matching label from ``Y_train``.

    Args:
        totalError: Number of misclassifications counted so far.
        index: Column index of the sample to check.
        W1, b1, W2, b2: Network parameters used for the prediction.

    Returns:
        ``totalError + 1`` if the prediction differs from the label,
        otherwise ``totalError`` unchanged.
    """
    sample = X_train[:, index, None]
    predicted = make_predictions(sample, W1, b1, W2, b2)
    expected = Y_train[index]
    is_wrong = expected != predicted
    return (totalError + 1) if is_wrong else totalError

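Calcul du nombre d'erreurs sur les 100 premiers éléments de la base d'entraînement (`X_train`) ; ces éléments ne sont pas tirés au hasard et ne proviennent pas de la base de test.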

In [None]:
# Count the misclassifications among the first 100 training samples.
# test_prediction(0, i, ...) yields 1 for a wrong prediction and 0 otherwise.
totalError = sum(
    test_prediction(0, sample_index, W1, b1, W2, b2)
    for sample_index in range(100)
)
print("totalError: ", totalError)

Tracé (plot) de l'évolution du taux d'erreur au cours de la descente de gradient,
selon les valeurs écrites dans le fichier "neuralNetIterations.dat".

In [None]:
# Plot the values logged during training, one point per iteration.
# read_csv already returns a DataFrame, so no extra conversion is needed.
# Columns of the tab-separated log: 0 = iteration index, 1 = logged error.
neuralNetIterations = pd.read_csv('neuralNetIterations.dat', sep='\t', header=None)

# Bind pyplot explicitly: the notebook's top-level `import matplotlib as plt`
# binds the bare package, which provides no plot()/xlabel()/show().
import matplotlib.pyplot as plt

x = neuralNetIterations[0]
y = neuralNetIterations[1]
plt.plot(x, y)
plt.xlabel('Iterations')
plt.ylabel('Error rate')
plt.show()

# plt.plotfile('neuralNetIterations.dat', delimiter='\t', cols=(0, 1), names=('Iterations', 'Accuracy'), linewidth=0.3) -- deprecated

Utilisation du jeu de données de test pour s'assurer de la concordance de nos données.

In [None]:
# Run the trained network on images from the test CSV and display each
# sampled image together with the predicted digit.
test = pd.read_csv('test.csv')
test = np.array(test)
a, z = test.shape  # a: number of rows (images), z: number of columns (pixels)

# Keep the first 2000 rows, starting at row 0 so the slice really holds 2000
# images. After the transpose each column is one image.
data_test = test[0:2000].T
X_test = data_test[0:z]
X_test = X_test / 255

# Bind pyplot explicitly: the notebook's top-level `import matplotlib as plt`
# binds the bare package, which provides no gray()/imshow()/show().
import matplotlib.pyplot as plt

# Draw indices only from columns that exist, so indexing X_test can never go
# out of bounds (this also covers files with fewer than 2000 rows).
num_test_images = X_test.shape[1]
randomList = random.sample(range(num_test_images), min(100, num_test_images))
for i in randomList:
    current_image = X_test[:, i, None]
    prediction = make_predictions(current_image, W1, b1, W2, b2)
    # Undo the /255 scaling and restore the 28x28 layout for display.
    current_image = current_image.reshape((28, 28)) * 255
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()
    print("Prediction pour cette image: ", prediction)