<h1> MNIST digits classification using a FFNN

A feed-forward neural network (FFNN) with one input layer, one hidden layer of 20 neurons and one output layer of 10 neurons. The data is the MNIST dataset, stored as comma-separated text files.

Input layer: (60 000 x 784) with float values between 0 and 1

Hidden layer: ((784 + 1) x 20), +1 is for the bias, with Sigmoid activation

Output layer: (60 000 x 10), with Softmax activation

So far the best model reached 92.55% accuracy on the test set, limited to 10000 epochs, in 36 min 47 s (on a slow Intel i5).

In [None]:
import numpy as np
import pygame
import matplotlib.pyplot as plt

<h2> #0 Importing the data

In [None]:
def _read_mnist_txt(path):
    """Parse one MNIST text file into ``(labels, images)`` lists.

    Each non-empty line is a comma-separated list of integers: the first
    value is the label, the remaining values are the pixels of the image.
    Blank lines (for instance a trailing newline at the end of the file)
    are skipped instead of crashing on ``int('')``.
    """
    labels, images = [], []
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            values = list(map(int, line.split(',')))
            labels.append(values[0])
            images.append(values[1:])
    return labels, images


train_labels, train_images = _read_mnist_txt('data/MNIST_train.txt')
test_labels, test_images = _read_mnist_txt('data/MNIST_test.txt')

<h2> #1 Initializing the parameters

In [None]:
#Problem dimensions
I = 60000   #number of training samples; train() may use fewer through its datasize argument
N = 784     #input features per image
K = 20      #number of hidden neurons
J = 10      #number of outputs

I_test = 10000   #number of testing samples

#Gradient-descent settings
error = 1e5       #initial error value
thresh = 1e-2     #training stops once the error is no longer above this threshold
alpha1 = 1e-5     #step size that BP() applies to the output-layer weights W
alpha2 = 1e-5     #step size that BP() applies to the hidden-layer weights V
max_itera = 1e4   #iteration cap for the training loop (stored as a float)

<h2> #2 Initializing the weights and preparing the data

In [None]:
#Small random initial weights; the extra row of each matrix multiplies the bias column
V = 0.01 * np.random.randn(N+1, K)   #(N+1) x K  -> weights for the hidden layer
W = 0.01 * np.random.randn(K+1, J)   #(K+1) x J  -> weights for the output layer

#Training images divided by 255 (assumes raw pixel values are in 0..255)
X = np.array(train_images) / 255

#Training labels: y_ keeps the integer digits, y is the one-hot version of shape (I, J)
y_ = np.array(train_labels)
y = np.zeros((I, J))
y[np.arange(I), y_[:I]] = 1   #row i gets a 1 in column y_[i]

#Testing images divided by 255
X_test = np.array(test_images) / 255

#Testing labels: y_test_ keeps the integer digits, y_test is the one-hot version of shape (I_test, J)
y_test_ = np.array(test_labels)
y_test = np.zeros((I_test, J))
y_test[np.arange(I_test), y_test_[:I_test]] = 1

<h2> #3 Functions, Forward Propagation and Backpropagation

In [None]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    The maximum of each row is subtracted before exponentiating so that
    large inputs do not overflow; this does not change the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(x - row_max)
    row_total = exp_x.sum(axis=1, keepdims=True)
    return exp_x / row_total


def cross_entropy_loss(y_pred, y_true):
    """Cross-entropy between predictions and targets, averaged over ``len(y_true)``.

    A constant 1e-9 is added to the predictions before taking the log so that
    a predicted probability of exactly 0 does not produce ``-inf``.
    """
    log_pred = np.log(y_pred + 1e-9)
    summed = np.sum(y_true * log_pred)
    return -summed / len(y_true)


def FWP(X_b, V, W, y, datasize):
    """Forward propagation through the hidden and output layers.

    X_b      : inputs that already contain the bias column
    V, W     : hidden-layer and output-layer weight matrices
    y        : targets, used only to compute the returned error
    datasize : number of rows in X_b (sizes the hidden-layer bias column)

    Returns (X_bb, F, F_b, F_bb, G, error): hidden pre-activation, hidden
    sigmoid activation, that activation with a leading bias column, output
    pre-activation, softmax output and cross-entropy error.
    """
    #Hidden layer: linear combination followed by a sigmoid
    X_bb = np.dot(X_b, V)
    F = 1.0 / (1.0 + np.exp(-X_bb))

    #Prepend a column of ones so the first row of W acts as the bias
    F_b = np.concatenate((np.ones((datasize, 1)), F), axis=1)

    #Output layer: linear combination followed by a softmax
    F_bb = np.dot(F_b, W)
    G = softmax(F_bb)

    #Error of the predictions against the targets
    error = cross_entropy_loss(G, y)

    return X_bb, F, F_b, F_bb, G, error
    

def BP(G, X_b, F, F_b, V, W, y):
    """One full-batch gradient-descent step on both weight matrices.

    G    : softmax output of the network
    X_b  : inputs with their bias column
    F    : sigmoid activation of the hidden layer (without bias column)
    F_b  : F with a leading bias column
    V, W : hidden-layer and output-layer weights, updated IN PLACE
    y    : one-hot targets

    Returns the updated (V, W). The step sizes are the module-level
    ``alpha1`` (for W) and ``alpha2`` (for V).
    """
    #For a softmax output trained with cross-entropy, the gradient of the
    #loss with respect to the output pre-activation is simply (G - y).
    #Gradients are summed over the batch, not averaged.
    delta_out = G - y
    dEdW = np.dot(F_b.T, delta_out)

    #Back-propagate through W BEFORE it is modified, so both gradients are
    #evaluated at the same point. W[1:] drops the bias row, which has no
    #incoming connection from the hidden units. F*(1-F) is the sigmoid derivative.
    delta_hidden = np.dot(delta_out, W[1:].T) * F * (1 - F)
    dEdV = np.dot(X_b.T, delta_hidden)

    #Gradient-descent updates
    W -= alpha1*dEdW
    V -= alpha2*dEdV

    return V, W

#Counts how many predictions agree with the expected values
def howManyLinesAreTheSame(Y_pred, Y):
    """Return the number of positions where ``Y_pred`` equals ``Y``."""
    return np.sum(Y_pred == Y)

# Training the model with full-batch gradient descent
def train(X_full, y_full, V, W, datasize=I):
    """Train the network on the first ``datasize`` samples.

    X_full, y_full : all inputs (without bias column) and their one-hot targets
    V, W           : initial hidden-layer and output-layer weights
    datasize       : number of leading samples to use

    Loops BP + FWP until the error is no longer above the module-level
    ``thresh`` or ``max_itera`` updates have been done, whichever comes first.
    Returns (G, V, W): the last network output on the training subset and
    the trained weights.
    """
    X = X_full[:datasize]
    y = y_full[:datasize]

    #Adding the bias column
    X0 = np.ones((datasize, 1))
    X_b = np.concatenate((X0, X), axis=1)

    #First FWP before looping, to get an initial output and error
    X_bb, F, F_b, F_bb, G, error = FWP(X_b, V, W, y, datasize)

    #Print some parameters
    print('Datasize:', datasize, 'Tresh:', thresh, 'Max itera:',max_itera, 'Learning rates: (', alpha1,',', alpha2,')')

    #The cap is part of the loop condition so that at most max_itera updates
    #are performed (checking it after the update allowed two extra ones).
    itera = 0
    while thresh < error and itera < max_itera:

        V, W = BP(G, X_b, F, F_b, V, W, y)
        X_bb, F, F_b, F_bb, G, error = FWP(X_b, V, W, y, datasize)

        #Progress report every 100 updates
        if itera % 100 == 0:
            print('#',itera, 'Error: ',error)

        itera += 1

    #Training finished
    print("Entrainement terminé")

    return G, V, W
    
def test(X_test, V, W, I_test):
    """Run the network on ``X_test`` and return its softmax output.

    X_test : inputs without bias column, one sample per row
    V, W   : hidden-layer and output-layer weights
    I_test : number of rows in X_test
    """
    #FWP expects inputs that already carry the bias column
    X0 = np.ones((I_test, 1))
    X_b = np.concatenate((X0, X_test), axis=1)

    #FWP needs targets only to compute its error, which is discarded here.
    #A zero placeholder with one column per output keeps this function
    #independent of any global label array, so it also works on a single image.
    placeholder_y = np.zeros((I_test, W.shape[1]))

    #One FWP to get the predictions
    X_bb, F, F_b, F_bb, G, error = FWP(X_b, V, W, placeholder_y, I_test)

    return G

<h2> #4 Training the model

In [None]:
datasize_ = 60000
G_train, V, W = train(X, y, V, W, datasize=datasize_)

#Predicted digit = index of the largest softmax output of each row
G_train_int = np.argmax(G_train, axis=1)

#train() only uses the first datasize_ samples, so compare against the
#matching labels; comparing against all of y_ breaks when datasize_ < I.
nb_correct_train = howManyLinesAreTheSame(G_train_int, y_[:datasize_])
print(f'Correct estimation: {nb_correct_train}/{datasize_}', '(',nb_correct_train*100/datasize_,'%)')

Saving the weights

In [None]:
#np.save('best_weights/V_weights',V)
#np.save('best_weights/W_weights',W)

Loading the best weights (for now)

In [None]:
#V = np.load('best_weights/V_weights.npy')
#W = np.load('best_weights/W_weights.npy')

<h2> #5 Testing the model

In [None]:
#Network output on the test images
G_test = test(X_test, V, W, I_test)

#Predicted digit = index of the largest output of each row
G_test_int = np.argmax(G_test, axis=1)
print(G_test_int)

#Count the correct predictions once and report them as a count and a percentage
nb_correct_test = howManyLinesAreTheSame(G_test_int, y_test_)
print(f'Correct prediction: {nb_correct_test}/{I_test}', '(', nb_correct_test*100/I_test, '%)')

<h2> #6 Playing with the model

In [None]:
import pygame

#Grid geometry
grid_size = 28   #Cells per side (28 x 28 = 784 values fed to the model)
cell_size = 20   #Side of one cell, in pixels
#Window = drawing area, plus 200 px on the right for the prediction and 50 px below for the button
width = grid_size * cell_size + 200
height = grid_size * cell_size + 50

#RGB colors
WHITE, BLACK, GRAY = (255, 255, 255), (0, 0, 0), (200, 200, 200)

#Start pygame and open the window
pygame.init()
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Draw a digit")

#Default pygame font, size 36
font = pygame.font.Font(None, 36)

#Drawing state: one intensity per cell, all zeros (black) at start
grid = np.zeros(shape=(grid_size, grid_size), dtype=np.float32)

#White background everywhere, black square for the drawing area
screen.fill(WHITE)
pygame.draw.rect(screen, BLACK, pygame.Rect(0, 0, grid_size * cell_size, grid_size * cell_size))

#Draws white separator lines between the cells of the drawing area
def draw_grid():
    extent = grid_size * cell_size   #Side of the drawing area, in pixels
    #Vertical lines
    for col_px in range(0, extent, cell_size):
        pygame.draw.line(screen, WHITE, (col_px, 0), (col_px, extent))
    #Horizontal lines
    for row_px in range(0, extent, cell_size):
        pygame.draw.line(screen, WHITE, (0, row_px), (extent, row_px))

#Paints the cell under the cursor and, more faintly, its 8 neighbours, then redraws the grid
def update_grid(pos):
    #Convert the pixel position to cell coordinates
    col, row = pos[0] // cell_size, pos[1] // cell_size
    if row >= grid_size or col >= grid_size:
        return  #Outside the drawing area: nothing to do

    for d_row in (-1, 0, 1):
        for d_col in (-1, 0, 1):
            n_col, n_row = col + d_col, row + d_row
            if not (0 <= n_col < grid_size and 0 <= n_row < grid_size):
                continue  #Neighbour falls outside the grid
            #Intensity drops by 0.4 per step away from the centre cell
            intensity = max(0, 1.0 - (abs(d_col) + abs(d_row)) * 0.4)
            #Never darken a cell that is already brighter
            grid[n_row, n_col] = max(grid[n_row, n_col], intensity)

    #Redraw every cell as a gray level proportional to its intensity
    for r in range(grid_size):
        for c in range(grid_size):
            shade = int(grid[r, c] * 255)
            cell_rect = (c * cell_size, r * cell_size, cell_size, cell_size)
            pygame.draw.rect(screen, (shade, shade, shade), cell_rect)

#Draws the gray "Erase" button, centred under the drawing area
def draw_button():
    area_px = grid_size * cell_size   #Side of the drawing area, in pixels
    button_rect = (area_px // 4, area_px + 10, area_px // 2, 30)
    pygame.draw.rect(screen, GRAY, button_rect)

    #Label, horizontally centred on the button
    label = font.render("Erase", True, BLACK)
    label_x = area_px // 2 - label.get_width() // 2
    screen.blit(label, (label_x, area_px + 15))

#Resets the drawing: fresh all-zero grid, black drawing area, button redrawn
def clear_grid():
    global grid
    #Rebind the module-level grid to a new empty array
    grid = np.zeros((grid_size,) * 2, dtype=np.float32)
    area_px = grid_size * cell_size
    pygame.draw.rect(screen, BLACK, (0, 0, area_px, area_px))
    draw_button()

#Main loop: draw with the left mouse button, click "Erase" to reset, close the window to quit

#Same rectangle as the one painted by draw_button(); used for the click hit-test
#so that only clicks on the button itself erase the drawing.
erase_button_rect = pygame.Rect(grid_size * cell_size // 4, grid_size * cell_size + 10,
                                grid_size * cell_size // 2, 30)

drawing = True
try:
    while drawing:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                drawing = False
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if erase_button_rect.collidepoint(event.pos):
                    clear_grid()
            elif pygame.mouse.get_pressed()[0]:  # Left mouse button held down
                update_grid(pygame.mouse.get_pos())

        #draw_grid()
        draw_button()

        # Normalize the grid so that its brightest cell is 1 (left untouched when empty)
        peak = np.max(grid)
        normalized_grid = grid / peak if peak > 0 else grid
        flattened_grid = normalized_grid.flatten().reshape(1, 784)  # Format (1, 784)

        # Run the digit recognition model on the current drawing
        G_dessin = test(flattened_grid, V, W, 1)
        max_index = np.argmax(G_dessin)  # Index of the highest output = predicted digit

        # Display the prediction in the side panel
        pygame.draw.rect(screen, WHITE, (grid_size * cell_size + 10, 10, 180, 50))  # Clear previous prediction
        text = font.render(f"Prediction: {max_index}", True, BLACK)
        screen.blit(text, (grid_size * cell_size + 20, 20))

        pygame.display.flip()
finally:
    # Always shut pygame down, even if an error interrupts the loop
    pygame.quit()