In [1]:
# This code is the same as mnist_training.py (this is the Jupyter notebook version).
# It is based on the following:
# https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html
# Author: Arthur Mensch <arthur.mensch@m4x.org>
# License: BSD 3 clause
#
# https://github.com/Fedzbar/deepfedz/blob/master/mnist.py
# Author: Federico Barbero <fb548@cam.ac.uk>
# License: MIT License

import os
import pickle

import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state

from bbnet.nn import NeuralNet
from bbnet.layers import Linear, Tanh
from bbnet.train import train
from bbnet.data import MiniBatchStochastic
from bbnet.optimizers import GD

# How many examples to draw for the training set and for the test set
train_samples = test_samples = 10_000

In [None]:
# Download (or read from the local scikit-learn cache) the MNIST dataset
# published at https://www.openml.org/d/554
print("Fetching mnist_784 data...")
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    return_X_y=True,
)

In [None]:
# Shuffle the full dataset with a fixed seed, then carve out the training and
# test subsets.
print("Randomizing and reshaping data...")
random_state = check_random_state(0)
permutation = random_state.permutation(X.shape[0])
# Bind the shuffled arrays to new names instead of overwriting X / y, so this
# cell can be re-run on its own: the `.iloc` lookups below need the objects
# returned by fetch_openml, not the ndarrays produced here.
X_shuffled = X.iloc[permutation].values
y_shuffled = y.iloc[permutation].values
# Make sure every example is a flat feature vector (one row per example)
X_shuffled = X_shuffled.reshape((X_shuffled.shape[0], -1))

# Split data
print("Splitting data...")
# Pass the seeded RandomState along: train_test_split shuffles again by
# default, and without an explicit random_state that shuffle is different on
# every run, which makes the reported accuracy irreproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_shuffled, y_shuffled,
    train_size=train_samples, test_size=test_samples,
    random_state=random_state)

In [None]:
# Labels come as ['1', '5', ...], we want to convert them to a one hot matrix structure
# Like so: '1' => [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] '5' => [0, 0, 0, 0, 0, 1, 0, 0, 0, 0] and so on
def string_array_to_one_hot_matrix(to_convert: np.ndarray, num_classes: int = 10) -> np.ndarray:
    """Convert an array of integer-like labels into a one-hot matrix.

    Args:
        to_convert: Labels, either numeric or numeric strings such as '7'.
            Any array-like is accepted; inputs with more than one dimension
            (e.g. a column vector) are flattened first.
        num_classes: Number of columns in the result. Defaults to 10
            (the digits 0 - 9).

    Returns:
        A float matrix of shape (number of labels, num_classes) holding a
        single 1 per row, in the column given by that row's label.

    Raises:
        ValueError: If any label falls outside [0, num_classes).
    """
    # Flatten so that an (n, 1) column cannot broadcast inside the fancy index
    # below and silently mark the wrong cells.
    labels = np.asarray(to_convert).astype(int).ravel()

    # Negative values would otherwise wrap around to the last columns.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            "labels must be in the range [0, %d), got values from %d to %d"
            % (num_classes, labels.min(), labels.max()))

    # matrix size: number of labels * num_classes options
    matrix = np.zeros((labels.size, num_classes))
    matrix[np.arange(labels.size), labels] = 1
    return matrix

print("Converting labels to one hot matrix...")
# One-hot encode the training labels first, then the test labels
y_train, y_test = map(string_array_to_one_hot_matrix, (y_train, y_test))

In [None]:
print("Initializing neural network...")
# Fully connected stack 784 -> 300 -> 100 -> 10, with a Tanh activation
# following every Linear layer (including the last one).
layer_sizes = [784, 300, 100, 10]
layers = []
for n_in, n_out in zip(layer_sizes, layer_sizes[1:]):
    layers.append(Linear(input_size=n_in, output_size=n_out))
    layers.append(Tanh())
net = NeuralNet(layers)

In [12]:
print("Training neural network...")
# Mini-batches of 64 examples, plain gradient descent with a 0.01 learning rate
batch_iterator = MiniBatchStochastic(batch_size=64)
gradient_descent = GD(lr=0.01)
train(
    inputs=X_train,
    targets=y_train,
    net=net,
    iterator=batch_iterator,
    num_epochs=6000,
    optimizer=gradient_descent,
)
print("Training complete!")

Training neural network...
0 4645.854973271896
1 4599.947320832474
2 4613.920004253435
3 4593.5835818624155
4 4631.291353717735
5 4579.055288873635
6 4634.752964766984
7 4596.749974035722
8 4609.899496225918
9 4670.416358912554
10 4657.857988147891
11 4618.892025029892
12 4601.357872342263
13 4613.367674516733
14 4647.268831868311
15 4585.8295663141635
16 4600.5818550972845
17 4622.611840763791
18 4640.442980367452
19 4651.0772445228995
20 4613.51754899677
21 4593.759173337833
22 4613.763049716428
23 4585.908143393059
24 4552.032091468618
25 4629.683858057002
26 4610.200021434561
27 4586.9473400606585
28 4617.009311759378
29 4622.2997516416945
30 4580.116732889989
31 4655.780224417459
32 4576.240056671717
33 4576.386228281753
34 4590.068389871436
35 4579.8233697412425
36 4647.096512020398
37 4591.8061418295165
38 4582.287354768376
39 4614.675707033822
40 4604.376304943294
41 4576.039449309358
42 4571.246194578912
43 4602.756853346818
44 4554.964015333305
45 4604.973945271316
46 4568.15

In [15]:
# The network emits one score per digit for every example; because the last
# layer is Tanh, each score lies between -1 and 1. For instance:
# [-0.02299153, 0.01232627, 0.21323621, 0.09431585, -0.02633155, 0.07625834, -0.07023935, -0.08304931, 0.61810449, 0.03938915]
# Here the predicted digit is 8, the position of the largest score (0.61810449).
# The test labels are one-hot rows, so to compare against them each row of
# scores is turned into the same format:
# [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
def get_max_of_matrix_per_row(matrix: np.ndarray) -> np.ndarray:
    """Return a matrix shaped like `matrix` with a 1 at each row's maximum.

    Every other entry is 0. When a row contains its maximum more than once,
    only the first occurrence is marked. The result keeps the dtype of
    `matrix`.
    """
    winning_columns = matrix.argmax(axis=1)
    row_indices = np.arange(matrix.shape[0])
    indicator = np.zeros_like(matrix)
    indicator[row_indices, winning_columns] = 1
    return indicator

print("Model Evaluation:")
# Run the test set through the network and one-hot the highest score per row
# so the predictions can be compared with the one-hot test labels.
y_pred = get_max_of_matrix_per_row(net.forward(X_test))
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

Model Evaluation:
0.7264


In [14]:
print("Saving model...")
# Create the output directory up front so that a missing folder cannot cause
# the freshly trained model to be lost.
os.makedirs('models', exist_ok=True)
# The context manager flushes and closes the file even if pickling raises.
with open('models/mnist_net.p', 'wb') as model_file:
    pickle.dump(net, model_file)
print("Saving complete!")

Saving model...
Saving complete!
