In [1]:
# This code is just the same as mnist_training.py (this is just the jupyter notebook version)
# This code is based on the following:
# https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html
# Author: Arthur Mensch <arthur.mensch@m4x.org>
# License: BSD 3 clause
#
# https://github.com/Fedzbar/deepfedz/blob/master/mnist.py
# Author: Federico Barbero <fb548@cam.ac.uk>
# License: MIT License

import pickle
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.utils import check_random_state

from bbnet.nn import NeuralNet
from bbnet.layers import Linear, Tanh
from bbnet.train import train
from bbnet.data import MiniBatchStochastic
from bbnet.optimizers import GD

# How many examples to draw for the training set and for the test set
train_samples, test_samples = 10000, 10000

In [2]:
# Load data from https://www.openml.org/d/554
# as_frame=True requests pandas objects explicitly instead of relying on the
# library default, which differs between scikit-learn releases (older ones
# return numpy arrays). Downstream cells then always receive the same types.
print("Fetching mnist_784 data...")
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=True)

Fetching mnist_784 data...


In [7]:
# Randomize and reshape data before taking training and test examples.
# The X and y produced by the loading cell are never rebound here: every step
# writes to a new name, so re-running this cell always starts from the raw data
# instead of shuffling/scaling an already processed array.
print("Randomizing and reshaping data...")
random_state = check_random_state(0)
permutation = random_state.permutation(len(X))
# np.asarray works for pandas objects as well as plain numpy arrays
X_shuffled = np.asarray(X)[permutation]
y_shuffled = np.asarray(y)[permutation]
# Flatten each example into one row of feature values (a no-op when X is already 2-D)
X_shuffled = X_shuffled.reshape((X_shuffled.shape[0], -1))

# Scale data to range from 0.01 to 1.0 (assumes raw values lie in 0..255 -- TODO confirm)
X_scaled = (X_shuffled / 255.0 * 0.99) + 0.01

# Split data; reusing the seeded random_state makes the split reproducible
print("Splitting data...")
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_shuffled, train_size=train_samples, test_size=test_samples,
    random_state=random_state)

Randomizing and reshaping data...
Splitting data...


In [9]:
# Labels come as ['1', '5', ...], we want to convert them to a one hot matrix structure
# Like so: '1' => [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] '5' => [0, 0, 0, 0, 0, 1, 0, 0, 0, 0] and so on
def string_array_to_one_hot_matrix(to_convert: np.ndarray, num_classes: int = 10) -> np.ndarray:
    """Convert integer-like labels (e.g. '1', '5') into a one hot matrix.

    With the default of 10 classes:
    '1' => [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
    '5' => [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]

    Args:
        to_convert: Array-like of labels; every entry must be convertible to int.
            Any shape is accepted and flattened, so an (n, 1) column vector is
            treated the same as a flat array of n labels.
        num_classes: Number of columns of the result (defaults to the 10 digits 0 - 9).

    Returns:
        A float matrix of shape (number of labels, num_classes) holding a single
        1 per row, in the column given by that row's label.

    Raises:
        IndexError: If a label is >= num_classes (raised by numpy indexing).
        ValueError: If a label cannot be converted to int.

    Note:
        Negative labels are not rejected; numpy counts them from the last column.
    """
    # Flatten first: a 2-D label array would otherwise broadcast against the row
    # index and set several columns per row.
    labels = np.asarray(to_convert).astype(int).ravel()
    # matrix size: number of labels * num_classes options
    matrix = np.zeros((labels.size, num_classes))
    matrix[np.arange(labels.size), labels] = 1
    return matrix

# Swap the raw labels of both splits for their one hot encodings
print("Converting labels to one hot matrix...")
y_train, y_test = map(string_array_to_one_hot_matrix, (y_train, y_test))

Converting labels to one hot matrix...


In [10]:
print("Initializing neural network...")
# Widths of consecutive layers: 784 inputs -> 300 -> 100 -> 10 outputs.
# Every Linear layer is followed by a Tanh activation, including the last one.
layer_sizes = [784, 300, 100, 10]
layers = []
for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
    layers.append(Linear(input_size=n_in, output_size=n_out))
    layers.append(Tanh())
net = NeuralNet(layers)

Initializing neural network...


In [11]:
print("Training neural network...")
# Mini-batches of 64 examples, gradient descent with learning rate 0.01
batch_iterator = MiniBatchStochastic(batch_size=64)
gradient_descent = GD(lr=0.01)
train(
    inputs=X_train,
    targets=y_train,
    net=net,
    iterator=batch_iterator,
    num_epochs=10000,
    optimizer=gradient_descent,
)
print("Training complete!")

Training neural network...
0 101803.12687269434
1 98437.88583788443
2 96178.56672386342
3 94265.8820731227
4 92437.58459721357
5 90824.66548993443
6 89295.45017323513
7 87869.89358514156
8 86814.28768219467
9 85877.87224286408
10 85076.75863236052
11 84257.45965181422
12 83676.43281722542
13 83024.88874979285
14 82534.6686095868
15 82044.57666490419
16 81546.84563194182
17 81141.73966761968
18 80782.31142414967
19 80396.02489402653
20 80088.48384864033
21 79747.96285958802
22 79459.80083756454
23 79208.76652052034
24 78946.16999052941
25 78682.87341656217
26 78442.1838587374
27 78196.0887222518
28 78022.35172577492
29 77811.67216690414
30 77606.92185247502
31 77430.34793290068
32 77253.89637318501
33 77143.61712246708
34 76961.76675344276
35 76833.1283788393
36 76630.35831063964
37 76519.55144248428
38 76397.1163745567
39 76225.21399800823
40 76074.00324008954
41 75960.21552463177
42 75873.29563290588
43 75723.04394427076
44 75625.31436353829
45 75475.11156368775
46 75370.5787009045
47

In [12]:
# Our neural network's predictions come as "probabilities" or "confidence levels" with 
# values ranging from -1 to 1 (since last activation layer is Tanh). An example output is:
# [-0.02299153, 0.01232627, 0.21323621, 0.09431585, -0.02633155, 0.07625834, -0.07023935, -0.08304931, 0.61810449, 0.03938915]
# This means that the prediction of the neural network is 8 since it has the highest value (0.61810449)
# To test our network's accuracy, we need to convert it to a one hot row like this:
# [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
# Since this is how we formatted our test labels
def get_max_of_matrix_per_row(matrix: np.ndarray) -> np.ndarray:
    """Mark the largest entry of every row with 1 and everything else with 0.

    Args:
        matrix: 2-D array with one row per example.

    Returns:
        A new array with the shape and dtype of ``matrix``. Each row holds a
        single 1 at the column of that row's maximum (the first such column
        when the maximum is tied); ``matrix`` itself is left unchanged.
    """
    indicator = np.zeros_like(matrix)
    row_ids = np.arange(len(matrix))
    best_columns = np.argmax(matrix, axis=1)
    indicator[row_ids, best_columns] = 1
    return indicator

print("Model Evaluation:")
# Turn the raw network outputs into one hot rows so they can be compared
# with the one hot test labels
y_pred = get_max_of_matrix_per_row(net.forward(X_test))
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

Model Evaluation:
0.8131


In [13]:
print("Saving model...")
# The with block closes the file even if pickling raises, so no handle is leaked
# and the data is flushed to disk. The 'models' directory must already exist.
# NOTE: only unpickle this file from a trusted location; loading a pickle can
# execute arbitrary code.
with open('models/mnist_net.p', 'wb') as model_file:
    pickle.dump(net, model_file)
print("Saving complete!")

Saving model...
Saving complete!
