# This is an implementation of a Multilayer Perceptron using Scikit-learn

It uses the whole MNIST dataset to try to learn a model that accurately predicts class labels for handwritten digits.


In [1]:
import numpy as np
from scipy.ndimage import convolve
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.externals import joblib
import os.path
import scipy as sp
import pylab as pl
%matplotlib inline

In [3]:
# File in which the fitted model is cached between runs.
PATH = 'mlp_model.pkl'

# Set to True to run a grid search over several network sizes instead of
# fitting a single default-sized MLP. It is off by default because the search
# put too big a strain on the author's machine's memory.
USE_GRID_SEARCH = False

# Seed shared by the train/test split and the MLP so that a fresh
# "Restart & Run All" reproduces the same model.
SEED = 0

# Only executes if this is run as the 'main' program, not when it is imported
# somewhere else.
if __name__ == '__main__':
    print('Fetching and loading MNIST data')

    # sklearn helper that loads the data from a local cache or from the web.
    # NOTE(review): fetch_mldata was removed from newer scikit-learn releases
    # (fetch_openml is the documented replacement) -- confirm against the
    # installed version before upgrading.
    mnist = fetch_mldata('MNIST original')

    # Assign data and labels to X and y.
    X, y = mnist.data, mnist.target

    # Split the dataset into train and test sets. Pixel values are scaled by
    # 1/255 first; 'random_state' seeds the shuffling that decides which
    # samples end up in which set.
    X_train, X_test, y_train, y_test = train_test_split(
        X / 255., y, test_size=0.25, random_state=SEED)

    print('Got MNIST with %d training- and %d test samples' % (len(y_train), len(y_test)))
    print('Digit distribution in whole dataset:', np.bincount(y.astype('int64')))

    if os.path.exists(PATH):
        print('Loading model from file.')
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code -- only load model files you produced yourself.
        loaded = joblib.load(PATH)
        # The cache may hold either a fitted GridSearchCV (which exposes
        # `best_estimator_`) or a plain fitted classifier. Unconditionally
        # reading `.best_estimator_` crashed with AttributeError whenever the
        # cache had been written by the non-grid-search path.
        clf = getattr(loaded, 'best_estimator_', loaded)

    else:
        print('Training model.')

        # MLP model from sklearn; verbose=10 means progress messages are printed.
        # NOTE(review): per the scikit-learn docs `learning_rate` is only used
        # by solver='sgd'; with the default solver it has no effect.
        mlp = MLPClassifier(verbose=10, learning_rate='adaptive', random_state=SEED)

        if USE_GRID_SEARCH:
            # Grid search over different network sizes with 5-fold CV.
            params = {'hidden_layer_sizes': [(256,), (512,), (128, 256, 128,)]}
            search = GridSearchCV(mlp, params, verbose=10, n_jobs=-1, cv=5)
            search.fit(X_train, y_train)
            print('Finished with grid search with best mean cross-validated score:', search.best_score_)
            print('Best params appeared to be', search.best_params_)
            joblib.dump(search, PATH)
            clf = search.best_estimator_
        else:
            clf = mlp
            clf.fit(X_train, y_train)
            joblib.dump(clf, PATH)

Fetching and loading MNIST data
Got MNIST with 52500 training- and 17500 test samples
Digit distribution in whole dataset: [6903 7877 6990 7141 6824 6313 6876 7293 6825 6958]
Training model.
Iteration 1, loss = 0.44156265
Iteration 2, loss = 0.21363238
Iteration 3, loss = 0.16140911
Iteration 4, loss = 0.13016280
Iteration 5, loss = 0.10667933
Iteration 6, loss = 0.09193744
Iteration 7, loss = 0.07891474
Iteration 8, loss = 0.06828535
Iteration 9, loss = 0.06039063
Iteration 10, loss = 0.05365600
Iteration 11, loss = 0.04711893
Iteration 12, loss = 0.04117964
Iteration 13, loss = 0.03707792
Iteration 14, loss = 0.03269249
Iteration 15, loss = 0.02882466
Iteration 16, loss = 0.02595738
Iteration 17, loss = 0.02373293
Iteration 18, loss = 0.02051003
Iteration 19, loss = 0.01810366
Iteration 20, loss = 0.01622967
Iteration 21, loss = 0.01421112
Iteration 22, loss = 0.01333426
Iteration 23, loss = 0.01180498
Iteration 24, loss = 0.01028507
Iteration 25, loss = 0.00937378
Iteration 26, loss

In [4]:
# Report the mean accuracy of the fitted classifier on each split, training
# split first and held-out test split second.
for caption, features, labels in (
    ('Training accuracy: ', X_train, y_train),
    ('Test accuracy: ', X_test, y_test),
):
    print(caption, clf.score(features, labels))

Training accuracy:  0.9999047619047619
Test accuracy:  0.9766857142857143
