In [None]:
## Run MLP with 2 hidden layers without Bayesian updating 

In [1]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale

import theano
floatX = theano.config.floatX 
import theano.tensor as T 
import pymc3 as pm

import matplotlib.pyplot as plt 
from pipetools import pipe # similar to pipe operator in R 

In [2]:
# Download MNIST data from mldata.org

import tempfile
mnist_home = tempfile.mkdtemp()
mnist = fetch_mldata("MNIST original", data_home = mnist_home)

print(mnist.data.shape) # 70,000 observations with 28*28 pixels 
print(mnist.target.shape) # 70,000 targets 

(70000, 784)
(70000,)


In [201]:
# Divide the data into X matrix and Y matrix 
def get_mnist_X_Y(mnist):
    X = mnist.data 
    Y = mnist.target
    k = len(np.unique(Y)) # the number of categories, which is 10 
    n = X.shape[0] # the number of observations 
    
    X = (X - X.min())  / (X.max() - X.min()) # scaling data
    
    print("Number of images: ", n)
    return X, Y

X, Y = get_mnist_X_Y(mnist)
print("X shape is {} \n Y shape is {}".format(X.shape, Y.shape))

Number of images:  70000
X shape is (70000, 784) 
 Y shape is (70000,)


In [202]:
# Split the data into training and testing sets for both X and Y matrices

X_tr, X_ts, Y_tr, Y_ts = train_test_split(X, Y, test_size=0.1, random_state=1)
print("X_tr shape is {} \n Y_tr shape is {}".format(X_tr.shape, Y_tr.shape))
print("X_ts shape is {} \n Y_ts shape is {}".format(X_ts.shape, Y_ts.shape))

X_tr shape is (63000, 784) 
 Y_tr shape is (63000,)
X_ts shape is (7000, 784) 
 Y_ts shape is (7000,)


In [None]:
"""
We will run MLP classifier with two layers, 100 neurons each,
without Bayesian update. 
Code based on http://scikit-learn.org/stable/modules/neural_networks_supervised.html 
"""
import webbrowser

url = "http://scikit-learn.org/stable/modules/neural_networks_supervised.html"
# Open url in a new tab, if a browser window is already open.
webbrowser.open_new_tab(url)

In [203]:
from sklearn.neural_network import MLPClassifier

hidden1, hidden2 = 100, 100
mlp = MLPClassifier(hidden_layer_sizes=(hidden1, hidden2))
mlp.fit(X_tr, Y_tr)
predictions = mlp.predict(X_ts)

from sklearn.metrics import classification_report,confusion_matrix

print(confusion_matrix(Y_ts, predictions))
print(classification_report(Y_ts, predictions))

[[652   1   1   0   1   3   4   0   3   1]
 [  0 781   2   0   1   0   0   2   1   1]
 [  1   4 715   5   1   2   2   5   3   0]
 [  1   1   2 667   0   6   0   7   6   1]
 [  0   1   1   0 646   0   2   2   0   7]
 [  1   0   2   2   1 603   1   0   1   1]
 [  2   1   0   0   4   4 696   0   0   0]
 [  0   1   4   1   0   0   0 769   1   1]
 [  1   4   0   3   1   7   2   2 650   7]
 [  3   0   0   5   7   0   0   6   1 663]]
             precision    recall  f1-score   support

        0.0       0.99      0.98      0.98       666
        1.0       0.98      0.99      0.99       788
        2.0       0.98      0.97      0.98       738
        3.0       0.98      0.97      0.97       691
        4.0       0.98      0.98      0.98       659
        5.0       0.96      0.99      0.97       612
        6.0       0.98      0.98      0.98       707
        7.0       0.97      0.99      0.98       777
        8.0       0.98      0.96      0.97       677
        9.0       0.97      0.97      

The MLP classifier had an average precision of 98%, which is higher than the accuracy of the Bayesian result. Like the Bayesian result, it did relatively poorly on predicting the 9's, but it did better than Bayesian on predicting the 2's. 

In [171]:
mlp.get_params() # parameters of the MLP classifier 

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100, 100),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_iter': 200,
 'momentum': 0.9,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [172]:
[weight.shape for weight in mlp.coefs_] # inspect the shape of w0, w1, w2

[(784, 100), (100, 100), (100, 10)]

In [173]:
[bias.shape for bias in mlp.intercepts_] # inspect the shape of b0, b1, b2

[(100,), (100,), (10,)]