In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV

In [2]:
mnist = load_digits()
mnist

{'data': array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
        [ 0.,  0.,  0., ..., 10.,  0.,  0.],
        [ 0.,  0.,  0., ..., 16.,  9.,  0.],
        ...,
        [ 0.,  0.,  1., ...,  6.,  0.,  0.],
        [ 0.,  0.,  2., ..., 12.,  0.,  0.],
        [ 0.,  0., 10., ..., 12.,  1.,  0.]]),
 'target': array([0, 1, 2, ..., 8, 9, 8]),
 'frame': None,
 'feature_names': ['pixel_0_0',
  'pixel_0_1',
  'pixel_0_2',
  'pixel_0_3',
  'pixel_0_4',
  'pixel_0_5',
  'pixel_0_6',
  'pixel_0_7',
  'pixel_1_0',
  'pixel_1_1',
  'pixel_1_2',
  'pixel_1_3',
  'pixel_1_4',
  'pixel_1_5',
  'pixel_1_6',
  'pixel_1_7',
  'pixel_2_0',
  'pixel_2_1',
  'pixel_2_2',
  'pixel_2_3',
  'pixel_2_4',
  'pixel_2_5',
  'pixel_2_6',
  'pixel_2_7',
  'pixel_3_0',
  'pixel_3_1',
  'pixel_3_2',
  'pixel_3_3',
  'pixel_3_4',
  'pixel_3_5',
  'pixel_3_6',
  'pixel_3_7',
  'pixel_4_0',
  'pixel_4_1',
  'pixel_4_2',
  'pixel_4_3',
  'pixel_4_4',
  'pixel_4_5',
  'pixel_4_6',
  'pixel_4_7',
  'pixel_5_0',
  'pixel_5_1',
 

In [3]:
X, y = mnist['data'], mnist['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X.shape,y.shape)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(1797, 64) (1797,)


((1437, 64), (360, 64), (1437,), (360,))

### Default multilayer perceptron classifier

In [4]:
mlp_clf = MLPClassifier(early_stopping=True).fit(X_train, y_train)
y_pred = mlp_clf.predict(X_test)
y_pred 

array([6, 9, 3, 7, 2, 2, 5, 2, 5, 2, 1, 9, 4, 0, 4, 2, 3, 7, 8, 8, 4, 3,
       9, 7, 5, 6, 3, 5, 6, 3, 4, 9, 1, 4, 4, 6, 9, 4, 7, 6, 6, 9, 1, 3,
       6, 1, 3, 0, 6, 5, 5, 1, 9, 5, 6, 0, 9, 0, 0, 1, 0, 4, 5, 2, 4, 5,
       7, 0, 7, 5, 9, 9, 5, 4, 7, 0, 4, 5, 5, 9, 9, 0, 2, 3, 8, 0, 6, 4,
       4, 9, 1, 2, 8, 3, 5, 2, 9, 0, 4, 4, 4, 3, 5, 3, 1, 3, 5, 9, 4, 2,
       7, 7, 4, 4, 1, 9, 2, 7, 8, 7, 2, 6, 9, 4, 0, 7, 2, 7, 5, 8, 7, 5,
       7, 9, 0, 6, 6, 4, 2, 8, 0, 9, 4, 6, 9, 9, 6, 9, 0, 5, 5, 6, 6, 0,
       6, 4, 3, 9, 3, 8, 7, 2, 9, 0, 4, 5, 3, 6, 5, 9, 9, 8, 4, 2, 1, 3,
       7, 7, 2, 2, 3, 9, 8, 0, 3, 2, 2, 5, 6, 9, 9, 4, 1, 5, 4, 2, 3, 6,
       4, 8, 5, 9, 5, 7, 8, 9, 4, 8, 1, 5, 4, 4, 9, 6, 1, 8, 6, 0, 4, 5,
       2, 7, 4, 6, 4, 5, 6, 0, 3, 2, 3, 6, 7, 1, 9, 1, 4, 7, 6, 5, 8, 5,
       5, 1, 0, 1, 8, 8, 9, 5, 7, 6, 2, 2, 2, 3, 4, 8, 8, 3, 6, 0, 9, 7,
       7, 0, 1, 0, 4, 5, 1, 5, 3, 6, 0, 4, 1, 0, 0, 3, 6, 5, 9, 7, 3, 5,
       5, 9, 9, 8, 5, 3, 3, 2, 0, 5, 8, 3, 4, 0, 2,

In [5]:
y_test

array([6, 9, 3, 7, 2, 1, 5, 2, 5, 2, 1, 9, 4, 0, 4, 2, 3, 7, 8, 8, 4, 3,
       9, 7, 5, 6, 3, 5, 6, 3, 4, 9, 1, 4, 4, 6, 9, 4, 7, 6, 6, 9, 1, 3,
       6, 1, 3, 0, 6, 5, 5, 1, 9, 5, 6, 0, 9, 0, 0, 1, 0, 4, 5, 2, 4, 5,
       7, 0, 7, 5, 9, 5, 5, 4, 7, 0, 4, 5, 5, 9, 9, 0, 2, 3, 8, 0, 6, 4,
       4, 9, 1, 2, 8, 3, 5, 2, 9, 0, 4, 4, 4, 3, 5, 3, 1, 3, 5, 9, 4, 2,
       7, 7, 4, 4, 1, 9, 2, 7, 8, 7, 2, 6, 9, 4, 0, 7, 2, 7, 5, 8, 7, 5,
       7, 7, 0, 6, 6, 4, 2, 8, 0, 9, 4, 6, 9, 9, 6, 9, 0, 3, 5, 6, 6, 0,
       6, 4, 3, 9, 3, 9, 7, 2, 9, 0, 4, 5, 3, 6, 5, 9, 9, 8, 4, 2, 1, 3,
       7, 7, 2, 2, 3, 9, 8, 0, 3, 2, 2, 5, 6, 9, 9, 4, 1, 5, 4, 2, 3, 6,
       4, 8, 5, 9, 5, 7, 8, 9, 4, 8, 1, 5, 4, 4, 9, 6, 1, 8, 6, 0, 4, 5,
       2, 7, 4, 6, 4, 5, 6, 0, 3, 2, 3, 6, 7, 1, 5, 1, 4, 7, 6, 8, 8, 5,
       5, 1, 6, 2, 8, 8, 9, 9, 7, 6, 2, 2, 2, 3, 4, 8, 8, 3, 6, 0, 9, 7,
       7, 0, 1, 0, 4, 5, 1, 5, 3, 6, 0, 4, 1, 0, 0, 3, 6, 5, 9, 7, 3, 5,
       5, 9, 9, 8, 5, 3, 3, 2, 0, 5, 8, 3, 4, 0, 2,

In [6]:
accuracy_score(y_test, y_pred)

0.9611111111111111

In [7]:
confusion_matrix(y_test, y_pred).ravel()

array([33,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 27,  1,  0,  0,  0,  0,
        0,  0,  0,  0,  1, 31,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0, 33,
        0,  1,  0,  0,  0,  0,  0,  0,  0,  0, 46,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0, 44,  1,  0,  0,  2,  1,  0,  0,  0,  0,  0, 34,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0, 33,  0,  1,  0,  0,  0,  0,  1,
        1,  0,  0, 27,  1,  0,  0,  0,  0,  0,  1,  0,  0,  1, 38],
      dtype=int64)

### Best MLP hyper parameter tuning using GridSearchCV

In [8]:
# too much time to try all hyper parameters
parameters = {'hidden_layer_sizes':[(10,), (10,10,10,),(50,), (15,15,15,15,15,) ,(100,), (25,25,25,25,25,),(200,), (50,50,50,50,50,), (300,)], 
              'activation' : ('identity', 'logistic', 'tanh','relu'),
              'solver' :('lbfgs', 'sgd','adam') ,
              # 'alpha' : [0.1,0.01,0.001,0.0001,0.00001,0.000001],
              'learning_rate':('constant', 'invscaling','adaptive'),
              'learning_rate_init' : [0.1,0.01,0.001,0.0001],
              # 'power_t' : [0.5,0.1,0.01,0.001,0.0001],
              'early_stopping' : [True],
              # 'validation_fraction' : [0.1,0.2,0.3,0.4] 
             }

In [9]:
clf = GridSearchCV(MLPClassifier(), parameters,n_jobs=-1)
clf.fit(mnist.data, mnist.target)
clf.cv_results_.keys()

76 fits failed out of a total of 6480.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
76 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Asus\anaconda3\envs\mlenv\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Asus\anaconda3\envs\mlenv\lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\Asus\anaconda3\envs\mlenv\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py", line 753, in fit
    return self._fit(X, y, incremental=False)
  File "C:\Users\Asus\anaconda3\envs\mlenv\lib\site-packages\sklearn\neural_ne

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_activation', 'param_early_stopping', 'param_hidden_layer_sizes', 'param_learning_rate', 'param_learning_rate_init', 'param_solver', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])

In [10]:
clf.best_estimator_

In [11]:
avg = []
for i in range(10):
    best_mlp = MLPClassifier(activation='tanh', early_stopping=True, hidden_layer_sizes=(300,),learning_rate='adaptive', learning_rate_init=0.01, solver='sgd')
    best_mlp.fit(X_train, y_train)
    y_pred_best = best_mlp.predict(X_test)
    acc= accuracy_score(y_test, y_pred_best)
    print(acc)
    avg.append(acc)
sum(avg)/10

0.9777777777777777
0.9722222222222222
0.9833333333333333
0.9777777777777777
0.9694444444444444
0.9638888888888889
0.9722222222222222
0.9722222222222222
0.975
0.9666666666666667


0.9730555555555556

In [12]:
avg = []
for i in range(10):
    best_mlp = MLPClassifier(early_stopping=True)
    best_mlp.fit(X_train, y_train)
    y_pred_best = best_mlp.predict(X_test)
    acc= accuracy_score(y_test, y_pred_best)
    print(acc)
    avg.append(acc)
sum(avg)/10

0.9472222222222222
0.9583333333333334
0.9722222222222222
0.9611111111111111
0.9527777777777777
0.9666666666666667
0.9777777777777777
0.9611111111111111
0.9527777777777777
0.9555555555555556


0.9605555555555556