### Digit Recognition ANN Model

### ANN Model for 40x40 pixel dataset

In [3]:
from sklearn.model_selection import GridSearchCV #To find Best number of nodes
from sklearn.neural_network import MLPClassifier #MLP Classifier
from sklearn.metrics import confusion_matrix # To generate confusion matrix
import numpy as np #To convert into numpy arrays
import pandas as pd #To import datasets
import os #To change root directory
from sklearn.metrics import accuracy_score,classification_report #To calculate accuracy, precision,recall

In [4]:
#Importing Dataset
os.chdir("C:\\Users\\Prasanna Kumar\\Desktop\\Projects\\handwritten digit recognition\\dataset")
label_train = pd.read_csv('train_numbers.csv')
y_train = label_train['Label']
label_test = pd.read_csv('test_numbers.csv')
y_test = label_test['Label']

In [5]:
X_train = label_train.iloc[:,1:1601]
X_test = label_test.iloc[:,1:1601]

In [4]:
#Building an MLP Classifier
ann = MLPClassifier(hidden_layer_sizes=(80,), # one hidden layer with 80 nodes
                    activation='logistic', # logistic/ sigmoid function
                    solver='adam', # stochastic gradient descent
                    alpha=1e-5, # regularization
                    random_state=1,
                    max_iter=2000,
                    learning_rate = 'adaptive',
                    learning_rate_init = 0.002)
ann.fit(X_train, y_train)

MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(80,), learning_rate='adaptive',
       learning_rate_init=0.002, max_iter=2000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [5]:
y_predict = ann.predict(X_test)

In [6]:
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)

Accuracy score: 0.88


In [7]:
ConfusionMatrix(y_test, y_predict)


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        399    31     9     9   10      1    2     41    5     6      513
Five          16   418     3     6   11      3    5     29    4    18      513
Four          12     5   429    31    4      1   10      0   10    11      513
Nine           6     5    17   460    1     18    0      4    0     2      513
One            9     6     1     8  469      5    3      8    1     3      513
Seven          5     2     3    22    4    474    0      0    1     2      513
Six            6     3     6     0    2      0  483      2    5     6      513
Three          7    25     3     3    7     10    0    448    5     5      513
Two           13     7     6     1    1      0    7     17  456     5      513
Zero           6    10     5     0    3      4    1      2    2   480      513
__all__      479   512   482   540  512    516  511 

In [54]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.88      0.77      0.82       513
       Five       0.84      0.83      0.83       513
       Four       0.87      0.86      0.87       513
       Nine       0.88      0.87      0.87       513
        One       0.90      0.93      0.92       513
      Seven       0.86      0.93      0.90       513
        Six       0.94      0.93      0.93       513
      Three       0.82      0.85      0.83       513
        Two       0.87      0.89      0.88       513
       Zero       0.93      0.94      0.93       513

avg / total       0.88      0.88      0.88      5130



#### Calculating the optimum Number of nodes

In [8]:
# create a new ann classifier
ann = MLPClassifier(activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)

# set neurons of hidden layer to numbers between 50 and 200
params = { 'hidden_layer_sizes' : [[i] for i in range(50, 200)] }

In [9]:
# setup grid search and cross validation
CV_ann = GridSearchCV(estimator=ann, param_grid=params, cv=10, scoring='accuracy')

In [10]:
CV_ann.fit(X_train, y_train);


In [11]:
#Best number of neurons
best_neurons = CV_ann.best_params_['hidden_layer_sizes'][0]
print(best_neurons)

184


In [12]:
ann = MLPClassifier(hidden_layer_sizes=(184,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(X_train, y_train)
y_predict = ann.predict(X_test)

In [13]:
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)

Accuracy score: 0.91


In [14]:
ConfusionMatrix(y_test, y_predict)

Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        427     9    13     6   12      2    3     30    5     6      513
Five           1   451     6     6    7      1    4     19    4    14      513
Four           5     7   437    39    1      0    7      2   10     5      513
Nine           5     3     8   473    1     17    0      4    0     2      513
One            9     4     0     4  490      0    1      2    1     2      513
Seven          1     1     3    11    5    483    0      4    4     1      513
Six            4     4     6     0    3      0  486      0    4     6      513
Three          6    13     1     8    7      5    0    462    7     4      513
Two           12     3     3     1    1      0    7     14  468     4      513
Zero           3     3     2     1    2      4    0      4    2   492      513
__all__      473   498   479   549  529    512  508 

In [53]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.88      0.77      0.82       513
       Five       0.84      0.83      0.83       513
       Four       0.87      0.86      0.87       513
       Nine       0.88      0.87      0.87       513
        One       0.90      0.93      0.92       513
      Seven       0.86      0.93      0.90       513
        Six       0.94      0.93      0.93       513
      Three       0.82      0.85      0.83       513
        Two       0.87      0.89      0.88       513
       Zero       0.93      0.94      0.93       513

avg / total       0.88      0.88      0.88      5130



### ANN Model After PCA

In [15]:
import numpy as np
from sklearn.decomposition import PCA #to generate pca

train = np.array(X_train)
test = np.array(X_test)

#No.of components
n_components = 1600
pca = PCA(n_components=n_components,svd_solver='randomized').fit(train)
s = pca.explained_variance_ratio_
sum=0.0
comp=0
#No.of components required to explain 99% variance
for _ in s:
    sum += _
    comp += 1
    if(sum>=0.99):
        break
n_components = comp


#fitting pca
pca = PCA(n_components=n_components,svd_solver='randomized').fit(train)

#Transforming data
xtrain = pca.transform(train)
xtest = pca.transform(test)

In [16]:
ann = MLPClassifier(hidden_layer_sizes=(184,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate = 'adaptive',
                    learning_rate_init = 0.002)
ann.fit(xtrain, y_train)
y_predict = ann.predict(xtest)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.88


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        394    34    19    10   14      7    2     20    6     7      513
Five           5   412    12     3   16      6   12     34    5     8      513
Four           9     5   444    18    1      2   11      2   14     7      513
Nine           6     3    16   443    4     25    0     11    4     1      513
One            2     3     3     4  492      2    0      3    3     1      513
Seven          2     3     6     9    8    481    0      1    3     0      513
Six            5     6     9     0    5      0  478      0    5     5      513
Three          9    17     7     3    9     14    0    444    7     3      513
Two           11     5    13     1    0      2    4     15  456     6      513
Zero           3     5     4     0    0      7    5      2    5   482      513
__all__      446   493   533   491  549    546  512 

In [None]:
report = classification_report(y_test,y_predict)
print(report)

In [17]:
# create a new ann classifier
ann = MLPClassifier(activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)

# set neurons of hidden layer to numbers between 50 and 200
params = { 'hidden_layer_sizes' : [[i] for i in range(50, 200)] }
# setup grid search and cross validation
CV_ann = GridSearchCV(estimator=ann, param_grid=params, cv=10, scoring='accuracy')
CV_ann.fit(xtrain, y_train);
best_neurons = CV_ann.best_params_['hidden_layer_sizes'][0]
print(best_neurons)

156


In [18]:
ann = MLPClassifier(hidden_layer_sizes=(best_neurons,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate = 'adaptive',
                    learning_rate_init = 0.002)
ann.fit(xtrain, y_train)
y_predict = ann.predict(xtest)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.89


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        409    36    13     8    7      6    3     20    5     6      513
Five           9   419     7     6   12      8    9     32    5     6      513
Four          11     5   431    34    2      2    8      4   13     3      513
Nine           3     5    13   455    2     18    0     11    2     4      513
One           10     3     6     7  481      0    0      5    1     0      513
Seven          1     1     7    14    5    480    0      4    0     1      513
Six            4     5     9     0    7      0  480      0    4     4      513
Three          7    20     6     3    3     11    1    453    8     1      513
Two           17     3     8     1    0      1    6     20  451     6      513
Zero           3     6     3     0    2      5    1      4    1   488      513
__all__      474   503   503   528  521    531  508 

In [38]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.87      0.79      0.83       513
       Five       0.86      0.81      0.84       513
       Four       0.85      0.85      0.85       513
       Nine       0.87      0.87      0.87       513
        One       0.89      0.95      0.92       513
      Seven       0.89      0.95      0.92       513
        Six       0.94      0.92      0.93       513
      Three       0.84      0.85      0.85       513
        Two       0.88      0.88      0.88       513
       Zero       0.92      0.96      0.94       513

avg / total       0.88      0.88      0.88      5130



### 100x100 Pixel Images

In [39]:
os.chdir("C:/Users/Sanjeev Varma/Desktop/Capstone/Signature Recognition/Data")
label_train = pd.read_csv('train100.csv')
y_train = label_train['Label']
label_test = pd.read_csv('test100.csv')
y_test = label_test['Label']

In [40]:
X_train = label_train.iloc[:,1:10001]
X_test = label_test.iloc[:,1:10001]

In [41]:
ann = MLPClassifier(hidden_layer_sizes=(189,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(X_train, y_train)
y_predict = ann.predict(X_test)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.84


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        398    27    20     8    5      2    7     21    6    19      513
Five          27   379    12     5   12      3   12     37    7    19      513
Four          19     4   396    45    0      1   18      3   10    17      513
Nine          17     1    37   389    1     38    0     24    1     5      513
One           22     2     5     4  443      4    4     20    3     6      513
Seven          4     0     8    14    9    470    0      3    4     1      513
Six            6     4     9     0    4      0  472      1   10     7      513
Three         15    23     3     4    2      6    1    438   13     8      513
Two           16     8     8     2    1      2   14     13  443     6      513
Zero           7     5    10     1    2      5    7      5    1   470      513
__all__      531   453   508   472  479    531  535 

In [42]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.75      0.78      0.76       513
       Five       0.84      0.74      0.78       513
       Four       0.78      0.77      0.78       513
       Nine       0.82      0.76      0.79       513
        One       0.92      0.86      0.89       513
      Seven       0.89      0.92      0.90       513
        Six       0.88      0.92      0.90       513
      Three       0.78      0.85      0.81       513
        Two       0.89      0.86      0.88       513
       Zero       0.84      0.92      0.88       513

avg / total       0.84      0.84      0.84      5130



In [43]:
# create a new ann classifier
ann = MLPClassifier(activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)

# set neurons of hidden layer to numbers between 50 and 200
params = { 'hidden_layer_sizes' : [[i] for i in range(50, 200)] }
# setup grid search and cross validation
CV_ann = GridSearchCV(estimator=ann, param_grid=params, cv=10, scoring='accuracy')
CV_ann.fit(xtrain, y_train);
best_neurons = CV_ann.best_params_['hidden_layer_sizes'][0]
print(best_neurons)

197


In [23]:
ann = MLPClassifier(hidden_layer_sizes=(best_neurons,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(X_train, y_train)
y_predict = ann.predict(X_test)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.80


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        306    38    12    23   25      6    5     46   34    18      513
Five          19   356    16    10   10      1   17     59   10    15      513
Four          10     3   379    74    1      1    8      3   21    13      513
Nine           9     6    35   406    0     42    0     12    3     0      513
One           28     4    10     9  430     14    3      5    3     7      513
Seven          4     0     9    38    4    450    0      1    5     2      513
Six            5     2     6     0    7      0  464      1   24     4      513
Three          3    15     9    11   12      7    0    430   21     5      513
Two            9     6     7     0    5      2   15     10  453     6      513
Zero           0     7    11     3    7      4    4      4   24   449      513
__all__      393   437   494   574  501    527  516 

In [44]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.75      0.78      0.76       513
       Five       0.84      0.74      0.78       513
       Four       0.78      0.77      0.78       513
       Nine       0.82      0.76      0.79       513
        One       0.92      0.86      0.89       513
      Seven       0.89      0.92      0.90       513
        Six       0.88      0.92      0.90       513
      Three       0.78      0.85      0.81       513
        Two       0.89      0.86      0.88       513
       Zero       0.84      0.92      0.88       513

avg / total       0.84      0.84      0.84      5130



### PCA 

In [24]:
import numpy as np
from sklearn.decomposition import PCA
train = np.array(X_train)
n_components = 2000
pca = PCA(n_components=n_components,svd_solver='randomized').fit(train)
s = pca.explained_variance_ratio_
sum=0.0
comp=0

for _ in s:
    sum += _
    comp += 1
    if(sum>=0.99):
        break
train = np.array(X_train)
test = np.array(X_test)

n_components = comp

#fitting pca
pca = PCA(n_components=n_components,svd_solver='randomized').fit(train)

xtrain = pca.transform(train)
xtest = pca.transform(test)

In [25]:
ann = MLPClassifier(hidden_layer_sizes=(189,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(xtrain, y_train)
y_predict = ann.predict(xtest)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.85


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        384    27    24    16   15      4    4     23    8     8      513
Five          13   406    12     6   14      6   11     25    5    15      513
Four          16     8   424    28    2      2    7      2   17     7      513
Nine           9    10    25   419    4     33    0      8    2     3      513
One           10     7     4     2  471      3    2     12    0     2      513
Seven          4     3     2    20    6    464    1      4    7     2      513
Six            5     8    19     0    5      0  462      2    8     4      513
Three         13    23    11     5   10     11    0    427   11     2      513
Two           24     7    14     1    3      6    8     16  426     8      513
Zero           5    11     6     0    3      5    6      3    4   470      513
__all__      483   510   541   497  533    534  501 

In [45]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.75      0.78      0.76       513
       Five       0.84      0.74      0.78       513
       Four       0.78      0.77      0.78       513
       Nine       0.82      0.76      0.79       513
        One       0.92      0.86      0.89       513
      Seven       0.89      0.92      0.90       513
        Six       0.88      0.92      0.90       513
      Three       0.78      0.85      0.81       513
        Two       0.89      0.86      0.88       513
       Zero       0.84      0.92      0.88       513

avg / total       0.84      0.84      0.84      5130



In [26]:
# create a new ann classifier
ann = MLPClassifier(activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)

# set neurons of hidden layer to numbers between 50 and 200
params = { 'hidden_layer_sizes' : [[i] for i in range(50, 200)] }
# setup grid search and cross validation
CV_ann = GridSearchCV(estimator=ann, param_grid=params, cv=10, scoring='accuracy')
CV_ann.fit(xtrain, y_train);
best_neurons = CV_ann.best_params_['hidden_layer_sizes'][0]
print(best_neurons)

143


In [27]:
ann = MLPClassifier(hidden_layer_sizes=(best_neurons,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(xtrain, y_train)
y_predict = ann.predict(xtest)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.85


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        391    26    22    15   10      8    2     22   12     5      513
Five          12   402     8     8   13      9   15     30    6    10      513
Four           7    12   428    29    3      3   10      3   13     5      513
Nine          14     2    33   420    2     26    0     11    2     3      513
One           11     2     9     8  468      7    1      3    2     2      513
Seven          5     4     2    11    6    476    0      3    3     3      513
Six            3    10    17     0   10      0  456      0    9     8      513
Three         15    22     6     8    7     16    4    418   10     7      513
Two           20     6     9     0    3      5    7     19  439     5      513
Zero           5     7     7     1    3      4    2     10    3   471      513
__all__      483   493   541   500  525    554  497 

In [46]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.75      0.78      0.76       513
       Five       0.84      0.74      0.78       513
       Four       0.78      0.77      0.78       513
       Nine       0.82      0.76      0.79       513
        One       0.92      0.86      0.89       513
      Seven       0.89      0.92      0.90       513
        Six       0.88      0.92      0.90       513
      Three       0.78      0.85      0.81       513
        Two       0.89      0.86      0.88       513
       Zero       0.84      0.92      0.88       513

avg / total       0.84      0.84      0.84      5130



### Reverse the Integers

In [28]:
os.chdir("C:/Users/Sanjeev Varma/Desktop/Capstone/Signature Recognition/Data")
label_train = pd.read_csv('trainzero.csv')
y_train = label_train['Label']
label_test = pd.read_csv('testzero.csv')
y_test = label_test['Label']

In [29]:
X_train = label_train.iloc[:,1:1601]
X_test = label_test.iloc[:,1:1601]

In [30]:
ann = MLPClassifier(hidden_layer_sizes=(189,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(X_train, y_train)
y_predict = ann.predict(X_test)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)

Accuracy score: 0.10


In [47]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.75      0.78      0.76       513
       Five       0.84      0.74      0.78       513
       Four       0.78      0.77      0.78       513
       Nine       0.82      0.76      0.79       513
        One       0.92      0.86      0.89       513
      Seven       0.89      0.92      0.90       513
        Six       0.88      0.92      0.90       513
      Three       0.78      0.85      0.81       513
        Two       0.89      0.86      0.88       513
       Zero       0.84      0.92      0.88       513

avg / total       0.84      0.84      0.84      5130



In [31]:
# create a new ann classifier
ann = MLPClassifier(activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)

# set neurons of hidden layer to numbers between 50 and 200
params = { 'hidden_layer_sizes' : [[i] for i in range(50, 200)] }
# setup grid search and cross validation
CV_ann = GridSearchCV(estimator=ann, param_grid=params, cv=10, scoring='accuracy')
CV_ann.fit(X_train, y_train);
best_neurons = CV_ann.best_params_['hidden_layer_sizes'][0]
print(best_neurons)

50


In [32]:
ann = MLPClassifier(hidden_layer_sizes=(best_neurons,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(xtrain, y_train)
y_predict = ann.predict(xtest)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.84


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        380    31    18    17   18      4    2     31    8     4      513
Five          15   380    14    11   17      4    9     42    5    16      513
Four          17     7   396    44    2      3   15      6   10    13      513
Nine          14     4    22   409    4     42    0     10    4     4      513
One           12     6     2     6  457      5    5     13    4     3      513
Seven          5     4     6    21   10    463    0      2    2     0      513
Six            5     4     7     0    7      0  477      1    7     5      513
Three         16    24     4     6   10     10    0    428    6     9      513
Two           19     3    10     1    2      5   11     19  437     6      513
Zero           4     8    10     0    6      5    3      7    4   466      513
__all__      487   471   489   515  533    541  522 

In [48]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.75      0.78      0.76       513
       Five       0.84      0.74      0.78       513
       Four       0.78      0.77      0.78       513
       Nine       0.82      0.76      0.79       513
        One       0.92      0.86      0.89       513
      Seven       0.89      0.92      0.90       513
        Six       0.88      0.92      0.90       513
      Three       0.78      0.85      0.81       513
        Two       0.89      0.86      0.88       513
       Zero       0.84      0.92      0.88       513

avg / total       0.84      0.84      0.84      5130



### PCA

In [34]:
import numpy as np
from sklearn.decomposition import PCA

train = np.array(X_train)
n_components = 1600
pca = PCA(n_components=n_components,svd_solver='randomized').fit(train)
s = pca.explained_variance_ratio_
sum=0.0
comp=0

for _ in s:
    sum += _
    comp += 1
    if(sum>=0.99):
        break

test = np.array(X_test)

n_components = comp

#fitting pca
pca = PCA(n_components=n_components,svd_solver='randomized').fit(train)

xtrain = pca.transform(train)
xtest = pca.transform(test)

In [35]:
ann = MLPClassifier(hidden_layer_sizes=(189,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(xtrain, y_train)
y_predict = ann.predict(xtest)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.88


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        407    21    18    12   10      3    5     24    7     6      513
Five          10   417    10     5   17      4    9     21    8    12      513
Four           6     6   434    25    5      1   10      4   16     6      513
Nine           8     4    11   444    5     29    0      5    3     4      513
One            3     4     2     6  487      2    1      2    4     2      513
Seven          2     0     5     9    7    486    0      1    1     2      513
Six            3     6    12     1    5      0  474      2    7     3      513
Three         11    18     6     6    8     11    0    436   10     7      513
Two           16     5     8     1    2      5    4     20  450     2      513
Zero           1     4     2     0    3      6    1      2    4   490      513
__all__      467   485   508   509  549    547  504 

In [49]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.75      0.78      0.76       513
       Five       0.84      0.74      0.78       513
       Four       0.78      0.77      0.78       513
       Nine       0.82      0.76      0.79       513
        One       0.92      0.86      0.89       513
      Seven       0.89      0.92      0.90       513
        Six       0.88      0.92      0.90       513
      Three       0.78      0.85      0.81       513
        Two       0.89      0.86      0.88       513
       Zero       0.84      0.92      0.88       513

avg / total       0.84      0.84      0.84      5130



In [50]:
# create a new ann classifier
ann = MLPClassifier(activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)

# set neurons of hidden layer to numbers between 50 and 200
params = { 'hidden_layer_sizes' : [[i] for i in range(50, 200)] }
# setup grid search and cross validation
CV_ann = GridSearchCV(estimator=ann, param_grid=params, cv=10, scoring='accuracy')
CV_ann.fit(xtrain, y_train);
best_neurons = CV_ann.best_params_['hidden_layer_sizes'][0]
print(best_neurons)

197


In [51]:
ann = MLPClassifier(hidden_layer_sizes=(best_neurons,),
                    activation='logistic',
                    solver='adam',
                    alpha=1e-5,
                    max_iter=2000,
                    random_state=1,
                    learning_rate_init = 0.002)
ann.fit(xtrain, y_train)
y_predict = ann.predict(xtest)
score = accuracy_score(y_test, y_predict)
print("Accuracy score: %0.2f" % score)
ConfusionMatrix(y_test, y_predict)

Accuracy score: 0.88


Predicted  Eight  Five  Four  Nine  One  Seven  Six  Three  Two  Zero  __all__
Actual                                                                        
Eight        393    31    18     6   13      7    2     23   10    10      513
Five           7   424     7     5   12      4   11     28    9     6      513
Four          13     3   443    22    4      0    5      4   17     2      513
Nine           3     2    11   444    0     35    0     11    4     3      513
One            5     6     2     6  479      3    1      5    3     3      513
Seven          3     4     2    13    6    479    0      4    1     1      513
Six            4     7    10     0    6      0  475      1    7     3      513
Three          8    21     7     5    7     18    0    435    8     4      513
Two           11     4     8     1    3      5    7     16  455     3      513
Zero           2     4     0     0    3      6    3      4   11   480      513
__all__      449   506   508   502  533    557  504 

In [52]:
report = classification_report(y_test,y_predict)
print(report)

             precision    recall  f1-score   support

      Eight       0.88      0.77      0.82       513
       Five       0.84      0.83      0.83       513
       Four       0.87      0.86      0.87       513
       Nine       0.88      0.87      0.87       513
        One       0.90      0.93      0.92       513
      Seven       0.86      0.93      0.90       513
        Six       0.94      0.93      0.93       513
      Three       0.82      0.85      0.83       513
        Two       0.87      0.89      0.88       513
       Zero       0.93      0.94      0.93       513

avg / total       0.88      0.88      0.88      5130

