In [1]:
# load the 2 npy files created by the process_yale_images.ipynb 
from numpy import load
import numpy as np

# load array
y = load('./yaleExtB_target.npy')
X = load('./yaleExtB_data.npy')

In [2]:
from sklearn.model_selection import train_test_split # loads functions from the ML library sklearn 
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier

In [3]:
# split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [4]:
# PCA 
nof_prin_components = 200  # PARAMETER for optimisation in expereiments
pca = PCA(n_components=nof_prin_components, whiten=True).fit(X_train)

# applies PCA to the train and test images to calculate the principal components
X_train_pca = pca.transform(X_train) 
X_test_pca = pca.transform(X_test)

In [4]:
X_train

array([[ 36.,  42.,  52., ..., 128., 117., 115.],
       [ 26.,  28.,  26., ...,   1.,   1.,   1.],
       [162., 171., 175., ...,   8.,  11.,  13.],
       ...,
       [158., 160., 128., ...,   7.,   7.,   7.],
       [ 42.,  44.,  32., ...,  18.,  20.,  20.],
       [103., 103., 105., ...,  32.,  20.,  19.]])

In [5]:
X_train_pca[:5][0]

array([ 0.38201553,  0.52723044, -0.4183909 ,  0.27821595, -1.49481838,
       -0.2725399 , -0.09086642,  0.54044361, -0.40700769,  0.87881918,
       -0.08399127,  1.94185768,  0.84273627, -1.56530585, -1.48482597,
        0.60966217, -0.88148971,  0.99381291,  0.19866044, -0.97484169,
       -0.72403833,  0.08146954, -1.01229255, -0.34948632,  1.05558962,
       -0.86539703,  1.66766705,  1.86029561,  0.02198214,  0.15722326,
        0.13829593,  0.34539639, -0.0105105 , -0.91877239,  0.54167804,
        1.01101864, -0.22923436,  0.15145705, -0.08945736, -0.09765085,
        1.05682228,  0.9339385 , -0.51700293,  0.68179283, -0.31757761,
        1.28712294, -0.46977323, -0.95797275,  0.39567176, -0.56380596,
       -1.3390784 , -0.76515164,  0.4063293 , -0.2414999 , -0.22349709,
        0.32013716, -0.30503687, -0.14282279, -1.02280546, -0.21727412,
       -0.62197681, -1.00132388,  0.75041771, -1.72639612, -1.24418955,
       -1.27235687, -0.62112123,  1.73093036,  0.22052455,  0.94

In [6]:
X_train_pca.shape

(1005, 200)

[Documentation of ML sklearn library](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html#sklearn.neural_network.MLPClassifier)

In [9]:
# train a neural network using MLP
nohn = 200 #nof hidden neurons
print("Fitting the classifier to the training set")
clf = MLPClassifier(hidden_layer_sizes=(nohn,), solver='sgd', activation='tanh', batch_size=250, verbose=True, 
                    early_stopping=True)
clf.fit(X_train_pca, y_train)

Fitting the classifier to the training set
Iteration 1, loss = 3.72780358
Validation score: 0.029703
Iteration 2, loss = 3.71117326
Validation score: 0.029703
Iteration 3, loss = 3.68579462
Validation score: 0.029703
Iteration 4, loss = 3.65456628
Validation score: 0.039604
Iteration 5, loss = 3.61960865
Validation score: 0.039604
Iteration 6, loss = 3.58257648
Validation score: 0.049505
Iteration 7, loss = 3.54396312
Validation score: 0.049505
Iteration 8, loss = 3.50426658
Validation score: 0.049505
Iteration 9, loss = 3.46415369
Validation score: 0.059406
Iteration 10, loss = 3.42387452
Validation score: 0.059406
Iteration 11, loss = 3.38348752
Validation score: 0.069307
Iteration 12, loss = 3.34307174
Validation score: 0.069307
Iteration 13, loss = 3.30272193
Validation score: 0.079208
Iteration 14, loss = 3.26247601
Validation score: 0.079208
Iteration 15, loss = 3.22261619
Validation score: 0.079208
Iteration 16, loss = 3.18264275
Validation score: 0.079208
Iteration 17, loss = 3

Iteration 143, loss = 0.58767578
Validation score: 0.821782
Iteration 144, loss = 0.58135688
Validation score: 0.831683
Iteration 145, loss = 0.57504215
Validation score: 0.831683
Iteration 146, loss = 0.56894428
Validation score: 0.831683
Iteration 147, loss = 0.56293701
Validation score: 0.831683
Iteration 148, loss = 0.55693203
Validation score: 0.831683
Iteration 149, loss = 0.55109497
Validation score: 0.831683
Iteration 150, loss = 0.54526835
Validation score: 0.831683
Iteration 151, loss = 0.53969092
Validation score: 0.831683
Iteration 152, loss = 0.53399966
Validation score: 0.831683
Iteration 153, loss = 0.52854160
Validation score: 0.841584
Iteration 154, loss = 0.52311216
Validation score: 0.841584
Iteration 155, loss = 0.51775635
Validation score: 0.841584
Iteration 156, loss = 0.51250442
Validation score: 0.841584
Iteration 157, loss = 0.50735863
Validation score: 0.851485
Iteration 158, loss = 0.50223050
Validation score: 0.851485
Iteration 159, loss = 0.49719768
Validat

In [10]:
y_pred = clf.predict(X_test_pca) # reoognises the test images 
print(classification_report(y_test, y_pred)) # the recognition accuracy

              precision    recall  f1-score   support

         2.0       0.85      0.92      0.88        12
         3.0       0.81      0.81      0.81        16
         4.0       0.75      0.94      0.83        16
         5.0       0.89      0.80      0.84        20
         6.0       0.93      1.00      0.96        13
         7.0       0.86      0.95      0.90        19
         8.0       1.00      0.89      0.94        19
         9.0       0.88      0.88      0.88        16
        11.0       0.95      0.78      0.86        23
        12.0       0.92      0.92      0.92        13
        13.0       0.83      0.65      0.73        23
        15.0       0.92      1.00      0.96        12
        16.0       0.93      1.00      0.96        13
        17.0       0.91      0.71      0.80        14
        18.0       0.82      1.00      0.90        14
        20.0       0.80      1.00      0.89        12
        22.0       0.88      0.74      0.80        19
        23.0       0.95    

In [11]:
import numpy as np
from sklearn.model_selection import cross_val_score

clf_cross_val_result = cross_val_score(clf, X_train_pca, y_train, cv=5)

print("Cross Validation Scores : ", clf_cross_val_result)
print("Average Accuracy : ", np.mean(clf_cross_val_result))

Iteration 1, loss = 3.67746047
Validation score: 0.024691
Iteration 2, loss = 3.66663779
Validation score: 0.024691
Iteration 3, loss = 3.65002764
Validation score: 0.024691
Iteration 4, loss = 3.62933634
Validation score: 0.037037
Iteration 5, loss = 3.60565414
Validation score: 0.037037
Iteration 6, loss = 3.57957676
Validation score: 0.049383
Iteration 7, loss = 3.55211994
Validation score: 0.049383
Iteration 8, loss = 3.52355869
Validation score: 0.049383
Iteration 9, loss = 3.49423121
Validation score: 0.049383
Iteration 10, loss = 3.46436407
Validation score: 0.049383
Iteration 11, loss = 3.43418762
Validation score: 0.049383
Iteration 12, loss = 3.40372101
Validation score: 0.061728
Iteration 13, loss = 3.37325915
Validation score: 0.074074
Iteration 14, loss = 3.34261106
Validation score: 0.074074
Iteration 15, loss = 3.31200927
Validation score: 0.086420
Iteration 16, loss = 3.28142440
Validation score: 0.086420
Iteration 17, loss = 3.25112703
Validation score: 0.086420
Iterat

Iteration 31, loss = 2.85067010
Validation score: 0.234568
Iteration 32, loss = 2.82119335
Validation score: 0.246914
Iteration 33, loss = 2.79199379
Validation score: 0.246914
Iteration 34, loss = 2.76301892
Validation score: 0.246914
Iteration 35, loss = 2.73408270
Validation score: 0.259259
Iteration 36, loss = 2.70528730
Validation score: 0.271605
Iteration 37, loss = 2.67684219
Validation score: 0.283951
Iteration 38, loss = 2.64849634
Validation score: 0.283951
Iteration 39, loss = 2.62034632
Validation score: 0.308642
Iteration 40, loss = 2.59247930
Validation score: 0.308642
Iteration 41, loss = 2.56468200
Validation score: 0.308642
Iteration 42, loss = 2.53720380
Validation score: 0.308642
Iteration 43, loss = 2.50988549
Validation score: 0.308642
Iteration 44, loss = 2.48273603
Validation score: 0.320988
Iteration 45, loss = 2.45590391
Validation score: 0.333333
Iteration 46, loss = 2.42905384
Validation score: 0.333333
Iteration 47, loss = 2.40257182
Validation score: 0.3456

Iteration 25, loss = 2.97120569
Validation score: 0.160494
Iteration 26, loss = 2.94143832
Validation score: 0.172840
Iteration 27, loss = 2.91162706
Validation score: 0.172840
Iteration 28, loss = 2.88205319
Validation score: 0.172840
Iteration 29, loss = 2.85250649
Validation score: 0.185185
Iteration 30, loss = 2.82341361
Validation score: 0.185185
Iteration 31, loss = 2.79418819
Validation score: 0.185185
Iteration 32, loss = 2.76533777
Validation score: 0.185185
Iteration 33, loss = 2.73648571
Validation score: 0.185185
Iteration 34, loss = 2.70795253
Validation score: 0.197531
Iteration 35, loss = 2.67957935
Validation score: 0.209877
Iteration 36, loss = 2.65130666
Validation score: 0.209877
Iteration 37, loss = 2.62318088
Validation score: 0.209877
Iteration 38, loss = 2.59551236
Validation score: 0.222222
Iteration 39, loss = 2.56780275
Validation score: 0.222222
Iteration 40, loss = 2.54028643
Validation score: 0.222222
Iteration 41, loss = 2.51310853
Validation score: 0.2592

Iteration 14, loss = 3.36702636
Validation score: 0.111111
Iteration 15, loss = 3.33552149
Validation score: 0.111111
Iteration 16, loss = 3.30427599
Validation score: 0.135802
Iteration 17, loss = 3.27291884
Validation score: 0.135802
Iteration 18, loss = 3.24184088
Validation score: 0.135802
Iteration 19, loss = 3.21077619
Validation score: 0.135802
Iteration 20, loss = 3.17956613
Validation score: 0.135802
Iteration 21, loss = 3.14872701
Validation score: 0.135802
Iteration 22, loss = 3.11801072
Validation score: 0.135802
Iteration 23, loss = 3.08725677
Validation score: 0.135802
Iteration 24, loss = 3.05699625
Validation score: 0.160494
Iteration 25, loss = 3.02693715
Validation score: 0.197531
Iteration 26, loss = 2.99663148
Validation score: 0.209877
Iteration 27, loss = 2.96659686
Validation score: 0.209877
Iteration 28, loss = 2.93676828
Validation score: 0.209877
Iteration 29, loss = 2.90711016
Validation score: 0.209877
Iteration 30, loss = 2.87756989
Validation score: 0.2345

In [13]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

# train nerual nertwork using Adaboosting
adbost = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2), n_estimators=200, algorithm='SAMME.R',
                            learning_rate=0.5, random_state=42)

model = adbost.fit(X_train_pca, y_train)

In [14]:
y_pred = model.predict(X_test_pca)

In [15]:
print('Accuracy : ', metrics.accuracy_score(y_test, y_pred))

Accuracy :  0.6262626262626263


In [16]:
import numpy as np
from sklearn.model_selection import cross_val_score

ada_cross_val_result = cross_val_score(adbost, X, y, cv=5)

print("Cross Validation Scores : ", ada_cross_val_result)
print("Average Accuracy : ", np.mean(ada_cross_val_result))

Cross Validation Scores :  [0.58       0.69666667 0.75333333 0.62       0.55      ]
Average Accuracy :  0.64
