This notebook trains a MLPClassifier on the data (numpy arrays) created by the notebook - *process_yale_images.ipynb*.

In [1]:
import os
import numpy as np
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

In [2]:
# Set paths for the local .npy files
data_path = "D:/biometric/yaleExtB_data.npy"
target_path = "D:/biometric/yaleExtB_target.npy"

In [3]:
# Load the data and target arrays
print("Loading data...")
data = np.load(data_path)
target = np.load(target_path)

Loading data...


In [4]:
# Verify data shapes
print(f"Data shape: {data.shape}")
print(f"Target shape: {target.shape}")
print(f"Number of unique classes: {len(np.unique(target))}")

Data shape: (1500, 5236)
Target shape: (1500,)
Number of unique classes: 30


In [5]:
# Split into features (X) and labels (y)
X, y = data, target


In [6]:
# Split into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
# PCA to reduce dimensionality
nof_prin_components = 150  # Parameter for number of principal components
print("Performing PCA...")
pca = PCA(n_components=nof_prin_components, whiten=True).fit(X_train)


Performing PCA...


In [8]:
# Apply PCA transformation
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)


In [9]:
# Train a neural network classifier
nohn = 300  # Number of hidden neurons
print("Training the classifier...")
clf = MLPClassifier(hidden_layer_sizes=(nohn,), solver='adam', activation='relu', 
                     batch_size=256, verbose=True, early_stopping=True, random_state=42)
clf.fit(X_train_pca, y_train)

Training the classifier...
Iteration 1, loss = 3.51035437
Validation score: 0.123810
Iteration 2, loss = 2.95739466
Validation score: 0.323810
Iteration 3, loss = 2.48286100
Validation score: 0.523810
Iteration 4, loss = 2.05535251
Validation score: 0.571429
Iteration 5, loss = 1.67331018
Validation score: 0.666667
Iteration 6, loss = 1.33641764
Validation score: 0.742857
Iteration 7, loss = 1.05614480
Validation score: 0.800000
Iteration 8, loss = 0.82619808
Validation score: 0.828571
Iteration 9, loss = 0.64771678
Validation score: 0.847619
Iteration 10, loss = 0.50969909
Validation score: 0.857143
Iteration 11, loss = 0.40704763
Validation score: 0.895238
Iteration 12, loss = 0.32918634
Validation score: 0.895238
Iteration 13, loss = 0.27039368
Validation score: 0.895238
Iteration 14, loss = 0.22611886
Validation score: 0.895238
Iteration 15, loss = 0.19124907
Validation score: 0.895238
Iteration 16, loss = 0.16434875
Validation score: 0.895238
Iteration 17, loss = 0.14296162
Valida

[Documentation of ML sklearn library](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html#sklearn.neural_network.MLPClassifier)

In [10]:
# Evaluate on the test set
print("Evaluating the classifier...")
y_pred = clf.predict(X_test_pca)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Evaluating the classifier...
Classification Report:
              precision    recall  f1-score   support

         2.0       1.00      1.00      1.00        10
         3.0       0.94      1.00      0.97        15
         4.0       0.87      1.00      0.93        13
         5.0       0.86      1.00      0.92        12
         6.0       1.00      0.94      0.97        16
         7.0       0.89      0.94      0.91        17
         8.0       0.93      0.93      0.93        15
         9.0       0.89      0.89      0.89        19
        11.0       0.88      0.88      0.88        17
        12.0       1.00      0.93      0.96        14
        13.0       1.00      0.91      0.95        11
        15.0       1.00      1.00      1.00        14
        16.0       1.00      0.87      0.93        15
        17.0       1.00      0.79      0.88        14
        18.0       1.00      1.00      1.00        16
        20.0       0.92      1.00      0.96        11
        22.0       1.00      

The validation accuracy seen in the above output is computed by testing on the training set, so they are not actual validation accuracy.

In [11]:
# Cross-validation
print("Performing cross-validation...")
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(clf, X_train_pca, y_train, cv=kf, scoring='accuracy')
print(f"Cross-validation scores: {scores}")
print(f"Mean cross-validation accuracy: {np.mean(scores)}")

Performing cross-validation...
Iteration 1, loss = 3.53710040
Validation score: 0.107143
Iteration 2, loss = 3.09272480
Validation score: 0.202381
Iteration 3, loss = 2.70408700
Validation score: 0.333333
Iteration 4, loss = 2.34449653
Validation score: 0.464286
Iteration 5, loss = 2.01141906
Validation score: 0.583333
Iteration 6, loss = 1.70714608
Validation score: 0.761905
Iteration 7, loss = 1.43367091
Validation score: 0.785714
Iteration 8, loss = 1.19241705
Validation score: 0.821429
Iteration 9, loss = 0.98425986
Validation score: 0.845238
Iteration 10, loss = 0.81151959
Validation score: 0.869048
Iteration 11, loss = 0.66859591
Validation score: 0.869048
Iteration 12, loss = 0.55219372
Validation score: 0.880952
Iteration 13, loss = 0.45841666
Validation score: 0.880952
Iteration 14, loss = 0.38489226
Validation score: 0.892857
Iteration 15, loss = 0.32496028
Validation score: 0.892857
Iteration 16, loss = 0.27735501
Validation score: 0.904762
Iteration 17, loss = 0.23842119
Va

In [12]:
print(scores)


[0.92380952 0.93809524 0.92857143 0.92857143 0.85714286]
