In [2]:
# install humanfriendly if necessary
!pip install humanfriendly

import numpy as np, humanfriendly as hf, warnings
import time
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split,\
     GridSearchCV, cross_val_score
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report

def see_time(note):
    """Print `note` followed by the time elapsed since the global `start`.

    Relies on a module-level `start` value (a `time.perf_counter()` reading
    taken by the caller just before the work being timed) and renders the
    difference with `humanfriendly.format_timespan` in detailed form.
    """
    elapsed_seconds = time.perf_counter() - start
    timespan = hf.format_timespan(elapsed_seconds, detailed=True)
    print(note, timespan)

def get_cross(model, data, target, groups=10):
    """Cross-validate `model` on `data`/`target` and return the fold scores.

    `groups` is passed straight through as the `cv` argument of
    `cross_val_score` (10 unless the caller overrides it).
    """
    fold_scores = cross_val_score(model, data, target, cv=groups)
    return fold_scores

if __name__ == "__main__":
    # Local import: only needed to pick a loss name that the installed
    # scikit-learn release accepts (see the parameter grid below).
    import sklearn

    br = '\n'
    warnings.filterwarnings("ignore",
                            category=DeprecationWarning)

    # Load the feature matrix and labels, then hold out a test split.
    X = np.load('data/X_faces.npy')
    y = np.load('data/y_faces.npy')
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=0)

    # Fit PCA on the training split only and project both splits with it.
    # A float n_components asks PCA to keep as many components as needed
    # to explain that fraction of the variance.
    pca = PCA(n_components=0.95, whiten=True, random_state=1)
    pca.fit(X_train)
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    pca_name = pca.__class__.__name__
    print('<<' + pca_name + '>>')
    print('features (before PCA):', X.shape[1])
    print('features (after PCA):', pca.n_components_, br)

    # Baseline: SGD classifier with default loss/penalty on the PCA features.
    sgd = SGDClassifier(max_iter=1000, tol=.001, random_state=0)
    sgd.fit(X_train_pca, y_train)
    y_pred = sgd.predict(X_test_pca)
    cr = classification_report(y_test, y_pred)
    print(cr)
    sgd_name = sgd.__class__.__name__

    # scikit-learn 1.1 renamed the logistic loss from 'log' to 'log_loss'
    # and 1.3 removed the old spelling, so use whichever name the installed
    # release understands instead of hard-coding 'log'.
    sk_major, sk_minor = (int(part) for part in
                          sklearn.__version__.split('.')[:2])
    log_loss_name = 'log_loss' if (sk_major, sk_minor) >= (1, 1) else 'log'

    # Grid search over regularisation strength and loss; the search is the
    # only part that is timed.
    param_grid = {'alpha': [1e-3, 1e-2, 1e-1, 1e0],
                  'max_iter': [1000],
                  'loss': [log_loss_name, 'perceptron'],
                  'penalty': ['l1'], 'tol': [.001]}
    grid = GridSearchCV(sgd, param_grid, cv=5)
    start = time.perf_counter()  # read by see_time() as a module global
    grid.fit(X_train_pca, y_train)
    see_time('training time:')
    print()
    bp = grid.best_params_
    print('best parameters:')
    print(bp, br)

    # Refit a fresh classifier with the winning parameters and evaluate it
    # on the held-out split.
    sgd = SGDClassifier(**bp, random_state=1)
    sgd.fit(X_train_pca, y_train)
    y_pred = sgd.predict(X_test_pca)
    cr = classification_report(y_test, y_pred)
    print(cr)

    # Mean cross-validation score of the tuned model on the training split
    # (get_cross uses 10 folds by default).
    print('cross-validation:')
    scores = get_cross(sgd, X_train_pca, y_train)
    print(np.mean(scores))

<<PCA>>
features (before PCA): 1850
features (after PCA): 135 

              precision    recall  f1-score   support

           0       0.89      0.57      0.70        28
           1       0.81      0.79      0.80        63
           2       0.82      0.58      0.68        24
           3       0.74      0.89      0.80       132
           4       0.52      0.55      0.54        20
           5       0.88      0.32      0.47        22
           6       0.68      0.76      0.71        33

    accuracy                           0.75       322
   macro avg       0.76      0.64      0.67       322
weighted avg       0.76      0.75      0.74       322

training time: 8 seconds and 50 milliseconds

best parameters:
{'alpha': 0.001, 'loss': 'log', 'max_iter': 1000, 'penalty': 'l1', 'tol': 0.001} 

              precision    recall  f1-score   support

           0       0.70      0.57      0.63        28
           1       0.79      0.83      0.81        63
           2       0.68      0