In [1]:
import numpy as np
import scipy.io
from sklearn.preprocessing import StandardScaler

In [2]:
D = scipy.io.loadmat('data/allFaces.mat')
X = D['faces'] # 32256 x 2410 each column represents an image
y = np.ndarray.flatten(D['nfaces'])
m = D['m'].item() # 168
n = D['n'].item() # 192
n_persons = D['person'].item() # 38

# print the data information
print('X.shape =', X.shape)
print('y.shape =', y.shape)
print('Total number of images =', X.shape[1])
print('Total number of persons =', n_persons)
print('Image size =', m, 'x', n)
print('Number of images for each person =',y) 
print('Total number of images =', y.sum())

X.shape = (32256, 2410)
y.shape = (38,)
Total number of images = 2410
Total number of persons = 38
Image size = 168 x 192
Number of images for each person = [64 62 64 64 62 64 64 64 64 64 60 59 60 63 62 63 63 64 64 64 64 64 64 64
 64 64 64 64 64 64 64 64 64 64 64 64 64 64]
Total number of images = 2410


In [8]:
# 根據 y 產生 0~37 的 label
label_list = []
for person_id, count in enumerate(y):  # y 是每人的圖片數
    label_list.extend([person_id] * count)
y_full = np.array(label_list)  # 長度為 2410

unique, counts = np.unique(y_full, return_counts=True)
print("每個人的圖片數量：")
for label, count in zip(unique, counts):
    print(f"Person {label:2d}: {count} images")


每個人的圖片數量：
Person  0: 64 images
Person  1: 62 images
Person  2: 64 images
Person  3: 64 images
Person  4: 62 images
Person  5: 64 images
Person  6: 64 images
Person  7: 64 images
Person  8: 64 images
Person  9: 64 images
Person 10: 60 images
Person 11: 59 images
Person 12: 60 images
Person 13: 63 images
Person 14: 62 images
Person 15: 63 images
Person 16: 63 images
Person 17: 64 images
Person 18: 64 images
Person 19: 64 images
Person 20: 64 images
Person 21: 64 images
Person 22: 64 images
Person 23: 64 images
Person 24: 64 images
Person 25: 64 images
Person 26: 64 images
Person 27: 64 images
Person 28: 64 images
Person 29: 64 images
Person 30: 64 images
Person 31: 64 images
Person 32: 64 images
Person 33: 64 images
Person 34: 64 images
Person 35: 64 images
Person 36: 64 images
Person 37: 64 images


In [10]:
X = X.T # 2410 x 32256 each row represents an image
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [14]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_full, test_size=0.3, stratify=y_full, random_state=42
)

print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")
print("類別數量：", len(np.unique(y_full)))

Training samples: 1687
Testing samples: 723
Training data shape: (1687, 32256)
Testing data shape: (723, 32256)
類別數量： 38


In [16]:
from HW3_common_utils import run_logistic_regression_cv

# 執行 Logistic Regression CV
result = run_logistic_regression_cv(
    X_train, X_test, y_train, y_test,
    solver='lbfgs',
    Cs=np.logspace(-5, 5, 20),
    cv=5,
    tol=1e-6,
    max_iter=int(1e6),
    verbose=0,
    print_report=True
)


Logistic Regression with CV (solver = lbfgs)
Best C = [0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293
 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293
 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293
 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293
 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293 0.0483293
 0.0483293 0.0483293 0.0483293]
Training Accuracy: 99.70%
Testing Accuracy : 96.96%
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.95      1.00      0.97        19
           2       0.95      1.00      0.97        19
           3       1.00      1.00      1.00        19
           4       1.00      1.00      1.00        19
           5       0.90      1.00      0.95        19
           6       1.00      1.00      1.00        19
           7       0.95      1.00      0.97        19
           8       0