In [10]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold, train_test_split
from eigenface_project import eigenface_project
# Number of pixels along the x and y axes.
xPixel, yPixel = 48, 48

In [5]:
# Read the training CSV; its first row (row 0) supplies the column names.
df = pd.read_csv(
    "../data_preprocessing/train_data/face_data_train.csv",
    header=0,
)

In [6]:
# Split the frame into features (X) and labels (y).
# X: every column except the last one, as an array.
# NOTE(review): this assumes 'Class' is the last column of the CSV — confirm,
# otherwise the label column ends up inside the feature matrix.
images = df.iloc[:, :-1].values
# y: the 'Class' column, as an array.
Class = df['Class'].values

In [7]:
# Project the image data onto the eigenvectors (eigenfaces).
# NOTE(review): eigenface_project is a project-local helper; the shape and
# number of components it returns are not visible here.
images_project = eigenface_project(images)

In [12]:
# Tune n_neighbors for a scaled KNN classifier with stratified 5-fold
# cross-validation, then report the best setting.
# NOTE(review): the search below is fitted on the raw `images` matrix, not on
# `images_project` — confirm that this is intended.

# Model: standardise the features, then classify with k-nearest neighbours.
# The n_neighbors value given here is only a starting value; the grid search
# replaces it with each candidate in turn.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
])

# Candidate values for n_neighbors: 1 through 30 inclusive.
param_grid = {'knn__n_neighbors': range(1, 31)}

# Shuffled, stratified folds with a fixed random_state.
skcv = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)

# Search over the candidates, scoring each by accuracy.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='accuracy',
    cv=skcv,
)
grid_search.fit(images, Class)

# Report the winning n_neighbors and its score.
print(
    "Best n_neighbors:",
    grid_search.best_params_['knn__n_neighbors'],
)
print(
    "Best accuracy:",
    grid_search.best_score_,
)


Best n_neighbors: 1
Best accuracy: 0.37914979185994085


In [14]:
# Print the mean cross-validation score for every n_neighbors candidate.
# Each candidate's value is read from cv_results_['params'], which is aligned
# index-for-index with 'mean_test_score', so the labels always match the scores
# even if the parameter grid is changed.
# ('std_test_score' in the same dict holds the per-candidate spread if needed.)
cv_results = grid_search.cv_results_
for params, mean_score in zip(cv_results['params'], cv_results['mean_test_score']):
    print(f"n_neighbors = {params['knn__n_neighbors']}:")
    print(f"  Mean cross-validation score: {mean_score:.4f}")

n_neighbors = 1:
  Mean cross-validation score: 0.3791
n_neighbors = 2:
  Mean cross-validation score: 0.3406
n_neighbors = 3:
  Mean cross-validation score: 0.3297
n_neighbors = 4:
  Mean cross-validation score: 0.3325
n_neighbors = 5:
  Mean cross-validation score: 0.3376
n_neighbors = 6:
  Mean cross-validation score: 0.3322
n_neighbors = 7:
  Mean cross-validation score: 0.3316
n_neighbors = 8:
  Mean cross-validation score: 0.3307
n_neighbors = 9:
  Mean cross-validation score: 0.3342
n_neighbors = 10:
  Mean cross-validation score: 0.3336
n_neighbors = 11:
  Mean cross-validation score: 0.3339
n_neighbors = 12:
  Mean cross-validation score: 0.3325
n_neighbors = 13:
  Mean cross-validation score: 0.3281
n_neighbors = 14:
  Mean cross-validation score: 0.3283
n_neighbors = 15:
  Mean cross-validation score: 0.3288
n_neighbors = 16:
  Mean cross-validation score: 0.3282
n_neighbors = 17:
  Mean cross-validation score: 0.3266
n_neighbors = 18:
  Mean cross-validation score: 0.3270
n