# K-Nearest Neighbors

In [1]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Relative directory prefix prepended to the CSV file names loaded in this notebook.
CSV_PATH = '../csv/'

We use the dataset in which each picture has a size of (64, 64).

In [3]:
# Load the pixel data and the emotion labels; neither CSV has a header row.
pictures = pd.read_csv(CSV_PATH + 'balanced_300_64.csv', header=None).to_numpy()
# The label file is flattened into a 1-D array of label values.
emotions = pd.read_csv(CSV_PATH + 'labels_300.csv', header=None).to_numpy().flatten()

# The two files are read independently, so fail early (with a clear message)
# if they do not describe the same number of samples.
assert len(pictures) == len(emotions), (
    'pictures/labels row count mismatch: %d vs %d' % (len(pictures), len(emotions))
)

In [4]:
# Peek at the first four rows of the loaded pixel array.
pictures[:4]

array([[ 10.,  21.,  30., ...,  53.,  56.,  67.],
       [ 92.,  93.,  93., ..., 107., 118., 126.],
       [ 74.,  67.,  66., ...,  92.,  81.,  78.],
       [ 28.,  20.,  23., ..., 180., 179., 185.]])

In [5]:
# Peek at the first four emotion labels.
emotions[:4]

array(['neutral', 'neutral', 'neutral', 'neutral'], dtype=object)

### Split the dataset

In [6]:
# Hold out 30% of the samples for testing; the fixed seed makes the split
# reproducible across runs.
split_options = dict(test_size=0.3, train_size=0.7, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(pictures, emotions, **split_options)

In [7]:
# Tune a KNN classifier with a 5-fold cross-validated grid search over the
# neighbourhood size, the vote weighting scheme and the distance metric.
knn = KNeighborsClassifier()

grid_params = {
    'n_neighbors': [1,3,5,11,21],
    'weights': ['uniform', 'distance'],
    'metric': ['manhattan', 'euclidean']
}

# `iid` is intentionally not passed: it was deprecated in scikit-learn 0.22 and
# removed in 0.24, where supplying it raises a TypeError. Current releases
# always average the fold scores without weighting, which is what `iid=False`
# used to request.
knn_tuned = GridSearchCV(knn, grid_params, cv=5)

knn_tuned.fit(X_train, y_train)

print('KNN tuned parameters:', knn_tuned.best_params_)

# With the default refit=True, score() evaluates the best estimator refit on
# the whole training set.
print('Accuracy on train set:',knn_tuned.score(X_train, y_train))

print('Accuracy on test set:',knn_tuned.score(X_test, y_test))

KNN tuned parameters: {'metric': 'manhattan', 'n_neighbors': 11, 'weights': 'distance'}
Accuracy on train set: 1.0
Accuracy on test set: 0.38832997987927564


### Results

We get really poor results with the K-nearest neighbors model:
*  Train accuracy = 1.0
*  Test accuracy = 0.39

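This can be explained by the results obtained with the KMeans algorithm, which showed that raw pixel values do not determine the emotion shown on a face, so distances between instances in pixel space are not a reliable basis for classifying this dataset. Note that the perfect train accuracy is not meaningful by itself: with `weights='distance'`, every training picture is its own nearest neighbour at distance zero, so it is always predicted correctly.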