# KNN

#### The k-nearest neighbors (KNN) algorithm is a non-parametric, supervised learning classifier, which uses proximity to make classifications or predictions about the grouping of an individual data point.

## Import Libraries

In [2]:
import optuna
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

## Load the dataset

In [3]:
# Read both splits from CSV files located next to the notebook; the training
# file is read first, then the test file.
mnist_train, mnist_test = (
    pd.read_csv(csv_path)
    for csv_path in ("./MNIST_training.csv", "./MNIST_test.csv")
)
# Bare last expression so the notebook renders the training frame.
mnist_train

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
944,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
945,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
946,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
947,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Split X, y

In [4]:
def split_features_labels(frame, label_col='label', index_col='Unnamed: 0'):
    """Split a loaded MNIST frame into a feature matrix and a label vector.

    The loaded training frame carries an 'Unnamed: 0' column (a row index
    that was written to the CSV and read back as data). It is not a pixel:
    leaving it among the features lets it take part in the KNN distance, and
    because the displayed training rows are ordered by label it also acts as
    a proxy for the target. It is therefore removed together with the label.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame containing the label column and the feature columns.
    label_col : str
        Name of the target column. Must be present in ``frame``.
    index_col : str
        Name of the stray saved-index column. Silently skipped when absent.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Features without the label/index columns, and the label column.

    Raises
    ------
    KeyError
        If ``label_col`` is not a column of ``frame``.
    """
    features = frame.drop(columns=[label_col])
    # errors='ignore': a file saved without its index has no such column.
    features = features.drop(columns=[index_col], errors='ignore')
    return features, frame[label_col]


x_train, y_train = split_features_labels(mnist_train)
x_test, y_test = split_features_labels(mnist_test)

## Check that the train and test sets are balanced across classes

In [5]:
# np.bincount returns, at position i, how many samples carry label i, so each
# printed array is the per-class sample count of that split.
print('train data label bincount:', np.bincount(y_train))
print('test data label bincount:', np.bincount(y_test))

train data lebel bincount: [95 95 95 95 95 95 94 95 95 95]
test data label bincount: [5 5 5 5 5 5 5 5 5 5]


## Check the default KNeighborsClassifier parameters

In [6]:
# Build the classifier with no arguments so that get_params() reports the
# library defaults; these are the values the search below can override.
default_knn = KNeighborsClassifier()
default_knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

## Hyperparameter optimization using Optuna

In [7]:
def objective(trial):
    n_neighbors = trial.suggest_int('n_neighbors', 2, 15)
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan'])

    model = KNeighborsClassifier(n_neighbors=n_neighbors, metric=metric)