#### Configure project

In [None]:
# Start from a clean build tree: create ../build if it is missing (-p keeps
# re-runs from failing when it already exists), empty it, remove stale shared
# objects from the notebook directory, then configure a Release build.
!cd .. && mkdir -p build
!cd ../build/ && rm -rf *
!rm -f *.so
!cd ../build && cmake -DCMAKE_BUILD_TYPE=Release ..

#### Compile and install

In [None]:
# Run the `install` target of the configured build tree.
# NOTE(review): presumably this places the compiled kNN extension where the
# `import kNN` below can find it — confirm against the CMake install rules.
!cd ../build && make install

In [None]:
import kNN
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from timeit import default_timer as timer
from sklearn.metrics import accuracy_score
import numpy as np
# Fix NumPy's global RNG seed.
# NOTE(review): presumably so the DataFrame.sample() calls below are repeatable
# across fresh runs — confirm nothing else consumes the global RNG in between.
np.random.seed(1998)

### Util

In [None]:
def data_labels(dataset):
    """Split a labelled DataFrame into a feature matrix and a label column.

    The ``"label"`` column is removed by name, so the split is correct
    regardless of where that column sits in the frame.

    Args:
        dataset: DataFrame containing a ``"label"`` column plus feature columns.

    Returns:
        A ``(data, labels)`` tuple: ``data`` is a 2-D array holding every
        column except ``"label"`` (original column order preserved), and
        ``labels`` is the ``"label"`` column reshaped to ``(n_rows, 1)``.

    Raises:
        KeyError: If ``dataset`` has no ``"label"`` column.
    """
    data = dataset.drop(columns="label").values
    # Column vector (n_rows, 1) rather than a flat array, as the callers expect.
    labels = dataset["label"].values.reshape(-1, 1)
    return data, labels

def save_res(cols, rows):
    """Write experiment results to ``res/<experiment>.csv``.

    Args:
        cols: Column names for the output table.
        rows: Result rows, one sequence per row. The first field of the
            first row is used as the output file name.

    Raises:
        ValueError: If the rows do not match ``cols`` in width (raised by
            the DataFrame constructor).
        IndexError: If ``rows`` is empty, since no file name can be derived.
    """
    import os

    res = pd.DataFrame(rows, columns=cols)
    out_dir = "res"
    filename = "{}/{}.csv".format(out_dir, rows[0][0])
    # to_csv does not create missing parent directories; make sure it exists.
    os.makedirs(out_dir, exist_ok=True)
    res.to_csv(filename, index=False, header=True)

def run_KNN(train, validate, klist):
    """Fit kNN once on ``train`` and score it on ``validate`` for each k.

    Args:
        train: Labelled DataFrame used to fit the classifier.
        validate: Labelled DataFrame the predictions are scored against.
        klist: Iterable of neighbour counts to evaluate.

    Returns:
        A list with one ``[k, accuracy, seconds]`` entry per value in
        ``klist``, where ``seconds`` covers only the ``predict`` call.
    """
    fit_x, fit_y = data_labels(train)
    eval_x, eval_y = data_labels(validate)

    # The classifier is fitted a single time; only k changes between runs.
    # NOTE(review): the meaning of the (0, 10) constructor arguments is
    # defined by the kNN extension and is not visible here — confirm.
    classifier = kNN.KNNClassifier(0, 10)
    classifier.fit(fit_x, fit_y)

    results = []
    for k in tqdm(klist):
        classifier.setneighbors(k)

        # Time the prediction alone, excluding the accuracy computation.
        t0 = timer()
        predicted = classifier.predict(eval_x)
        elapsed = timer() - t0

        accuracy = accuracy_score(eval_y, predicted)
        results.append([k, accuracy, elapsed])

    return results

# Experimentation

In [None]:
# Load the Fashion-MNIST train and test splits from the data directory.
df_train, df_test = map(
    pd.read_csv,
    ("../data/fashion-mnist_train.csv", "../data/fashion-mnist_test.csv"),
)

### k size test

In [None]:
# Draw a random subsample of each split; `percent` is the fraction of rows
# kept, and the row count is truncated to an integer.
percent = 0.1
train = df_train.sample(n=int(len(df_train) * percent))
test = df_test.sample(n=int(len(df_test) * percent))

In [None]:
# Evaluate every k from 1 to 100 on the subsample and persist the results.
k_list = np.arange(1, 101, 1)
cols = ["exp", "k", "acc", "time"]
# run_KNN returns [k, acc, time] rows. Prepend the experiment name so each row
# matches the four columns and save_res names the file res/k_size.csv, which
# is the file the plotting cell reads back.
rows = [["k_size", *row] for row in run_KNN(train, test, k_list)]
save_res(cols, rows)

In [None]:
# Read back the stored k-size results and plot them.
results = pd.read_csv("res/k_size.csv")

# Accuracy as a function of k.
g = sns.lineplot(x="k", y="acc", data=results, label="accuracy", linewidth=2)
plt.show()

# Prediction time as a function of k.
g = sns.scatterplot(x="k", y="time", data=results)