In [0]:
# load data
(img_train, val_train),(img_test, val_test) = mnist.load_data()
print(img_train.shape)
print(img_test.shape)

(60000, 28, 28)
(10000, 28, 28)


In [0]:
from matplotlib import pyplot as plt
from sklearn import datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import mnist
import pickle
import time
import pandas as pd
from scipy.stats import spearmanr


In [0]:
# convert images (matrixs) to vectors
n = len(img_train)
data = img_train.reshape(n, -1)
print(data.shape)

(60000, 784)


In [0]:
def spearmancorr(x,y):
    rho, pval = spearmanr(x,y, axis=0)
    return rho * (-1)

In [0]:
# declare and asign values for variables
max_k = 29
min_K = 1
dist_algo = 'spearmancorr'
index_name = []
train_time = []
predict_time = []
accuracy = []
headers = ['train-time','predict-time','accuracy']

In [0]:
# training
for k in range(min_K, max_k+1, 2):
    print('Training ' + str(k) + '-NN with ' + dist_algo + ' distance algorithm')
    index_name.append(str(k) + '-NN')
    
    # create KNN classifier
    start = time.time()
    knn = KNeighborsClassifier(algorithm='ball_tree', metric = spearmancorr, p=2, n_neighbors=k)
    knn.fit(data, val_train)
    end = time.time()
    print("Training time: %s seconds" % str(end - start))
    train_time.append(round(end-start,2))

    # save model
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    pickle.dump(knn, open(filename, 'wb'))
    print("%s saved\n" %filename)
print('Train complete!')

Training 1-NN with spearmancorr distance algorithm
Training time: 478.75164890289307 seconds
1-NN spearmancorr.sav saved

Training 3-NN with spearmancorr distance algorithm
Training time: 472.8403661251068 seconds
3-NN spearmancorr.sav saved

Training 5-NN with spearmancorr distance algorithm
Training time: 478.21985936164856 seconds
5-NN spearmancorr.sav saved

Training 7-NN with spearmancorr distance algorithm
Training time: 482.6258509159088 seconds
7-NN spearmancorr.sav saved

Training 9-NN with spearmancorr distance algorithm
Training time: 478.2412164211273 seconds
9-NN spearmancorr.sav saved

Training 11-NN with spearmancorr distance algorithm
Training time: 483.1867084503174 seconds
11-NN spearmancorr.sav saved

Training 13-NN with spearmancorr distance algorithm
Training time: 481.29787945747375 seconds
13-NN spearmancorr.sav saved

Training 15-NN with spearmancorr distance algorithm
Training time: 480.9067232608795 seconds
15-NN spearmancorr.sav saved

Training 17-NN with spe

In [0]:
# convert test images (matrixs) to vectors
start = time.time()
n = len(img_test)
test_data = img_test.reshape(n, -1)

In [0]:
# predict data
for k in range(1, max_k+1, 2):
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    loaded_knn = pickle.load(open(filename, 'rb'))
    
    start = time.time()
    print('Predicting ' + str(k) + '-NN')
    predicted = loaded_knn.predict(test_data)
    print(len(predicted))
    end = time.time()
    print("Predict time: %s seconds" % str(end - start))
    predict_time.append(round(end-start,2))
    
    # calculate accuracy average
    num_correct=0
    for i in range(0,len(val_test)):
        if val_test[i] == predicted[i]:
            num_correct +=1
    accuracy.append(float(num_correct / float(len(val_test))))
print('Predict complete')

Predicting 1-NN
10000
Predict time: 519.1169557571411 seconds
Predicting 3-NN
10000
Predict time: 530.6325991153717 seconds
Predicting 5-NN
10000
Predict time: 520.5927894115448 seconds
Predicting 7-NN
10000
Predict time: 521.6868863105774 seconds
Predicting 9-NN
10000
Predict time: 520.4148144721985 seconds
Predicting 11-NN
10000
Predict time: 523.4284977912903 seconds
Predicting 13-NN
10000
Predict time: 523.0299201011658 seconds
Predicting 15-NN
10000
Predict time: 518.3375358581543 seconds
Predicting 17-NN
10000
Predict time: 523.4118409156799 seconds
Predicting 19-NN
10000
Predict time: 516.5065884590149 seconds
Predicting 21-NN
10000
Predict time: 520.4366612434387 seconds
Predicting 23-NN
10000
Predict time: 527.9417576789856 seconds
Predicting 25-NN
10000
Predict time: 528.9725277423859 seconds
Predicting 27-NN
10000
Predict time: 526.1371343135834 seconds
Predicting 29-NN
10000
Predict time: 524.3957931995392 seconds
Predict complete


In [0]:
# save result
list_of_result = list(zip(train_time, predict_time, accuracy))
df = pd.DataFrame(list_of_result, columns=headers, index=index_name)
print(df)
df.to_csv('result.csv', sep=',')

       train-time  predict-time  accuracy
1-NN       478.75        519.12    0.7119
3-NN       472.84        530.63    0.5931
5-NN       478.22        520.59    0.5432
7-NN       482.63        521.69    0.4940
9-NN       478.24        520.41    0.4684
11-NN      483.19        523.43    0.4425
13-NN      481.30        523.03    0.4103
15-NN      480.91        518.34    0.3829
17-NN      483.26        523.41    0.3612
19-NN      478.11        516.51    0.3371
21-NN      477.02        520.44    0.3139
23-NN      476.16        527.94    0.2944
25-NN      479.53        528.97    0.2730
27-NN      475.32        526.14    0.2519
29-NN      474.32        524.40    0.2314
