In [0]:
from matplotlib import pyplot as plt
from sklearn import datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import mnist
import pickle
import time
import pandas as pd
from scipy.stats import spearmanr


Using TensorFlow backend.


In [0]:
# load data
(img_train, val_train),(img_test, val_test) = mnist.load_data()
print(img_train.shape)
print(img_test.shape)

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
(60000, 28, 28)
(10000, 28, 28)


In [0]:
# convert images (matrixs) to vectors
n = len(img_train)
data = img_train.reshape(n, -1)
print(data.shape)

(60000, 784)


In [0]:
def spearmancorr(x,y):
    rho, pval = spearmanr(x,y, axis=0)
    return rho * (-1)

In [0]:
# declare and asign values for variables
max_k = 29
min_k = 1
dist_algo = 'spearmancorr'
index_name = []
train_time = []
predict_time = []
accuracy = []
headers = ['train-time','predict-time','accuracy']

In [7]:
# training
for k in range(min_k, max_k+1, 2):
    print('Training ' + str(k) + '-NN with ' + dist_algo + ' distance algorithm')
    index_name.append(str(k) + '-NN')
    
    # create KNN classifier
    start = time.time()
    knn = KNeighborsClassifier(algorithm='ball_tree', metric = spearmancorr, p=2, n_neighbors=k)
    knn.fit(data, val_train)
    end = time.time()
    print("Training time: %s seconds" % str(end - start))
    train_time.append(round(end-start,2))

    # save model
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    pickle.dump(knn, open(filename, 'wb'))
    print("%s saved\n" %filename)
print('Train complete!')

Training 1-NN with spearmancorr distance algorithm
Training time: 480.13702511787415 seconds
1-NN spearmancorr.sav saved

Training 3-NN with spearmancorr distance algorithm
Training time: 473.5757074356079 seconds
3-NN spearmancorr.sav saved

Training 5-NN with spearmancorr distance algorithm
Training time: 476.1208083629608 seconds
5-NN spearmancorr.sav saved

Training 7-NN with spearmancorr distance algorithm
Training time: 473.68279576301575 seconds
7-NN spearmancorr.sav saved

Training 9-NN with spearmancorr distance algorithm
Training time: 475.47895407676697 seconds
9-NN spearmancorr.sav saved

Training 11-NN with spearmancorr distance algorithm
Training time: 472.47741317749023 seconds
11-NN spearmancorr.sav saved

Training 13-NN with spearmancorr distance algorithm
Training time: 474.6598298549652 seconds
13-NN spearmancorr.sav saved

Training 15-NN with spearmancorr distance algorithm
Training time: 477.94314074516296 seconds
15-NN spearmancorr.sav saved

Training 17-NN with s

In [0]:
# convert test images (matrixs) to vectors
start = time.time()
n = len(img_test)
test_data = img_test.reshape(n, -1)

In [9]:
# predict data
for k in range(1, max_k+1, 2):
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    loaded_knn = pickle.load(open(filename, 'rb'))
    
    start = time.time()
    print('Predicting ' + str(k) + '-NN')
    predicted = loaded_knn.predict(test_data)
    print(len(predicted))
    end = time.time()
    print("Predict time: %s seconds" % str(end - start))
    predict_time.append(round(end-start,2))
    
    # calculate accuracy average
    num_correct=0
    for i in range(0,len(val_test)):
        if val_test[i] == predicted[i]:
            num_correct +=1
    accuracy.append(float(num_correct / float(len(val_test))))
print('Predict complete')

Predicting 1-NN
10000
Predict time: 519.2686026096344 seconds
Predicting 3-NN
10000
Predict time: 516.2768034934998 seconds
Predicting 5-NN
10000
Predict time: 520.6115148067474 seconds
Predicting 7-NN
10000
Predict time: 519.0487415790558 seconds
Predicting 9-NN
10000
Predict time: 516.5125572681427 seconds
Predicting 11-NN
10000
Predict time: 512.1844000816345 seconds
Predicting 13-NN
10000
Predict time: 515.3345911502838 seconds
Predicting 15-NN
10000
Predict time: 517.8593020439148 seconds
Predicting 17-NN
10000
Predict time: 519.0254964828491 seconds
Predicting 19-NN
10000
Predict time: 517.036502122879 seconds
Predicting 21-NN
10000
Predict time: 519.9543220996857 seconds
Predicting 23-NN
10000
Predict time: 520.1397709846497 seconds
Predicting 25-NN
10000
Predict time: 521.4888851642609 seconds
Predicting 27-NN
10000
Predict time: 519.3523495197296 seconds
Predicting 29-NN
10000
Predict time: 526.076936006546 seconds
Predict complete


In [10]:
# save result
list_of_result = list(zip(train_time, predict_time, accuracy))
df = pd.DataFrame(list_of_result, columns=headers, index=index_name)
print(df)
df.to_csv('result2.csv', sep=',')

       train-time  predict-time  accuracy
1-NN       480.14        519.27    0.7119
3-NN       473.58        516.28    0.5931
5-NN       476.12        520.61    0.5432
7-NN       473.68        519.05    0.4940
9-NN       475.48        516.51    0.4684
11-NN      472.48        512.18    0.4425
13-NN      474.66        515.33    0.4103
15-NN      477.94        517.86    0.3829
17-NN      477.15        519.03    0.3612
19-NN      476.22        517.04    0.3371
21-NN      476.94        519.95    0.3139
23-NN      474.46        520.14    0.2944
25-NN      473.96        521.49    0.2730
27-NN      476.70        519.35    0.2519
29-NN      472.63        526.08    0.2314
