In [0]:
from matplotlib import pyplot as plt
from sklearn import datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import mnist
import pickle
import time
import pandas as pd

Using TensorFlow backend.


In [0]:
# Load MNIST; load_data() yields a (train, test) pair of (images, labels) tuples.
(img_train, val_train), (img_test, val_test) = mnist.load_data()
# Report the shape of the training images, then of the test images (one per line).
print(img_train.shape, img_test.shape, sep='\n')

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
(60000, 28, 28)
(10000, 28, 28)


In [0]:
# Flatten each training image (a 2-D matrix) into a single row vector,
# keeping one row per image.
n = img_train.shape[0]
data = img_train.reshape((n, -1))
print(data.shape)

(60000, 784)


In [0]:
# Experiment settings
max_k = 29               # upper bound for k; the loops below visit odd k from 1 up to this
dist_algo = 'manhattan'  # distance metric given to the classifier; also part of model file names

# Per-k measurements, appended to by the training and prediction loops
index_name, train_time, predict_time, accuracy = [], [], [], []

# Column names of the results table
headers = ['train-time', 'predict-time', 'accuracy']

In [0]:
# training
# Fit one k-NN classifier for every odd k in [1, max_k], record how long the
# fit took, and pickle each fitted model to '<k>-NN <dist_algo>.sav'.

# Start from empty result lists so that re-running this cell does not append
# a second set of rows on top of the ones collected by an earlier run.
index_name.clear()
train_time.clear()

for k in range(1, max_k+1, 2):
    print('Training ' + str(k) + '-NN with ' + dist_algo + ' distance algorithm')
    index_name.append(str(k) + '-NN')

    # create KNN classifier
    # perf_counter() is the clock intended for measuring elapsed intervals.
    start = time.perf_counter()
    # `p` is the Minkowski power parameter and 2 is already its default, so it
    # is omitted here: spelling out p=2 next to a manhattan metric is misleading.
    knn = KNeighborsClassifier(algorithm='auto', metric=dist_algo, n_neighbors=k)
    knn.fit(data, val_train)
    end = time.perf_counter()
    print("Training time: %s seconds" % str(end - start))
    train_time.append(round(end-start,2))

    # save model
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    # The context manager guarantees the file is flushed and closed, even if
    # pickling raises.
    with open(filename, 'wb') as model_file:
        pickle.dump(knn, model_file)
    print("%s saved\n" %filename)
print('Train complete!')

Training 1-NN with manhattan distance algorithm
Training time: 33.52864193916321 seconds
1-NN manhattan.sav saved

Training 3-NN with manhattan distance algorithm
Training time: 33.46123695373535 seconds
3-NN manhattan.sav saved

Training 5-NN with manhattan distance algorithm
Training time: 33.707517862319946 seconds
5-NN manhattan.sav saved

Training 7-NN with manhattan distance algorithm
Training time: 33.632203102111816 seconds
7-NN manhattan.sav saved

Training 9-NN with manhattan distance algorithm
Training time: 33.45721650123596 seconds
9-NN manhattan.sav saved

Training 11-NN with manhattan distance algorithm
Training time: 33.27318716049194 seconds
11-NN manhattan.sav saved

Training 13-NN with manhattan distance algorithm
Training time: 33.49655485153198 seconds
13-NN manhattan.sav saved

Training 15-NN with manhattan distance algorithm
Training time: 33.52378010749817 seconds
15-NN manhattan.sav saved

Training 17-NN with manhattan distance algorithm
Training time: 33.07918

In [0]:
# convert test images (matrices) to vectors
# Flatten each test image into a single row, one row per image.
# (No timer is started here: the prediction loop starts its own timer before
# each measurement, so a start time taken in this cell was never read.)
n = len(img_test)
test_data = img_test.reshape(n, -1)

In [0]:
# predict data
# For every odd k in [1, max_k]: reload the pickled model, time its prediction
# over the flattened test images, and record the fraction of correct labels.

# Start from empty result lists so that re-running this cell does not append
# a second set of entries on top of the ones collected by an earlier run.
predict_time.clear()
accuracy.clear()

for k in range(1, max_k+1, 2):
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    # NOTE: unpickling can execute arbitrary code stored in the file. Only load
    # model files produced by this notebook's training cell, never untrusted ones.
    with open(filename, 'rb') as model_file:
        loaded_knn = pickle.load(model_file)

    print('Predicting ' + str(k) + '-NN')
    # Time only the predict() call; the prints are kept outside the timed region.
    start = time.perf_counter()
    predicted = loaded_knn.predict(test_data)
    end = time.perf_counter()
    print(len(predicted))
    print("Predict time: %s seconds" % str(end - start))
    predict_time.append(round(end-start,2))

    # accuracy = share of test samples whose predicted label equals the true label
    accuracy.append(float(metrics.accuracy_score(val_test, predicted)))
print('Predict complete')

Predicting 1-NN
10000
Predict time: 865.8316278457642 seconds
Predicting 3-NN
10000
Predict time: 855.7593400478363 seconds
Predicting 5-NN
10000
Predict time: 874.2554695606232 seconds
Predicting 7-NN
10000
Predict time: 890.6276416778564 seconds
Predicting 9-NN
10000
Predict time: 895.627721786499 seconds
Predicting 11-NN
10000
Predict time: 890.3176383972168 seconds
Predicting 13-NN
10000
Predict time: 892.0731589794159 seconds
Predicting 15-NN
10000
Predict time: 884.1991374492645 seconds
Predicting 17-NN
10000
Predict time: 890.8542957305908 seconds
Predicting 19-NN
10000
Predict time: 886.4772300720215 seconds
Predicting 21-NN
10000
Predict time: 884.8515822887421 seconds
Predicting 23-NN
10000
Predict time: 871.1994881629944 seconds
Predicting 25-NN
10000
Predict time: 868.4420745372772 seconds
Predicting 27-NN
10000
Predict time: 870.8819432258606 seconds
Predicting 29-NN
10000
Predict time: 880.213972568512 seconds
Predict complete


In [0]:
# Assemble the results table: one row per k (labelled by index_name) holding
# train time, predict time and accuracy, then print it and write it to CSV.
result_rows = list(zip(train_time, predict_time, accuracy))
df = pd.DataFrame(data=result_rows, index=index_name, columns=headers)
print(df)
# ',' is to_csv's default separator, so it does not need to be passed explicitly.
df.to_csv('result.csv')

       train-time  predict-time  accuracy
1-NN        33.53        865.83    0.9631
3-NN        33.46        855.76    0.9633
5-NN        33.71        874.26    0.9618
7-NN        33.63        890.63    0.9615
9-NN        33.46        895.63    0.9597
11-NN       33.27        890.32    0.9585
13-NN       33.50        892.07    0.9581
15-NN       33.52        884.20    0.9571
17-NN       33.08        890.85    0.9571
19-NN       33.19        886.48    0.9561
21-NN       33.34        884.85    0.9544
23-NN       33.36        871.20    0.9540
25-NN       33.33        868.44    0.9532
27-NN       33.50        870.88    0.9521
29-NN       33.47        880.21    0.9521
