In [1]:
from matplotlib import pyplot as plt
from sklearn import datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import mnist
import pickle
import time
import pandas as pd

Using TensorFlow backend.


In [2]:
# load data
(img_train, val_train),(img_test, val_test) = mnist.load_data()
print(img_train.shape)
print(img_test.shape)

(60000, 28, 28)
(10000, 28, 28)


In [3]:
# convert images (matrixs) to vectors
n = len(img_train)
data = img_train.reshape(n, -1)
print(data.shape)

(60000, 784)


In [4]:
# declare and asign values for variables
max_k = 29
dist_algo = 'manhattan'
index_name = []
train_time = []
predict_time = []
accuracy = []
headers = ['train-time','predict-time','accuracy']

In [5]:
# training
for k in range(1, max_k+1, 2):
    print('Training ' + str(k) + '-NN with ' + dist_algo + ' distance algorithm')
    index_name.append(str(k) + '-NN')
    
    # create KNN classifier
    start = time.time()
    knn = KNeighborsClassifier(algorithm='kd_tree', metric = dist_algo, p=2, n_neighbors=k)
    knn.fit(data, val_train)
    end = time.time()
    print("Training time: %s seconds" % str(end - start))
    train_time.append(round(end-start,2))

    # save model
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    pickle.dump(knn, open(filename, 'wb'))
    print("%s saved\n" %filename)
print('Train complete!')

Training 1-NN with manhattan distance algorithm
Training time: 30.00669765472412 seconds
1-NN manhattan.sav saved

Training 3-NN with manhattan distance algorithm
Training time: 30.138401985168457 seconds
3-NN manhattan.sav saved

Training 5-NN with manhattan distance algorithm
Training time: 29.827011108398438 seconds
5-NN manhattan.sav saved

Training 7-NN with manhattan distance algorithm
Training time: 29.395301342010498 seconds
7-NN manhattan.sav saved

Training 9-NN with manhattan distance algorithm
Training time: 29.58243680000305 seconds
9-NN manhattan.sav saved

Training 11-NN with manhattan distance algorithm
Training time: 30.288622617721558 seconds
11-NN manhattan.sav saved

Training 13-NN with manhattan distance algorithm
Training time: 30.184314250946045 seconds
13-NN manhattan.sav saved

Training 15-NN with manhattan distance algorithm
Training time: 30.196791410446167 seconds
15-NN manhattan.sav saved

Training 17-NN with manhattan distance algorithm
Training time: 29.9

In [6]:
# convert test images (matrixs) to vectors
start = time.time()
n = len(img_test)
test_data = img_test.reshape(n, -1)

In [7]:
# predict data
for k in range(1, max_k+1, 2):
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    loaded_knn = pickle.load(open(filename, 'rb'))
    
    print('Predicting ' + str(k) + '-NN')
    predicted = loaded_knn.predict(test_data)
    print(len(predicted))
    end = time.time()
    print("Predict time: %s seconds" % str(end - start))
    predict_time.append(round(end-start,2))
    
    # calculate accuracy average
    num_correct=0
    for i in range(0,len(val_test)):
        if val_test[i] == predicted[i]:
            num_correct +=1
    accuracy.append(float(num_correct / float(len(val_test))))
print('Predict complete')

Predicting 1-NN
10000
Predict time: 535.0440394878387 seconds
Predicting 3-NN
10000
Predict time: 1064.9019770622253 seconds
Predicting 5-NN
10000
Predict time: 1593.1719906330109 seconds
Predicting 7-NN
10000
Predict time: 2121.3077862262726 seconds
Predicting 9-NN
10000
Predict time: 2645.4096236228943 seconds
Predicting 11-NN
10000
Predict time: 3177.589206933975 seconds
Predicting 13-NN
10000
Predict time: 3699.310397386551 seconds
Predicting 15-NN
10000
Predict time: 4229.5382244586945 seconds
Predicting 17-NN
10000
Predict time: 4755.382059812546 seconds
Predicting 19-NN
10000
Predict time: 5288.849094867706 seconds
Predicting 21-NN
10000
Predict time: 5813.115791082382 seconds
Predicting 23-NN
10000
Predict time: 6341.821734905243 seconds
Predicting 25-NN
10000
Predict time: 6867.501853227615 seconds
Predicting 27-NN
10000
Predict time: 7397.885201454163 seconds
Predicting 29-NN
10000
Predict time: 7925.774642467499 seconds
Predict complete


In [8]:
# save result
list_of_result = list(zip(train_time, predict_time, accuracy))
df = pd.DataFrame(list_of_result, columns=headers, index=index_name)
print(df)
df.to_csv('result.csv', sep=',')

       train-time  predict-time  accuracy
1-NN        30.01        535.04    0.9631
3-NN        30.14       1064.90    0.9633
5-NN        29.83       1593.17    0.9618
7-NN        29.40       2121.31    0.9615
9-NN        29.58       2645.41    0.9597
11-NN       30.29       3177.59    0.9585
13-NN       30.18       3699.31    0.9581
15-NN       30.20       4229.54    0.9571
17-NN       29.94       4755.38    0.9571
19-NN       29.78       5288.85    0.9561
21-NN       29.26       5813.12    0.9544
23-NN       29.30       6341.82    0.9540
25-NN       28.81       6867.50    0.9532
27-NN       29.18       7397.89    0.9521
29-NN       29.26       7925.77    0.9521
