In [1]:
from matplotlib import pyplot as plt
from sklearn import datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import mnist
import pickle
import time
import pandas as pd

Using TensorFlow backend.


In [2]:
# Load MNIST through Keras; each split unpacks into an image array and the
# array of target values that goes with it.
(img_train, val_train), (img_test, val_test) = mnist.load_data()
# Report the image-array shape of the train and test splits, one per line.
print(img_train.shape, img_test.shape, sep='\n')

(60000, 28, 28)
(10000, 28, 28)


In [3]:
# Flatten every training image (a matrix) into a single row vector so the
# whole training set becomes one 2-D array with one row per image.
n = img_train.shape[0]
data = img_train.reshape((n, -1))
print(data.shape)

(60000, 784)


In [4]:
# Declare the experiment settings and the result accumulators used by the
# training, prediction and reporting cells.
max_k = 29               # upper bound for k; the loops visit k = 1, 3, ..., max_k
dist_algo = 'minkowski'  # passed to the classifier as its `metric`
headers = ['train-time', 'predict-time', 'accuracy']  # columns of the results table

# Per-k results; each list receives one entry per value of k.
index_name, train_time, predict_time, accuracy = [], [], [], []

In [5]:
# training
# Fit one k-NN classifier per odd k in [1, max_k], record how long each fit
# takes, and pickle every fitted model to '<k>-NN <dist_algo>.sav'.

# Start from empty accumulators so that re-running this cell does not append
# duplicate rows on top of an earlier run.
index_name = []
train_time = []

for k in range(1, max_k+1, 2):
    print('Training ' + str(k) + '-NN with ' + dist_algo + ' distance algorithm')
    index_name.append(str(k) + '-NN')

    # create KNN classifier
    # Construction and fit are timed together. perf_counter() is a monotonic
    # clock intended for measuring intervals, unlike the wall-clock time().
    start = time.perf_counter()
    knn = KNeighborsClassifier(algorithm='auto', metric=dist_algo, p=2, n_neighbors=k)
    knn.fit(data, val_train)
    end = time.perf_counter()
    print("Training time: %s seconds" % str(end - start))
    train_time.append(round(end - start, 2))

    # save model
    # The context manager closes the file handle even if pickling raises.
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(knn, model_file)
    print("%s saved\n" % filename)
print('Train complete!')

Training 1-NN with minkowski distance algorithm
Training time: 20.350151300430298 seconds
1-NN minkowski.sav saved

Training 3-NN with minkowski distance algorithm
Training time: 21.160536527633667 seconds
3-NN minkowski.sav saved

Training 5-NN with minkowski distance algorithm
Training time: 20.04659652709961 seconds
5-NN minkowski.sav saved

Training 7-NN with minkowski distance algorithm
Training time: 21.631746530532837 seconds
7-NN minkowski.sav saved

Training 9-NN with minkowski distance algorithm
Training time: 22.265865325927734 seconds
9-NN minkowski.sav saved

Training 11-NN with minkowski distance algorithm
Training time: 23.14009404182434 seconds
11-NN minkowski.sav saved

Training 13-NN with minkowski distance algorithm
Training time: 22.678683280944824 seconds
13-NN minkowski.sav saved

Training 15-NN with minkowski distance algorithm
Training time: 21.01505160331726 seconds
15-NN minkowski.sav saved

Training 17-NN with minkowski distance algorithm
Training time: 22.27

In [6]:
# Flatten every test image (a matrix) into a single row vector, the same way
# the training images are flattened.
# (A leftover `start = time.time()` was removed here: nothing in this cell is
# timed, and the prediction loop sets its own `start` before reading it.)
n = len(img_test)
test_data = img_test.reshape(n, -1)

In [7]:
# predict data
# For every odd k in [1, max_k], reload the pickled model, time its
# prediction over the flattened test set, and record the accuracy.

# Start from empty accumulators so that re-running this cell does not append
# duplicate entries on top of an earlier run.
predict_time = []
accuracy = []

for k in range(1, max_k+1, 2):
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    # NOTE(security): pickle.load can execute arbitrary code. Only load model
    # files written by the training cell of this notebook.
    with open(filename, 'rb') as model_file:
        loaded_knn = pickle.load(model_file)

    print('Predicting ' + str(k) + '-NN')
    # Time only the predict() call, using a monotonic interval clock.
    start = time.perf_counter()
    predicted = loaded_knn.predict(test_data)
    end = time.perf_counter()
    print(len(predicted))
    print("Predict time: %s seconds" % str(end - start))
    predict_time.append(round(end - start, 2))

    # calculate accuracy: fraction of test samples whose predicted value
    # equals the true value.
    accuracy.append(float(metrics.accuracy_score(val_test, predicted)))
print('Predict complete')

Predicting 1-NN
10000
Predict time: 873.6929380893707 seconds
Predicting 3-NN
10000
Predict time: 848.7092046737671 seconds
Predicting 5-NN
10000
Predict time: 894.2036201953888 seconds
Predicting 7-NN
10000
Predict time: 811.653119802475 seconds
Predicting 9-NN
10000
Predict time: 844.6570854187012 seconds
Predicting 11-NN
10000
Predict time: 814.6990456581116 seconds
Predicting 13-NN
10000
Predict time: 840.1894521713257 seconds
Predicting 15-NN
10000
Predict time: 799.0764000415802 seconds
Predicting 17-NN
10000
Predict time: 826.1790955066681 seconds
Predicting 19-NN
10000
Predict time: 897.9032068252563 seconds
Predicting 21-NN
10000
Predict time: 861.779942035675 seconds
Predicting 23-NN
10000
Predict time: 853.4000067710876 seconds
Predicting 25-NN
10000
Predict time: 844.5542941093445 seconds
Predicting 27-NN
10000
Predict time: 869.9431896209717 seconds
Predicting 29-NN
10000
Predict time: 912.558842420578 seconds
Predict complete


In [8]:
# save result
# Assemble the per-k measurements into a table, print it and write it to
# 'result.csv'.

# zip() silently truncates to the shortest input, so a partially executed or
# repeated training/prediction cell would otherwise produce a misaligned or
# shortened table. Fail loudly instead.
result_columns = (train_time, predict_time, accuracy)
result_lengths = [len(index_name)] + [len(col) for col in result_columns]
if len(set(result_lengths)) != 1:
    raise ValueError(
        'result lists have mismatched lengths %s; '
        're-run the training and prediction cells' % result_lengths
    )

list_of_result = list(zip(*result_columns))
df = pd.DataFrame(list_of_result, columns=headers, index=index_name)
print(df)
df.to_csv('result.csv', sep=',')

       train-time  predict-time  accuracy
1-NN        20.35        873.69    0.9691
3-NN        21.16        848.71    0.9705
5-NN        20.05        894.20    0.9688
7-NN        21.63        811.65    0.9694
9-NN        22.27        844.66    0.9659
11-NN       23.14        814.70    0.9668
13-NN       22.68        840.19    0.9653
15-NN       21.02        799.08    0.9633
17-NN       22.28        826.18    0.9630
19-NN       18.17        897.90    0.9632
21-NN       21.73        861.78    0.9630
23-NN       26.41        853.40    0.9619
25-NN       27.14        844.55    0.9609
27-NN       26.35        869.94    0.9604
29-NN       24.36        912.56    0.9593
