In [3]:
from matplotlib import pyplot as plt
from sklearn import datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import mnist
import time
import pandas as pd
import pickle

Using TensorFlow backend.


In [4]:
# Fetch the MNIST dataset: each split is an (images, labels) pair.
train_split, test_split = mnist.load_data()
img_train, val_train = train_split
img_test, val_test = test_split

# Show the array shapes of the training and test images, one per line.
print(img_train.shape, img_test.shape, sep='\n')

(60000, 28, 28)
(10000, 28, 28)


In [5]:
# Flatten every training image (a 2-D matrix) into a single row vector,
# giving one row per image.
n = img_train.shape[0]
data = img_train.reshape((n, -1))
print(data.shape)

# Experiment settings: odd k values up to max_k are evaluated with the
# distance metric named by dist_algorithm.
max_k, dist_algorithm = 29, 'chebyshev'

# Column labels of the final result table, and the accumulators that the
# training / prediction cells fill in (one entry per evaluated k).
headers = ['Training time', 'Predict time', 'Accuracy']
index_name, train_time, predict_time, accuracy = [], [], [], []

# Training
# Fit one k-NN classifier for every odd k in [1, max_k], record how long the
# fit took, and pickle each fitted model so the prediction cell can reload it.
for k in range(1, max_k + 1, 2):
    print('Training ' + str(k) + '-NN with ' + dist_algorithm + ' distance algorithm')
    index_name.append(str(k) + '-NN')

    # Create and fit the k-NN classifier; only construction + fit are timed.
    # NOTE(review): p is documented as the Minkowski power parameter, so it
    # presumably has no effect when metric='chebyshev' -- confirm before relying on it.
    start = time.time()
    knn = KNeighborsClassifier(algorithm='auto', metric=dist_algorithm, p=2, n_neighbors=k)
    knn.fit(data, val_train)
    end = time.time()
    print("Training time: %s seconds" %str(end - start))
    train_time.append(round(end - start, 2))

    # Save model. The context manager closes (and therefore flushes) the file
    # even if pickling fails, so the prediction cell never reads a partial dump.
    filename = str(k) + '-NN' + dist_algorithm + '.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(knn, model_file)
    print('%s saved\n' %filename)
print('Train complete')

(60000, 784)
Training 1-NN with chebyshev distance algorithm
Training time: 23.437156200408936 seconds
1-NNchebyshev.sav saved

Training 3-NN with chebyshev distance algorithm
Training time: 22.335971117019653 seconds
3-NNchebyshev.sav saved

Training 5-NN with chebyshev distance algorithm
Training time: 22.98640203475952 seconds
5-NNchebyshev.sav saved

Training 7-NN with chebyshev distance algorithm
Training time: 20.913590908050537 seconds
7-NNchebyshev.sav saved

Training 9-NN with chebyshev distance algorithm
Training time: 25.460252285003662 seconds
9-NNchebyshev.sav saved

Training 11-NN with chebyshev distance algorithm
Training time: 25.87964701652527 seconds
11-NNchebyshev.sav saved

Training 13-NN with chebyshev distance algorithm
Training time: 29.480480909347534 seconds
13-NNchebyshev.sav saved

Training 15-NN with chebyshev distance algorithm
Training time: 23.682814836502075 seconds
15-NNchebyshev.sav saved

Training 17-NN with chebyshev distance algorithm
Training time:

In [6]:
# Convert test image (matrix) to vectors
n = len(img_test)
test_data = img_test.reshape(n, -1)

# Start from empty result lists so that re-running this cell replaces the
# previous measurements instead of appending a second set after them.
predict_time = []
accuracy = []

# Predict data
# Reload each pickled classifier, time its prediction over the whole test set,
# and record the fraction of test labels it predicted correctly.
for k in range(1, max_k + 1, 2):
    filename = str(k) + '-NN' + dist_algorithm + '.sav'
    # SECURITY: pickle.load can execute arbitrary code; only load model files
    # that were written by the training cell of this notebook.
    with open(filename, 'rb') as model_file:
        loaded_knn = pickle.load(model_file)

    print('Predicting ' + str(k) + '-NN')
    # Time only the predict() call, not the progress printing.
    start = time.time()
    predicted = loaded_knn.predict(test_data)
    end = time.time()
    print(len(predicted))
    print("Predict time: %s seconds" %str(end - start))
    predict_time.append(round(end - start, 2))

    # Accuracy = share of test samples whose predicted label equals the true one.
    accuracy.append(float(metrics.accuracy_score(val_test, predicted)))
print('Predict complete')

Predicting 1-NN
10000
Predict time: 692.4571189880371 seconds
Predicting 3-NN
10000
Predict time: 733.4472541809082 seconds
Predicting 5-NN
10000
Predict time: 763.0604660511017 seconds
Predicting 7-NN
10000
Predict time: 676.8932678699493 seconds
Predicting 9-NN
10000
Predict time: 665.5059099197388 seconds
Predicting 11-NN
10000
Predict time: 665.9005959033966 seconds
Predicting 13-NN
10000
Predict time: 670.3112831115723 seconds
Predicting 15-NN
10000
Predict time: 657.0918228626251 seconds
Predicting 17-NN
10000
Predict time: 691.3494699001312 seconds
Predicting 19-NN
10000
Predict time: 711.113322019577 seconds
Predicting 21-NN
10000
Predict time: 719.5016930103302 seconds
Predicting 23-NN
10000
Predict time: 663.0905690193176 seconds
Predicting 25-NN
10000
Predict time: 672.8677198886871 seconds
Predicting 27-NN
10000
Predict time: 674.5666410923004 seconds
Predicting 29-NN
10000
Predict time: 674.0396251678467 seconds
Predict complete


In [8]:
# Assemble the benchmark table and write it to disk.
# Each row pairs the training time, prediction time and accuracy recorded for
# one classifier; rows are labelled with the entries of index_name.
list_of_result = [*zip(train_time, predict_time, accuracy)]
df = pd.DataFrame(data=list_of_result, index=index_name, columns=headers)
print(df)

# Comma-separated export of the same table.
df.to_csv('result.csv', sep=',')

       Training time  Predict time   Accuracy
1-NN           23.44        692.46     0.8271
3-NN           22.34        733.45     0.8064
5-NN           22.99        763.06     0.8111
7-NN           20.91        676.89     0.8092
9-NN           25.46        665.51     0.8057
11-NN          25.88        665.90     0.8015
13-NN          29.48        670.31     0.8018
15-NN          23.68        657.09     0.7988
17-NN          21.93        691.35     0.7958
19-NN          21.66        711.11     0.7943
21-NN          21.62        719.50     0.7936
23-NN          23.56        663.09     0.7877
25-NN          23.49        672.87     0.7874
27-NN          23.45        674.57     0.7841
29-NN          23.48        674.04     0.7833
