In [1]:
from matplotlib import pyplot as plt
from sklearn import datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import mnist
import pickle
import time
import pandas as pd

# Load the MNIST train/test splits (image arrays plus their labels).
(img_train, val_train), (img_test, val_test) = mnist.load_data()
print(img_train.shape)
print(img_test.shape)

# Flatten every training image (a matrix) into a single row vector.
n = img_train.shape[0]
data = img_train.reshape((n, -1))
print(data.shape)

# Experiment configuration.
# NOTE(review): max_k is not referenced by the loops visible in this file,
# which iterate over range(1, 40, 2).
max_k = 29
dist_algo = 'correlation'  # metric name handed to KNeighborsClassifier

# Per-k result accumulators; one entry is appended per model.
index_name, train_time, predict_time, accuracy = [], [], [], []

# Column names of the result table.
headers = ['train-time', 'predict-time', 'accuracy']

# training: fit and persist one k-NN classifier for every odd k in 1..39
for k in range(1, 40, 2):
    print('Training ' + str(k) + '-NN with ' + dist_algo + ' distance algorithm')
    index_name.append(str(k) + '-NN')

    # create KNN classifier; the timed span covers construction plus fit()
    # NOTE(review): p is documented as the Minkowski power parameter, so it
    # presumably has no effect with this metric -- confirm before removing.
    start = time.time()
    knn = KNeighborsClassifier(metric=dist_algo, p=2, n_neighbors=k)
    knn.fit(data, val_train)
    end = time.time()
    print("Training time: %s seconds" % str(end - start))
    train_time.append(round(end - start, 2))

    # save model; the context manager flushes and closes the file immediately
    # instead of leaving the handle open until garbage collection
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    with open(filename, 'wb') as model_file:
        pickle.dump(knn, model_file)
    print("%s saved\n" % filename)
print('Train complete!')

# Flatten every test image (a matrix) into a single row vector.
n = img_test.shape[0]
test_data = img_test.reshape((n, -1))
print('Complete')

# predict data: reload each saved model, time its predictions on the test
# vectors and record the fraction of labels it gets right
for k in range(1, 40, 2):
    filename = str(k) + '-NN ' + dist_algo + '.sav'
    # pickle is unsafe on untrusted input; only load model files that were
    # produced locally. The context manager closes the handle right away.
    with open(filename, 'rb') as model_file:
        loaded_knn = pickle.load(model_file)

    print('Predicting ' + str(k) + '-NN')
    start = time.time()
    predicted = loaded_knn.predict(test_data)
    end = time.time()
    print(len(predicted))
    print("Predict time: %s seconds" % str(end - start))
    predict_time.append(round(end - start, 2))

    # accuracy = correctly predicted labels / total labels
    accuracy.append(float(metrics.accuracy_score(val_test, predicted)))
print('Predict complete')

# Assemble one (train-time, predict-time, accuracy) row per model, show the
# table, and write it to result.csv.
list_of_result = [
    (fit_secs, predict_secs, acc)
    for fit_secs, predict_secs, acc in zip(train_time, predict_time, accuracy)
]
df = pd.DataFrame(data=list_of_result, index=index_name, columns=headers)
print(df)
df.to_csv('result.csv', sep=',')


Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
(60000, 28, 28)
(10000, 28, 28)
(60000, 784)
Training 1-NN with correlation distance algorithm
Training time: 0.006345987319946289 seconds
1-NN correlation.sav saved

Training 3-NN with correlation distance algorithm
Training time: 0.006032466888427734 seconds
3-NN correlation.sav saved

Training 5-NN with correlation distance algorithm
Training time: 0.0058252811431884766 seconds
5-NN correlation.sav saved

Training 7-NN with correlation distance algorithm
Training time: 0.009004354476928711 seconds
7-NN correlation.sav saved

Training 9-NN with correlation distance algorithm
Training time: 0.008899211883544922 seconds
9-NN correlation.sav saved

Training 11-NN with correlation distance algorithm
Training time: 0.008839845657348633 seconds
11-NN correlation.sav saved

Training 13-NN with correlation distance algorithm
Training time: 0.00615239143371582 seconds
13-NN correlation.sav saved

Training 15-NN with correla