In [53]:
import numpy as np
from time import time

#Dimension, Number of training points
d, N = 1000, 10000
X = np.random.randn(N,d)
z = np.random.randn(d)

In [54]:
def dist_pp(z, x):
  d = z - x.reshape(z.shape)
  return np.sum(d*d)

def dist_ps_naive(z, X):
  N = X.shape[0]
  res = np.zeros((1,N))
  for i in range(N):
    res[0][i] = dist_pp(z, X[i])
  return res

def dist_ps_fast(z, X):
  X2 = np.sum(X*X, 1)
  z2 = np.sum(z*z)
  return X2 + z2 - 2*X.dot(z)

In [55]:
#Distance from a point in test set to each point in training set

t1 = time()
D1 = dist_ps_naive(z,X)
print('Naive point to set, running time: ', time() - t1, 's')

t1 = time()
D2 = dist_ps_fast(z,X)
print('Fast point to set, running time: ', time() - t1, 's')

print("Result difference: ", np.linalg.norm(D1 - D2))

Naive point to set, running time:  0.08423280715942383 s
Fast point to set, running time:  0.04024791717529297 s
Result difference:  1.99662053596139e-11


In [58]:
#Distance from each point test set to each point in training set
Z = np.random.rand(100, d)

def dist_ss_naive(Z, X):
  M,N = Z.shape[0], X.shape[0]
  res = np.zeros((M,N))
  for i in range(M):
    res[i] = dist_ps_fast(Z[i], X)
  return res

def dist_ss_fast(Z, X):
  X2 = np.sum(X*X, 1)
  Z2 = np.sum(Z*Z, 1)
  print(X2.shape, Z2.shape)
  return Z2.reshape(-1, 1) + X2.reshape(1, -1) - 2*Z.dot(X.T)

In [60]:
t1 = time()
D3 = dist_ss_naive(Z,X)
print('Naive set to set, running time: ', time() - t1, 's')

t1 = time()
D4 = dist_ss_fast(Z,X)
print(D4.shape)
print('Fast set to set, running time: ', time() - t1, 's')

print('Result difference:', np.linalg.norm(D3 - D4))

Naive set to set, running time:  4.025696516036987 s
(10000,) (100,)
(100, 10000)
Fast set to set, running time:  0.10313940048217773 s
Result difference: 6.761108310705763e-11


In [67]:
#Iris flowers project
from __future__ import print_function
import numpy as np
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split #For splitting data
from sklearn.metrics import accuracy_score #For evaluating results

iris = datasets.load_iris()
iris_X = iris.data
iris_y = iris.target

In [70]:
print('Labels:', np.unique(iris_y))

#Split train and test set
np.random.seed(7) #Ensure that the results among different runnings is the same
X_train, X_test, y_train, y_test = train_test_split(iris_X, iris_y, test_size = 130)
print('Training size:', X_train.shape[0], ', Test size:', X_test.shape[0])

Labels: [0 1 2]
Training size: 20 , Test size: 130


In [73]:
K = 7
model = neighbors.KNeighborsClassifier(n_neighbors = K, p = 2, weights = 'distance')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('Accuracy of %dNN: %.2f %%' % (K, 100 * accuracy_score(y_test, y_pred)))

Accuracy of 7NN: 94.62 %
