In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from collections import Counter

In [2]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to float arrays before any arithmetic, so plain
    Python sequences are accepted and small-integer dtypes (e.g. ``uint8``)
    cannot wrap around during the subtraction or the squaring.

    Parameters
    ----------
    x1, x2 : array-like
        Coordinates of the two points; their shapes must be
        broadcast-compatible.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Cast first: doing the arithmetic in the caller's integer dtype can overflow.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [3]:
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        If ``k`` exceeds the number of training samples, all of them vote.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k = 3):
        # Reject bad k up front: k == 0 would otherwise fail late inside
        # _predict, and a negative k would silently slice the wrong neighbours.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Store the training set; all work is deferred to prediction time.

        Parameters
        ----------
        X : array-like, first axis indexes samples
            Training features. Stored as a float array with every sample
            flattened to one row, i.e. shape ``(n_samples, n_features)``.
        y : array-like of length n_samples
            Training labels. Stored as a numpy array so that neighbours are
            always looked up by position, never by a pandas index label.

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.size == 0:
            raise ValueError("cannot fit KNN on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(X)} and {len(y)}"
            )
        self.X_train = X.reshape(len(X), -1)
        self.y_train = y

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array-like, first axis indexes samples
            Query samples with the same per-sample layout as the training data.

        Returns
        -------
        numpy.ndarray
            One predicted label per query sample, in input order.
        """
        # Convert once so iteration always walks over samples (rows).
        X = np.asarray(X, dtype=float)
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        x = np.asarray(x, dtype=float).ravel()
        # Squared distances to every training row in one vectorised step; the
        # square root is skipped because it does not change the ranking.
        sq_distances = np.sum((self.X_train - x) ** 2, axis=1)
        # A stable sort keeps equidistant samples in training order, which
        # makes the neighbour selection deterministic.
        k_indices = np.argsort(sq_distances, kind="stable")[:self.k]
        # Counter keeps first-seen order, so a tied vote goes to the label
        # that appears earliest among the distance-sorted neighbours.
        votes = Counter(self.y_train[k_indices])
        return votes.most_common(1)[0][0]
def accuracy_score(y_pred, y_test):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_pred : sequence
        Predicted labels.
    y_test : sequence
        True labels, in the same order and of the same length as ``y_pred``.

    Returns
    -------
    float
        Number of matching positions divided by the number of samples.

    Raises
    ------
    ValueError
        If the two inputs differ in length, or if they are empty (accuracy is
        undefined without samples).
    """
    n_samples = len(y_pred)
    if n_samples != len(y_test):
        raise ValueError(
            f"y_pred and y_test must have the same length, "
            f"got {n_samples} and {len(y_test)}"
        )
    if n_samples == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    # zip pairs elements by position, so containers whose [] operator is
    # label-based (e.g. a pandas Series) are still compared in order.
    n_correct = sum(1 for pred, truth in zip(y_pred, y_test) if pred == truth)
    return n_correct / n_samples


In [4]:

# Load the Iris dataset: feature matrix X and integer class labels y.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 50 samples for testing. The fixed random_state pins the shuffle so
# the split -- and every number computed from it below -- is reproducible on
# Restart & Run All instead of changing with each execution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=50, random_state=42
)

In [5]:
# Sanity check: show the shape of the held-out label array.
print(np.shape(y_test))

(50,)


In [6]:
# Fit a 10-neighbour classifier on the training split and label the test split.
knn = KNN(k=10)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Print the same 20-sample window of predictions and ground truth, one above
# the other, for a quick visual comparison.
for caption, labels in (('predict  :', y_pred), ('truth    :', y_test)):
    print(caption, labels[20:40])

predict  : [0 0 1 1 2 2 1 0 1 1 0 2 1 1 1 0 0 2 2 2]
truth    : [0 0 1 1 1 2 1 0 1 1 0 2 1 1 1 0 0 2 2 2]


In [7]:
# Share of test samples whose predicted label matches the true label.
test_accuracy = accuracy_score(y_pred, y_test)
print('accuracy: ', test_accuracy)

accuracy:  0.98
