In [7]:
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Unpack the iris dataset, from UCI Machine Learning Repository
iris = datasets.load_iris()
X = iris['data']
y = iris['target']
# Preprocess data
X = StandardScaler().fit_transform(X)
# Split data into train & test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [1]:
def most_common(lst):
    '''Returns the most common element in a list'''
    return max(set(lst), key=lst.count)
def euclidean(point, data):
    '''Euclidean distance between a point  & data'''
    return np.sqrt(np.sum((point - data)**2, axis=1))

In [2]:
class KNNClassifier():
    def __init__(self, k=5, dist_metric=euclidean):
        self.k = k
        self.dist_metric = dist_metric
    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
    def predict(self, X_test):
        neighbors = []
        for x in X_test:
            distances = self.dist_metric(x, self.X_train)
            y_sorted = [y for _, y in sorted(zip(distances, self.y_train))]
            neighbors.append(y_sorted[:self.k])
        return list(map(most_common, neighbors))
    def evaluate(self, X_test, y_test):
        y_pred = self.predict(X_test)
        accuracy = sum(y_pred == y_test) / len(y_test)
        return accuracy

In [8]:
aaccuracies = []
ks = range(1, 30,3)
for k in ks:
    knn = KNNClassifier(k=k)
    knn.fit(X_train, y_train)
    accuracy = knn.evaluate(X_test, y_test)
    print("k= " , k , "accuracy= ", (accuracy * 100))
    aaccuracies.append(accuracy)

k=  1 accuracy=  90.0
k=  4 accuracy=  93.33333333333333
k=  7 accuracy=  93.33333333333333
k=  10 accuracy=  93.33333333333333
k=  13 accuracy=  93.33333333333333
k=  16 accuracy=  93.33333333333333
k=  19 accuracy=  93.33333333333333
k=  22 accuracy=  93.33333333333333
k=  25 accuracy=  100.0
k=  28 accuracy=  100.0
