In [120]:
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [5]:
# Load the bundled iris dataset; the returned object is indexed by key below
# ('data' and 'target').
iris = load_iris()

In [11]:
# Pull the feature matrix and the class labels out of the loaded dataset.
iris_data, iris_target = iris['data'], iris['target']

In [58]:
# Wrap features and labels in DataFrames; the functions below rely on
# `.iloc` / `.values` access.
X, y = pd.DataFrame(iris_data), pd.DataFrame(iris_target)

In [121]:
# Hold out 20% of the samples for evaluation. The split is stratified on the
# labels and uses a fixed seed so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Define the k-nearest-neighbours (kNN) classifier

In [177]:
def euclid_dist(data_1, data_2):
    """Return the row-wise Euclidean distance between two inputs.

    The inputs are subtracted element-wise, the differences are squared and
    summed along axis 1, and the square root of each sum is returned. Because
    the reduction runs over axis 1, the difference must have at least two
    dimensions.
    """
    difference = data_1 - data_2
    squared = difference ** 2
    row_totals = np.sum(squared, axis=1)
    return np.sqrt(row_totals)

In [216]:
def calc_all_distances(data1, data2):
    """Euclidean distance from every row of ``data1`` to the point ``data2``.

    Parameters
    ----------
    data1 : 2-D array-like
        One sample per row.
    data2 : 1-D array-like
        A single point with as many entries as ``data1`` has columns.

    Returns
    -------
    One distance per row of ``data1``, as produced by ``euclid_dist``.
    """
    # Broadcasting subtracts the point from every row, so there is no need to
    # tile it into a full-size copy first. `np.asarray` keeps plain-list
    # inputs working, as they did when the point was tiled into an array.
    return euclid_dist(data1, np.asarray(data2))

In [217]:
def return_n_smallest_indexes(data1, data2, n):
    """Return the positions of the ``n`` rows of ``data1`` closest to ``data2``.

    Distances come from ``calc_all_distances``. The positions are ordered from
    the nearest row to the farthest, and at most ``n`` of them are returned.
    """
    nearest_first = np.argsort(calc_all_distances(data1, data2))
    return nearest_first[:n]

In [229]:
def get_majority_class(xtrain, ytrain, xtest, k=5):
    """Label every row of ``xtest`` by majority vote among its nearest training rows.

    Parameters
    ----------
    xtrain : pandas.DataFrame
        Training features, one sample per row.
    ytrain : pandas.DataFrame
        Training labels, aligned with ``xtrain`` by row position.
    xtest : pandas.DataFrame
        Samples to classify, one per row.
    k : int, default 5
        Number of nearest training rows that vote on each label.

    Returns
    -------
    list
        One predicted label per row of ``xtest``, in row order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # The training matrix does not change between test rows, so extract it once.
    train_values = xtrain.values

    test_labels = []
    for row in range(xtest.shape[0]):
        indexes = return_n_smallest_indexes(train_values, xtest.iloc[row, :].values, k)
        closest_classes = ytrain.iloc[indexes, :].values.reshape(-1)
        # `most_common` lists equally frequent labels in first-seen order, so a
        # tie goes to the tied label whose neighbour is nearest.
        winner = Counter(closest_classes).most_common(1)[0][0]
        test_labels.append(winner)
    return test_labels

In [230]:
# Predicted labels for the held-out samples. Note that `ytest` holds the
# predictions, while `y_test` holds the true labels.
ytest = get_majority_class(xtrain=X_train, ytrain=y_train, xtest=X_test)

In [231]:
# Pair each true label with its prediction for a side-by-side check.
list(zip(y_test.values.ravel(), ytest))

[(0, 0),
 (2, 2),
 (1, 1),
 (1, 1),
 (0, 0),
 (1, 1),
 (0, 0),
 (0, 0),
 (2, 2),
 (1, 1),
 (2, 2),
 (2, 2),
 (2, 2),
 (1, 1),
 (0, 0),
 (0, 0),
 (0, 0),
 (1, 1),
 (1, 1),
 (2, 2),
 (0, 0),
 (2, 2),
 (1, 1),
 (2, 2),
 (2, 2),
 (1, 1),
 (1, 1),
 (0, 0),
 (2, 2),
 (0, 0)]