Manually coding the kNN algorithm.

In [21]:
import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    distance = np.sqrt(np.sum((x1-x2)**2))
    return distance

class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        predictions = [self._predict(x) for x in X]
        return predictions

    def _predict(self, x):
        # compute the distance
        distances = [euclidean_distance(x, x_train) for x_train in self.X_train]

        # get the closest k
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # majority voye
        most_common = Counter(k_nearest_labels).most_common()
        return most_common[0][0]

Importing necessary libraries and making a euclidean distance function for calculating distances.

In [22]:
from sklearn.metrics import *
from numpy.linalg import norm


def euclidean(a, b):
    """Compute and return the euclidean distance between a and b."""
    return norm(a-b)

Creating a test and train dataset and implementing the model.

In [23]:
# testing the kNN model

raw_data = np.array([[1, 2, 1], [3, 2, 1], [2, 4, 1],
                     [3, 3, 1], [2, 5, 1], [-1, -2, 0],
                     [-3, -2, 0], [-2, -4, 0], [-3, -3, 0],
                     [-2, -5, 0]], dtype=float)

X = raw_data[:, :2]
y = raw_data[:, -1]


model = KNearestNeighbors(k=3, distance_metric=euclidean)
model.train(X, y)

print("Value: {},\tPrediction: {}".format([1, 0], model.predict(np.array([1, 0]))))
print("Value: {},\tPrediction: {}".format([0, 1], model.predict(np.array([0, 1]))))
print("Value: {},\tPrediction: {}".format([0, 0], model.predict(np.array([0, 0]))))
print("Value: {},\tPrediction: {}".format([-1, 0], model.predict(np.array([-1, 0]))))
print("Value: {},\tPrediction: {}".format([0, -1], model.predict(np.array([0, -1]))))

Value: [1, 0],	Prediction: 1.0
Value: [0, 1],	Prediction: 1.0
Value: [0, 0],	Prediction: 0.0
Value: [-1, 0],	Prediction: 0.0
Value: [0, -1],	Prediction: 0.0
