In [None]:
# default_exp models.KNN

# models.KNN

> Implementation of K Nearest Neighbours using Euclidean distance, with both standard majority voting and distance-weighted voting (Weighted KNN).

In [None]:
#export
import warnings 
# NOTE(review): this cell is marked for export, so the blanket "ignore" filter
# presumably also applies to anyone importing the generated module — confirm
# this is intended rather than a narrower, category-specific filter.
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from collections import defaultdict, Counter

from sklearn.model_selection import train_test_split
from sklearn import datasets

In [None]:
#hide
# Load the Iris data and hold out 35% of the samples for testing; the fixed
# random_state keeps the split reproducible across runs.
iris = datasets.load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.35,
    random_state=41,
)

The Iris dataset is loaded from scikit-learn and split into a training set (65%) and a test set (35%).

In [None]:
# Preview the first three training samples (one row per sample).
X_train[:3]

array([[5.2, 3.5, 1.5, 0.2],
       [5.5, 4.2, 1.4, 0.2],
       [5.9, 3. , 5.1, 1.8]])

In [None]:
# Class labels of the first three training samples.
y_train[:3]

array([0, 0, 2])

In [None]:
# Distinct class labels in the training split, in order of first appearance.
print(f"Classes = {list(Counter(y_train))}")

Classes = [0, 2, 1]


In [None]:
#export
class KNearestNeighbors:
    """K Nearest Neighbour Algorithm , Standard and Weighted KNN

    Neighbours are ranked by Euclidean distance. Two voting schemes exist,
    selected through ``distance_metric``:

    * ``"standard"``: every one of the k nearest neighbours casts one vote.
    * ``"weighted KNN"``: the vote is delegated to ``distance_weights``,
      which receives the ``(label, distance)`` pairs of the k neighbours.
    """

    def __init__(self, k, distance_metric = 'standard'):
        """
        Args:

            k(int): Take K nearest neighbours into account. Must be >= 1.

            distance_metric(str): "weighted KNN" | "standard". Any value other
                than "weighted KNN" falls back to the standard majority vote.

        Raises:

            ValueError: if ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.distance_metric = distance_metric

    def fit(self, X, y):
        """Train KNN by memorising the training set.

        Args:

            X: Array-like of shape (n_samples, n_features).

            y: Array-like of shape (n_samples,) holding the class labels.
        """
        # Convert once so that predict can rely on numpy broadcasting and
        # fancy indexing even when plain Python lists are passed in.
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        # Distinct labels in order of first appearance.
        self.labels = list(Counter(self.y.tolist()).keys())

    def predict(self, X):
        """ Prediction for samples.

        Args:

            X: Numpy array of shape(m, n)

        Returns:

            array of shape (m,) with the same dtype as the training labels
        """
        X = np.asarray(X)
        weighted = self.distance_metric == "weighted KNN"
        # Collect in a list and convert once at the end; np.append would copy
        # the whole array on every iteration.
        predictions = []

        for test_sample in X:
            # Distance from this sample to every training row in one shot.
            diff = self.X - test_sample
            distance = np.sqrt(np.sum(diff * diff, axis=1))

            # get indexes for k closest points
            k_min_distance_indexes = distance.argsort()[:self.k]
            # get gold predictions using indexes.
            prediction_pool = self.y[k_min_distance_indexes]

            if weighted:
                # Reuse the argsort result instead of sorting a second time.
                pool = zip(prediction_pool.tolist(), distance[k_min_distance_indexes])
                predictions.append(distance_weights(pool, self.labels))
            else:
                # Majority vote; np.unique returns sorted labels, so argmax
                # resolves ties in favour of the smallest label.
                k_min_values, k_min_counts = np.unique(prediction_pool, return_counts=True)
                predictions.append(k_min_values[np.argmax(k_min_counts)])

        return np.array(predictions, dtype=self.y.dtype)
    
def euclidean_distance(x1, x2):
    """Euclidean distance between numpy arrays.

    Args:

        x1, x2: Numpy arrays that can be subtracted from one another.

    Returns:

        Square root of the sum of all squared element-wise differences.
    """
    delta = x1 - x2
    squared_total = np.sum(delta * delta)
    return np.sqrt(squared_total)

def distance_weights(pool, labels):
    """Pick the label with the largest inverse-distance weighted vote.

    Args:

        pool: Iterable of ``(label, distance)`` pairs, one per neighbour.

        labels: Candidate class labels. Any hashable values are accepted;
            they do not have to be the integers ``0..n-1``. Pairs in ``pool``
            whose label is not listed here are ignored.

    Returns:

        The label whose neighbours sum to the highest ``1 / distance``.
        Ties go to the smallest label when the labels can be ordered,
        otherwise to the label listed first in ``labels``.

    Raises:

        ValueError: if ``labels`` is empty.
    """
    # Keyed by label rather than by position, so arbitrary label values work
    # and the value returned is the label itself, not a list index.
    weights = {label: 0.0 for label in labels}

    for pool_label, distance in pool:
        if pool_label in weights:
            # A zero distance is an exact match: give it an infinite vote
            # instead of dividing by zero.
            weights[pool_label] += 1.0 / distance if distance else float("inf")

    try:
        candidates = sorted(weights)
    except TypeError:
        # Labels of mixed, non-comparable types: keep the given order.
        candidates = list(weights)

    # max() keeps the first maximum, which yields the tie-break described above.
    return max(candidates, key=weights.__getitem__)

In [None]:
#hide
def accuracy(y_test, y_pred):
    """Fraction of positions at which ``y_pred`` equals ``y_test``.

    Both arguments are indexed by position ``0 .. len(y_test) - 1``.
    """
    n_samples = len(y_test)
    n_hits = sum(1 for i in range(n_samples) if y_test[i] == y_pred[i])
    return n_hits / n_samples

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
# Render the nbdev documentation entry for the predict method.
show_doc(KNearestNeighbors.predict)

<h4 id="KNearestNeighbors.predict" class="doc_header"><code>KNearestNeighbors.predict</code><a href="__main__.py#L22" class="source_link" style="float:right">[source]</a></h4>

> <code>KNearestNeighbors.predict</code>(**`X`**)

Prediction for samples.

Args:

    X: Numpy array of shape(m, n)
    
Returns:

    array of shape (m,)

In [None]:
# Standard KNN: plain majority vote among the 5 nearest neighbours.
knn = KNearestNeighbors(k=5, distance_metric="standard")
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

In [None]:
 # Test-set accuracy of the predictions from the standard KNN cell above.
 accuracy(y_test, pred)

0.9433962264150944

In [None]:
# Weighted KNN: the 5 nearest neighbours vote with distance-based weights.
knn = KNearestNeighbors(k=5, distance_metric="weighted KNN")
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

In [None]:
 # Test-set accuracy of the predictions from the weighted KNN cell above.
 accuracy(y_test, pred)

0.9433962264150944

In [None]:
#hide
from nbdev.export import *
# Run nbdev's notebook conversion; the output lists each notebook processed.
notebook2script()

Converted 01_count_vectorizer.ipynb.
Converted 02_naive_bayes.ipynb.
Converted 03_KNN.ipynb.
Converted index.ipynb.
Converted main.ipynb.
