In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# The bare 'seaborn' style sheet was renamed to 'seaborn-v0_8' in matplotlib 3.6
# and the old name was removed in later releases. Prefer the new name when this
# matplotlib provides it, and fall back to the legacy name on older versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
from scipy import stats

from sklearn.datasets import make_blobs,make_classification
from sklearn.model_selection import train_test_split

## Distance Computation

In [8]:
def EuclidianDistance(x1,x2):
    """Euclidean (L2) distance between ``x1`` and ``x2``, reduced along axis 1.

    The element-wise difference ``x1 - x2`` is squared, summed over axis 1 and
    square-rooted, so the difference must have at least two dimensions
    (e.g. a ``(n, d)`` matrix minus a ``(d,)`` vector yields ``n`` distances).
    """
    delta = x1 - x2
    squared_sum = np.sum(np.square(delta), axis=1)
    return np.sqrt(squared_sum)

def ManhattanDistance(x1,x2):
    """Manhattan (L1) distance between ``x1`` and ``x2``, reduced along axis 1.

    The absolute element-wise differences of ``x1 - x2`` are summed over
    axis 1, so the difference must have at least two dimensions.
    """
    delta = x1 - x2
    absolute_delta = np.absolute(delta)
    return np.sum(absolute_delta, axis=1)

In [10]:
class KNN():
    """Brute-force k-nearest-neighbours model for classification or regression.

    ``fit`` only memorises the training data. ``predict`` measures the distance
    from one query point to every stored sample with ``distance_metric`` and
    aggregates the targets of the ``k`` closest samples: a majority vote when
    ``task_type == "Classification"``, their mean for any other task type.
    """

    def __init__(self,k,distance_metric = EuclidianDistance,task_type = "Classification"):
        """Store the hyper-parameters.

        Parameters
        ----------
        k : int
            Number of neighbours consulted for each prediction.
        distance_metric : callable
            Called as ``distance_metric(X_train, query)``; must return one
            distance per training sample.
        task_type : str
            ``"Classification"`` selects majority voting; any other value
            selects regression (mean of the neighbours' targets).
        """
        self.k = k
        self.distance_metric = distance_metric
        self.task_type = task_type

    def fit(self,X,y):
        """Memorise the training features ``X`` and targets ``y``.

        Both are converted with ``np.asarray`` so that they can later be
        indexed with an array of neighbour indices.
        """
        self._X = np.asarray(X)
        self._y = np.asarray(y)

    def predict(self,X,newExample = None):
        """Predict the target of a single query point.

        Parameters
        ----------
        X : array-like
            When ``newExample`` is omitted this is the query point itself
            (the one-argument form used by ``eval``). When ``newExample`` is
            given, ``X`` is ignored: the data stored by ``fit`` is always used
            as the reference set.
        newExample : array-like, optional
            The query point, for callers using the two-argument form.

        Returns
        -------
        tuple
            ``(label, indices)`` where ``indices`` are the positions of the
            ``k`` nearest training samples (in no particular order).

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 or larger than the number of stored
            training samples.
        """
        query = X if newExample is None else newExample

        # The metric is an instance attribute, not a module-level name.
        distance_vector = np.asarray(self.distance_metric(self._X, query))

        n_samples = distance_vector.shape[0]
        if not 1 <= self.k <= n_samples:
            raise ValueError(
                "k must be between 1 and the number of training samples "
                "(%d), got %r" % (n_samples, self.k)
            )

        # Partitioning around position k-1 puts the k smallest distances in the
        # first k slots and, unlike kth=k, stays in bounds when k == n_samples.
        k_nearest_neigbours_indices = np.argpartition(distance_vector, self.k - 1)[:self.k]

        k_nearest_neigbours = self._y[k_nearest_neigbours_indices]

        if self.task_type == "Classification":
            # Majority vote. np.unique returns sorted values, so ties resolve to
            # the smallest label, and the result is always a scalar.
            values, counts = np.unique(k_nearest_neigbours, return_counts=True)
            label = values[np.argmax(counts)]
        else:
            label = k_nearest_neigbours.mean()

        return label, k_nearest_neigbours_indices

    def eval(self,X_test,y_test):
        """Score the model on a labelled test set.

        Each row ``X_test[i, :]`` is predicted independently.

        Returns
        -------
        float
            For ``task_type == "Classification"``: the accuracy, i.e. the
            fraction of predictions equal to ``y_test`` (higher is better).
            Otherwise: the root-mean-squared error between predictions and
            ``y_test`` (lower is better).
        """
        X_test = np.asarray(X_test)
        y_test = np.asarray(y_test)
        is_classification = self.task_type == "Classification"

        # Class labels keep the dtype of y_test; regression outputs are floats.
        y_predicted = np.empty(
            y_test.shape, dtype=y_test.dtype if is_classification else float
        )
        for i in range(y_test.shape[0]):
            # predict returns (label, neighbour_indices); only the label is scored.
            y_predicted[i], _ = self.predict(X_test[i, :])

        if is_classification:
            return float(np.mean(y_test == y_predicted))

        error_vector = y_predicted - y_test
        return float(np.sqrt(np.mean(np.square(error_vector))))
            
        