In [50]:
# Implementing KNN (k-nearest neighbours) from scratch.
# KNN has no real training phase: fitting only stores the training data.

from scipy.stats import mode
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean

class K_NearestNeighbours():
    '''K-nearest-neighbours classifier implemented from scratch with NumPy.

    KNN is an instance-based learner: ``fit`` only stores the training data,
    and ``predict`` compares every test point against every stored training
    point, then takes a majority vote among the closest ones.

    Parameters
    ----------
    n_neighbours : int, default 1
        Number of nearest neighbours that take part in the vote.
    distance_metric : str, default 'Eucledian'
        Distance used to rank neighbours. Accepted values (case-insensitive):
        'Eucledian' (original spelling, kept for backward compatibility),
        'Euclidean' and 'Manhattan'.
    '''

    # Accepted metric names (lower-cased) mapped to the method computing them.
    # 'eucledian' is the historical spelling callers already rely on.
    _METRIC_METHODS = {
        'eucledian': 'compute_eucledian',
        'euclidean': 'compute_eucledian',
        'manhattan': 'compute_manhattan',
    }

    def __init__(self, n_neighbours=1, distance_metric='Eucledian'):
        '''Store the hyper-parameters; they are validated when ``predict`` runs.'''
        self.n_neighbours = n_neighbours
        self.metric = distance_metric

    def fit(self, X, y):
        '''Store the training data (KNN does no learning at fit time).

        Parameters
        ----------
        X : array-like
            Training samples, one sample per row.
        y : array-like
            Label of each training sample.

        Returns
        -------
        str
            A textual description of the configured estimator.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        '''
        # Convert up front so lists / DataFrames / Series behave like arrays:
        # iterating a DataFrame yields column names, and a list cannot be
        # indexed with an array of neighbour positions.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[:1] != y.shape[:1]:
            raise ValueError(
                f'X and y must hold the same number of samples, '
                f'got {X.shape[:1]} and {y.shape[:1]}.'
            )
        self.X = X
        self.y = y

        return f'K_NearestNeighbours(n_neighbours={self.n_neighbours}, distance_metric={self.metric})'

    @staticmethod
    def _difference(train_data, test_data):
        '''Return ``train_data - test_data`` computed in floating point.

        Casting first avoids wrap-around when the features are unsigned
        integers and lets plain Python sequences be passed in.
        '''
        return np.asarray(train_data, dtype=float) - np.asarray(test_data, dtype=float)

    def compute_eucledian(self, train_data, test_data):
        '''Return the Euclidean (L2) distance between two points.'''
        return np.sqrt(np.sum(self._difference(train_data, test_data) ** 2))

    def compute_manhattan(self, train_data, test_data):
        '''Return the Manhattan (L1) distance between two points.

        This is the plain sum of absolute coordinate differences; no square
        root is involved.
        '''
        return np.sum(np.abs(self._difference(train_data, test_data)))

    def majority_vote_classifier(self, labels):
        '''Pick the most frequent label in each row of ``labels``.

        Parameters
        ----------
        labels : array-like
            One row of neighbour labels per test point.

        Returns
        -------
        numpy.ndarray
            The winning label for each row, kept in the labels' own type
            (so string or float class labels are supported). On a tie the
            smallest label wins, because ``np.unique`` returns sorted values
            and ``np.argmax`` returns the first maximum.
        '''
        winners = []
        for neighbour_labels in np.asarray(labels):
            classes, counts = np.unique(neighbour_labels, return_counts=True)
            winners.append(classes[np.argmax(counts)])
        return np.array(winners)

    def _resolve_distance_function(self):
        '''Return the bound distance method matching ``self.metric``.'''
        key = str(self.metric).strip().lower()
        method_name = self._METRIC_METHODS.get(key)
        if method_name is None:
            raise ValueError(
                f"Unknown distance_metric {self.metric!r}; "
                f"expected 'Eucledian' (or 'Euclidean') or 'Manhattan'."
            )
        return getattr(self, method_name)

    def predict(self, x_test):
        '''Predict a label for every row of ``x_test``.

        Parameters
        ----------
        x_test : array-like
            Test samples, one sample per row.

        Returns
        -------
        numpy.ndarray
            Predicted label for each test sample.

        Raises
        ------
        ValueError
            If the metric is unknown, ``n_neighbours`` is not a positive
            integer, or no training samples have been stored.
        '''
        distance_fn = self._resolve_distance_function()

        k = self.n_neighbours
        if not isinstance(k, (int, np.integer)) or k < 1:
            # A negative k would silently slice "all but the last |k|" points.
            raise ValueError(f'n_neighbours must be a positive integer, got {k!r}.')

        # Re-convert here as well so the method also works when X / y were
        # assigned directly instead of through fit().
        train_X = np.asarray(self.X)
        train_y = np.asarray(self.y)
        if train_X.size == 0:
            raise ValueError('Cannot predict without any training samples.')

        self.x_test = x_test

        neighbour_labels = []
        for test_point in np.asarray(x_test):
            # Distance from this test point to every stored training point.
            distances = np.array(
                [distance_fn(train_point, test_point) for train_point in train_X]
            )
            # Positions of the k closest training points.
            nearest = np.argsort(distances)[:k]
            neighbour_labels.append(train_y[nearest])

        return self.majority_vote_classifier(neighbour_labels)

    @staticmethod
    def _as_label_arrays(y_true, y_pred):
        '''Convert both label collections to arrays and check their shapes match.'''
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f'y_true and y_pred must have the same shape, '
                f'got {y_true.shape} and {y_pred.shape}.'
            )
        return y_true, y_pred

    def error_function(self, y_true, y_pred):
        '''Return the fraction of predictions that differ from the true labels.'''
        # Without the array conversion, comparing two lists yields one bool.
        y_true, y_pred = self._as_label_arrays(y_true, y_pred)
        return np.mean(y_true != y_pred)

    def accuracy_score(self, y_true, y_pred):
        '''Return the fraction of predictions that match the true labels.'''
        y_true, y_pred = self._as_label_arrays(y_true, y_pred)
        return np.mean(y_true == y_pred)

In [51]:
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from numpy.random import randint
from sklearn.model_selection import train_test_split
 
# Load the Iris dataset that ships with scikit-learn.
iris = load_iris()

# Feature matrix goes in X, target vector in y.
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [52]:
# Display the first training sample (one row of feature values).
X_train[0]

array([6.6, 2.9, 4.6, 1.3])

In [53]:
# Build the classifier: 90 voting neighbours, Euclidean distance
# ('Eucledian' is the spelling the class expects).
knn = K_NearestNeighbours(
    n_neighbours=90,
    distance_metric='Eucledian',
)

In [54]:
# "Fitting" a KNN model only stores the training samples and their labels.
knn.fit(X=X_train, y=y_train)

'K_NearestNeighbours(n_neighbours=90, distance_metric=Eucledian)'

In [55]:
# Predict a label for every held-out test sample.
y_pred = knn.predict(x_test=X_test)

In [56]:
# Display the predicted labels for the test set.
y_pred

array([2, 2, 0, 2, 0, 1, 2, 1, 0, 2, 2, 2, 1, 0, 0, 2, 2, 0, 0, 0, 2, 2,
       2, 0, 2, 0, 2, 2, 2, 2])

In [57]:
# Accuracy as computed by scikit-learn's accuracy_score (the module-level
# import), used as a reference for the class's own metric.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6666666666666666

In [58]:
# Misclassification rate from the class's own helper.
knn.error_function(y_true=y_test, y_pred=y_pred)

0.3333333333333333

In [59]:
# Element-wise feature difference between the first training sample and the
# first test sample (the quantity the distance functions are built on).
X_train[0]-X_test[0]

array([0.3, 0.6, 0.2, 0. ])

In [60]:
# Accuracy from the class's own helper.
knn.accuracy_score(y_true=y_test, y_pred=y_pred)

0.6666666666666666