# K Nearest Neighbours from Scratch


In [44]:
import numpy as np

In [45]:
data = np.array([
    [150, 7.0, 1, 'Apple'],
    [120, 6.5, 0, 'Banana'],
    [180, 7.5, 2, 'Orange'],
    [155, 7.2, 1, 'Apple'],
    [110, 6.0, 0, 'Banana'],
    [190, 7.8, 2, 'Orange'],
    [145, 7.1, 1, 'Apple'],
    [115, 6.3, 0, 'Banana']
])


In [46]:
data.shape

(8, 4)

In [47]:
X= data[:,:-1].astype(np.float32)
y = data[:,-1]

In [48]:
label_encoding = {'Apple':0,'Banana':1,'Orange':2}
for i  in range(len(y)):
    y[i] = label_encoding[y[i]]

y = y.astype(np.int32)

In [49]:
X

array([[150. ,   7. ,   1. ],
       [120. ,   6.5,   0. ],
       [180. ,   7.5,   2. ],
       [155. ,   7.2,   1. ],
       [110. ,   6. ,   0. ],
       [190. ,   7.8,   2. ],
       [145. ,   7.1,   1. ],
       [115. ,   6.3,   0. ]], dtype=float32)

In [50]:
y

array([0, 1, 2, 0, 1, 2, 0, 1], dtype=int32)

In [51]:
def normDistance(x,y,l):
    n = len(x)
    sum =0
 
    for i in range(n):
        sum += np.abs((x[i] - y[i]))**l
    
    return np.sqrt(sum)

print(normDistance(X[0],X[1],2))


30.02082610455615


In [52]:
# Takes an array and return indices of k smallest elements in theta(nk) time 

def k_smallest_indices(arr,k):


    arr_index_map = []
    
    n = len(arr)
    for i in range(n):
        arr_index_map.append([i,arr[i]])
    
    smallest_indices = []

    for i in range(k):
        for j in range(0,n-i-1):
            if arr_index_map[j][1]< arr_index_map[j+1][1]:
                arr_index_map[j] ,arr_index_map[j+1] = arr_index_map[j+1] , arr_index_map[j]
    
        smallest_indices.append(arr_index_map[n-i-1][0])
    return smallest_indices


In [53]:
class KNeighborsClassifier:
    def __init__(self,k=3,distance_metric =2,isWeighted=False):
        self.n_neighbors = k
        self.X_train = None
        self.y_train = None
        self.distance_metric = distance_metric
        self.inverse_distance_weighted_classification = isWeighted


    def fit(self,X,y):
        self.X_train = X
        self.y_train =  y

    def predict(self,X_test):
        predict_labels = []
        for x_test in X_test: 
            predict_labels.append(self.predict_one(x_test))
        return np.array(predict_labels)
    
    def predict_one(self, x):
         
    
        distances = np.array([normDistance(x,x_train,self.distance_metric) for x_train in self.X_train])
        xnn = k_smallest_indices(distances,self.n_neighbors)
        ynn = self.y_train[xnn]  
        if self.inverse_distance_weighted_classification ==False:
           labels,counts = np.unique(ynn,return_counts=True)
           predict_label = labels[np.argmax(counts)] 
           return predict_label
        else:
            k_smallest_distances =distances[xnn]

            # Each ynn is given weightage inversely corresponding to it's distance
            
            weights = np.array([1/distance for distance in k_smallest_distances])
        

            classes = np.unique(self.y_train)
            contributions = np.zeros(np.size(classes))
      

            for i in range(self.n_neighbors):
                contributions[ynn[i]] += weights[i]


            
            #The class with most contribution
            return np.argmax(contributions)
    

    

    
        

        









# Testing the Classifier

In [54]:
X_test = np.array([
    [118, 6.2, 0],  # Expected: Banana
    [160, 7.3, 1],  # Expected: Apple
    [185, 7.7, 2]   # Expected: Orange
])
y_test = np.array([1,0,2])
y_test


array([1, 0, 2])

In [55]:
my_knn = KNeighborsClassifier(3,2)
my_knn.fit(X,y)

y_pred = my_knn.predict(X_test)

print(y_pred)

[1 0 2]


In [56]:
my_knn.predict_one(X_test[1])

0

# Evaluation

In [57]:
def accuracy(y,y_hat):
    return (np.sum(y == y_hat)/np.size(y) )*100
    

In [58]:
print(accuracy(y_test,y_pred))

100.0


### Observing by changing the values of k

In [59]:
k_values = [1,3,5]
for k in k_values:
    my_knn = KNeighborsClassifier(3)
    my_knn.fit(X,y)
    y_pred = my_knn.predict(X_test)
    print(f'K :{k} , Accuracy : {accuracy(y_test,y_pred)}')
    

K :1 , Accuracy : 100.0
K :3 , Accuracy : 100.0
K :5 , Accuracy : 100.0


In [60]:
## Observation by changing the distance metric while keeping the k =3 

In [61]:
distance_metrics =[0,1,2]
for distance_metric in distance_metrics:
    myknn = KNeighborsClassifier(3,distance_metric)
    my_knn.fit(X,y)
    y_pred = my_knn.predict(X_test)
    print(f' Distance Metric : l_{distance_metric} ,  K :{3} , Accuracy : {accuracy(y_test,y_pred)}')




 Distance Metric : l_0 ,  K :3 , Accuracy : 100.0
 Distance Metric : l_1 ,  K :3 , Accuracy : 100.0
 Distance Metric : l_2 ,  K :3 , Accuracy : 100.0


# Weighted KNN

In [62]:
# Implemented weighted KNN 


my_knn = KNeighborsClassifier(3,2,True)
my_knn.fit(X,y)

y_pred = my_knn.predict(X_test)

print(y_pred)
print(accuracy(y_test,y_pred))


[1 0 2]
100.0
