In [None]:
#Let us Build a KNN classification model from scratch and compare it with standerd knn algorthm built from libraries

In [40]:
#import necessary libraries 

import numpy as np
from collections import Counter
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [41]:
# Defining a function "euclidean_distance" for calculating distance between two points

def euclidean_distance(x1, x2):
    distance = np.sqrt(np.sum((x1-x2)**2))
    return distance

#Defining "KNN" function which actually finds the distance between the point with its K nearest neighbours and selects the neighbour with majority votes
 
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        predictions = [self._predict(x) for x in X]
        return predictions

    def _predict(self, x):
        # compute the distance
        distances = [euclidean_distance(x, x_train) for x_train in self.X_train]
    
        # get the closest k
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # majority voye
        most_common = Counter(k_nearest_labels).most_common()
        return most_common[0][0]
    

In [42]:
#Taking iris dataset for for knn algorithm

iris = datasets.load_iris()
X, y = iris.data, iris.target

#Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=301)

#Lets take K = 5 for suppose
clf = KNN(k=5)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print(predictions)

[2, 0, 1, 2, 0, 2, 2, 0, 0, 1, 2, 1, 0, 1, 0, 0, 0, 2, 2, 1, 0, 1, 0, 0, 0, 2, 0, 2, 1, 2]


In [43]:
#Checking accuracy
acc = np.sum(predictions == y_test) / len(y_test)
print(acc)

0.9666666666666667


In [44]:
#We got 96.67% accuracy through this algorithm we built from scratch
#Lets compare the accuracy with the algorithm made with standard libraries

In [45]:
from sklearn.neighbors import KNeighborsClassifier 

In [50]:
# Loading data 
irisData = datasets.load_iris() 
  
# Create feature and target arrays 
X = irisData.data 
y = irisData.target 
  
# Split into training and test set 
X_train, X_test, y_train, y_test = train_test_split( 
             X, y, test_size = 0.2, random_state=42) 
  
knn = KNeighborsClassifier(n_neighbors=7) 
  
knn.fit(X_train, y_train) 
predictions=knn.predict(X_test)
print(knn.predict(X_test)) 


[1 0 2 1 1 0 1 2 2 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


In [52]:
#Checking accuracy
print(knn.score(X_test, y_test)) 

from sklearn.metrics import classification_report,confusion_matrix
print(classification_report(predictions,y_test))
print(confusion_matrix(predictions,y_test))

0.9666666666666667
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.89      1.00      0.94         8
           2       1.00      0.92      0.96        12

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

[[10  0  0]
 [ 0  8  0]
 [ 0  1 11]]


In [None]:
# Comparision : Scratch-built KNN vs Library-based KNN:

# 1. Accuracy:
#  Both models have similar accuracy (~96.67%), but the library may be a bit better due to optimizations.

# 2. Training Time:
#  Scratch-built is slower, while the library model is faster thanks to optimizations like KD-trees.

# 3. Code Complexity:
#  Scratch version is more complex, as it requires manual implementation.
#  Library version is much simpler and easier to write.

# 4. Scalability:
#  Scratch-built struggles with large datasets, while the library model scales much better.
