## KNN Classifier algorithm from scratch

#### Separating the data into a feature matrix and a label vector

In [None]:
import numpy as np

# Toy fruit dataset: every row holds three numeric features followed by
# the fruit name used as the class label.
data = [
    [150, 7.0, 1, 'Apple'],
    [120, 6.5, 0, 'Banana'],
    [180, 7.5, 2, 'Orange'],
    [155, 7.2, 1, 'Apple'],
    [110, 6.0, 0, 'Banana'],
    [190, 7.8, 2, 'Orange'],
    [145, 7.1, 1, 'Apple'],
    [115, 6.3, 0, 'Banana']
]

# Rows mix numbers and text, so NumPy stores every cell as a string;
# the feature columns are therefore cast back to float after slicing.
data_np = np.array(data)
X = data_np[:, :-1].astype(float)  # feature matrix: everything but the last column
y = data_np[:, -1]                 # label vector: the last column

# Encode the class names as integer ids (np.unique returns the names sorted,
# and y_numeric holds the id of each row's label).
categories, y_numeric = np.unique(y, return_inverse=True)
label_map = dict(zip(categories, range(len(categories))))  # name -> id
reverse_label_map = dict(enumerate(categories))            # id -> name
print("Label encoding:", label_map)

Label encoding: {np.str_('Apple'): 0, np.str_('Banana'): 1, np.str_('Orange'): 2}


##### Writing the Euclidean distance function and the KNN Classifier class

In [None]:
def Euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like of numbers
        Coordinates of the two points. Lists, tuples and NumPy arrays are
        accepted; the shapes must match or be broadcastable.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Cast to float before subtracting: plain Python lists do not support
    # ``-``, and unsigned-integer arrays would silently wrap around on
    # negative differences.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# KNN Classifier
class KNN_Classifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        When ``k`` exceeds the number of training samples, all of them vote.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        # Without this check k == 0 only fails later with an obscure
        # ``max()`` error, and a negative k silently votes with
        # "all but the last |k|" samples.
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Memorise the training set (KNN does no work until prediction).

        Parameters
        ----------
        X : array-like of numbers
            Training samples, one per entry along the first axis.
        y : sequence
            Class label of each sample; labels must be hashable.

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` differ in length.
        """
        # Float conversion lets plain lists work and avoids integer
        # wrap-around when differences are computed later.
        X = np.asarray(X, dtype=float)
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def predict_one(self, x):
        """Predict the class of a single sample.

        Parameters
        ----------
        x : array-like of numbers
            One sample with the same layout as a training sample.

        Returns
        -------
        The label that occurs most often among the ``k`` nearest training
        samples. A tied vote goes to the tied class whose nearest member is
        closest to ``x``.
        """
        x = np.asarray(x, dtype=float)
        # Distance to every training sample at once. Flattening the trailing
        # axes keeps scalar and multi-dimensional samples working.
        squared_diff = ((self.X - x) ** 2).reshape(len(self.X), -1)
        distances = np.sqrt(squared_diff.sum(axis=1))
        # Stable sort: equidistant samples keep their training-set order.
        nearest = np.argsort(distances, kind="stable")[:self.k]

        # Count votes dynamically
        class_votes = {}
        for idx in nearest:
            label = self.y[idx]
            class_votes[label] = class_votes.get(label, 0) + 1

        # dicts preserve insertion order and max() returns the first maximum,
        # so ties resolve in favour of the closest neighbour's class.
        return max(class_votes, key=class_votes.get)

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``; returns a list."""
        return [self.predict_one(x) for x in X_test]


#### Checking the KNN algorithm on some test data

In [6]:
# Data for testing, paired with the class each sample is expected to receive
test_data = np.array([
    [118, 6.2, 0],  # Expected: Banana
    [160, 7.3, 1],  # Expected: Apple
    [185, 7.7, 2]   # Expected: Orange
])
expected_labels = ["Banana", "Apple", "Orange"]

# Run KNN classifier
knn = KNN_Classifier(k=3)
knn.fit(X, y_numeric)
predictions = knn.predict(test_data)

# Decode the integer class ids back into fruit names
predicted_labels = [reverse_label_map[i] for i in predictions]
print("Predictions:", predicted_labels)

# Check the result instead of relying on the reader comparing the printed
# output with the "Expected" comments above.
assert predicted_labels == expected_labels, (
    f"expected {expected_labels}, got {predicted_labels}"
)



Predictions: [np.str_('Banana'), np.str_('Apple'), np.str_('Orange')]
