# In [3]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

class KNNDistanceCalculator:
    """Brute-force k-nearest-neighbours classifier.

    Every query point is compared with every stored training row using the
    configured distance metric; the predicted class is the most frequent
    label among the ``k`` closest rows.

    Features are matched by column *position*, not by column name.
    """

    def __init__(self, k, distance_metric="euclidean"):
        """Store the hyper-parameters.

        Args:
            k: Number of nearest neighbours that vote on each prediction.
            distance_metric: ``"euclidean"`` or ``"manhattan"``. The value is
                only checked when a distance is actually computed.
        """
        self.k = k
        self.distance_metric = distance_metric

    def fit(self, train_data, train_labels):
        """Remember the training set; no model is built ahead of time.

        Args:
            train_data: Two-dimensional table of numeric features, one row
                per sample (typically a ``pandas.DataFrame``).
            train_labels: One class label per row of ``train_data``.

        Raises:
            ValueError: If the number of rows and labels differ.
        """
        if len(train_data) != len(train_labels):
            raise ValueError(
                "train_data and train_labels must have the same length "
                f"(got {len(train_data)} and {len(train_labels)})"
            )
        self.train_data = train_data
        self.train_labels = train_labels

    def euclidean_distance(self, row1, row2):
        """Return the straight-line (L2) distance between two feature vectors."""
        return np.sqrt(np.sum((row1 - row2) ** 2))

    def manhattan_distance(self, row1, row2):
        """Return the city-block (L1) distance between two feature vectors."""
        return np.sum(np.abs(row1 - row2))

    def calculate_distance(self, row1, row2):
        """Return the distance between two vectors using the chosen metric.

        Raises:
            ValueError: If ``self.distance_metric`` is not a known metric.
        """
        if self.distance_metric == "euclidean":
            return self.euclidean_distance(row1, row2)
        elif self.distance_metric == "manhattan":
            return self.manhattan_distance(row1, row2)
        else:
            raise ValueError(f"Unknown distance metric: {self.distance_metric}")

    def _require_fitted(self):
        """Raise a descriptive error when ``fit`` has not been called yet."""
        if not hasattr(self, "train_data") or not hasattr(self, "train_labels"):
            # Same exception type the bare attribute access used to produce,
            # but with a message that tells the caller what to do.
            raise AttributeError(
                "This KNNDistanceCalculator is not fitted yet; call fit() first."
            )

    def calculate_distances(self, test_point):
        """Rank all training rows by their distance to one query point.

        Args:
            test_point: A single query point, given as a one-row DataFrame
                or a one-dimensional array-like. If several rows are passed,
                only the first one is used.

        Returns:
            A DataFrame with columns ``train_index``, ``distance`` and
            ``species``, sorted by ascending distance. Equidistant rows keep
            their training order.

        Raises:
            ValueError: If the training data is not two-dimensional or the
                query has a different number of features than the training
                rows.
        """
        self._require_fitted()

        # Convert once instead of paying for a pandas ``.iloc`` lookup per row.
        train_rows = np.asarray(self.train_data)
        if train_rows.ndim != 2:
            raise ValueError("train_data must be two-dimensional (samples x features)")

        query = np.atleast_2d(np.asarray(test_point))[0]
        # Without this check NumPy would silently broadcast a one-feature
        # query against every training row and return meaningless distances.
        if query.shape[0] != train_rows.shape[1]:
            raise ValueError(
                f"test point has {query.shape[0]} features, "
                f"but the training data has {train_rows.shape[1]}"
            )

        distances = [self.calculate_distance(row, query) for row in train_rows]

        if isinstance(self.train_data, pd.DataFrame):
            train_index = self.train_data.index
        else:
            train_index = pd.RangeIndex(len(train_rows))

        distances_df = pd.DataFrame({
            'train_index': train_index,
            'distance': distances,
            'species': self.train_labels
        })
        # mergesort is stable, so ties in distance are resolved reproducibly.
        return distances_df.sort_values(by='distance', kind='mergesort')

    def predict_class(self, test_points):
        """Predict a class label for every query point.

        Args:
            test_points: Table of query points, one row per point, with the
                features in the same order as the training data.

        Returns:
            A list with one predicted label per query point. When several
            labels are equally frequent among the neighbours, the smallest
            label is returned.

        Raises:
            ValueError: If ``k`` is not a positive integer, if there are no
                training samples, or if a query has the wrong number of
                features.
        """
        self._require_fitted()
        if not isinstance(self.k, (int, np.integer)) or self.k < 1:
            # A negative k would make ``head`` return "all but the last |k|"
            # rows, silently voting with almost the whole training set.
            raise ValueError(f"k must be a positive integer, got {self.k!r}")

        predictions = []
        for query in np.atleast_2d(np.asarray(test_points)):
            nearest_neighbors = self.calculate_distances(query).head(self.k)
            if nearest_neighbors.empty:
                raise ValueError("cannot predict without any training samples")
            # ``mode`` returns the most frequent labels in sorted order.
            predictions.append(nearest_neighbors['species'].mode().iloc[0])
        return predictions


# Load the iris measurements into a DataFrame and attach the integer class
# labels as a 'species' column.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['species'] = iris.target

# Fit a 5-nearest-neighbour model on every column except the label.
model = KNNDistanceCalculator(k=5, distance_metric="euclidean")
model.fit(df.drop('species', axis=1), df['species'])

# Two query flowers. Each row lists its values in the order of
# iris.feature_names, and the frame reuses those names so the query columns
# line up with the training columns by name as well as by position.
test_points = pd.DataFrame(
    [
        [5.0, 3.4, 1.5, 0.2],
        [6.0, 2.7, 4.5, 1.5],
    ],
    columns=iris.feature_names,
)
# Left as a bare expression so a notebook displays the predicted labels.
model.predict_class(test_points)




# Out[3]: [0, 1]