In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import datasets
from collections import Counter

In [16]:
# Function to calculate the Euclidean distance between two points
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like
        Coordinates of the two points. Lists, tuples and NumPy arrays are
        all accepted; the two inputs must have the same (or broadcastable)
        shape.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences.
    """
    # Convert to float arrays up front: this lets plain Python sequences be
    # subtracted, and stops unsigned/small integer dtypes from wrapping
    # around when differenced and squared.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff * diff))

# K-Nearest Neighbors (KNN) Classifier
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        If ``k`` exceeds the number of training samples, every training
        sample votes.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3) -> None:
        # Fail early: with k < 1 no neighbour would vote and prediction
        # would otherwise die later with an unhelpful IndexError.
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    # Method to fit the model with training data
    def fit(self, X, y):
        """Memorise the training set (KNN has no real training step).

        Parameters
        ----------
        X : array-like
            Training samples, one per entry along the first axis.
        y : array-like
            Label for each training sample, in the same order as ``X``.

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X, dtype=float)
        # Plain ndarray => purely positional indexing later on, even when the
        # caller passes an index-labelled container.
        y = np.asarray(y)

        if X.size == 0:
            raise ValueError("cannot fit KNN on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(X)} and {len(y)}"
            )

        # One flat feature row per sample, whatever the per-sample shape was
        # (scalar features become a single column).
        self.X_train = X.reshape(len(X), -1)
        self.y_train = y

    # Method to predict the labels of new data points
    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Samples to classify, one per entry along the first axis.

        Returns
        -------
        list
            Predicted labels, in the same order as the samples in ``X``.
        """
        return [self._predict(x) for x in np.asarray(X, dtype=float)]

    # Method to predict the label of a single data point
    def _predict(self, x):
        """Return the majority label among the ``k`` nearest training samples.

        When several labels share the highest count, ``Counter.most_common``
        returns the one encountered first; labels are counted in order of
        increasing distance.
        """
        # Euclidean distance from x to every training sample in one
        # vectorised pass instead of a Python-level loop.
        diffs = self.X_train - np.ravel(x)
        distances = np.sqrt(np.sum(diffs * diffs, axis=1))

        # Indices of the k closest training samples, nearest first
        k_indices = np.argsort(distances)[:self.k]

        # Labels of those neighbours
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Majority vote
        return Counter(k_nearest_labels).most_common(1)[0][0]


In [17]:
# Load the Iris dataset
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Fixed seed so the train/test split -- and therefore the reported
# accuracy -- is identical on every Restart & Run All.
RANDOM_STATE = 42
# Number of neighbours that vote on each prediction
N_NEIGHBORS = 20

# Split the dataset into training and testing sets
# (train_test_split's default test size is used)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=RANDOM_STATE
)

# Initialize a KNN classifier and train it on the training data
knn = KNN(N_NEIGHBORS)
knn.fit(X_train, y_train)

# Predict the labels for the test data
preds = knn.predict(X_test)

# Accuracy = fraction of test samples whose predicted label is correct
acc = np.mean(np.equal(preds, y_test))

# Display the accuracy
acc

0.9736842105263158