In [None]:
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

# Fetch the iris dataset and unpack its two arrays
iris = load_iris()
X, y = iris.data, iris.target  # feature matrix, target labels

# Define your custom KNN function 
def my_knn(X_train, y_train, X_test, n_neighbors=3, weights='uniform'):
    """Classify each sample in ``X_test`` by a brute-force k-nearest-neighbours vote.

    Euclidean distance is computed from every test sample to every training
    sample. The ``n_neighbors`` closest training samples then vote on the
    label. Training samples at equal distance keep their original order, and
    a tied vote goes to the class that appears first among the neighbours
    (i.e. the one with the nearest member).

    Args:
        X_train: Array-like of training samples, one sample per row.
        y_train: Array-like of training labels, aligned with ``X_train``.
        X_test: Array-like of samples to classify, one sample per row.
        n_neighbors: Number of nearest training samples that vote. A value
            larger than the training set simply uses every training sample.
        weights: ``'uniform'`` for a plain majority vote. Any other value
            selects inverse-distance weighting, where each neighbour
            contributes ``1 / (distance + 1e-5)`` to its class.

    Returns:
        np.ndarray holding one predicted label per test sample.

    Raises:
        ValueError: If the training set is empty, if ``X_train`` and
            ``y_train`` differ in length, or if ``n_neighbors`` is below 1.
    """
    from collections import Counter

    # Accept lists (or anything array-like) as well as ndarrays.
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test, dtype=float)

    n_train = len(X_train)
    if n_train == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(y_train) != n_train:
        raise ValueError(
            f"X_train has {n_train} samples but y_train has {len(y_train)} labels"
        )
    if n_neighbors < 1:
        raise ValueError(f"n_neighbors must be at least 1, got {n_neighbors}")

    # One flattened row per training sample so a single broadcasted
    # subtraction yields the differences to a whole test sample at once.
    flat_train = X_train.reshape(n_train, -1)
    epsilon = 1e-5  # keeps the inverse-distance weight finite at distance 0

    y_pred = []
    for test_sample in X_test:
        deltas = flat_train - np.ravel(test_sample)
        dists = np.sqrt((deltas ** 2).sum(axis=1))
        # A stable sort keeps equidistant training samples in index order.
        nearest = np.argsort(dists, kind='stable')[:n_neighbors]

        if weights == 'uniform':
            # Majority vote; ties resolve to the first class encountered.
            vote = Counter(y_train[nearest]).most_common(1)[0][0]
        else:
            # Inverse-distance vote: closer neighbours count for more.
            class_votes = {}
            for label, dist in zip(y_train[nearest], dists[nearest]):
                class_votes[label] = class_votes.get(label, 0.0) + 1.0 / (dist + epsilon)
            vote = max(class_votes, key=class_votes.get)
        y_pred.append(vote)

    return np.array(y_pred)

# Hold out roughly 31% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.31, random_state=42
)

# Custom KNN: predict, then score as the fraction of matching labels
y_pred_custom = my_knn(X_train, y_train, X_test, n_neighbors=5)
custom_knn_acc = (y_pred_custom == y_test).mean()

# Reference run with scikit-learn's classifier using the same k
from sklearn.neighbors import KNeighborsClassifier
sklearn_knn = KNeighborsClassifier(n_neighbors=5)
sklearn_knn.fit(X_train, y_train)
y_pred_sklearn = sklearn_knn.predict(X_test)
sklearn_knn_acc = (y_pred_sklearn == y_test).mean()

print(f"Custom KNN Accuracy: {custom_knn_acc:.4f}")
print(f"Sklearn KNN Accuracy: {sklearn_knn_acc:.4f}")


# Performance

Both models perform similarly in terms of accuracy, but this depends on the dataset, the parameters (such as the number of neighbors), and the test data. For small datasets like Iris, the difference in accuracy is negligible.

# Complexity

Time Complexity:

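The brute-force KNN algorithm has a time complexity of O(n⋅d) per query for the distance computation, where n is the number of training samples and d is the number of features. This holds for the custom version and for sklearn's brute-force mode (`algorithm='brute'`), since both calculate the distance from every test point to every training point. With the default `algorithm='auto'`, sklearn may instead build a KD-tree or ball tree, which can answer queries faster on low-dimensional data.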

The custom version may be slower due to lack of optimizations.

Sklearn's KNN implementation will generally outperform a custom brute-force implementation, especially for large datasets.

Memory Complexity:

Custom KNN:
Stores all training data in memory, as KNN is a lazy learner. Depending on the dataset size, memory usage may become inefficient, especially for large datasets.
Sklearn KNN:
Similar to custom KNN, but optimized storage and possibly lower memory overhead due to efficient internal structures.

# Conclusion
Custom KNN is a good learning exercise and works fine for small datasets. However, it lacks the optimizations required for large-scale problems, which makes it inefficient in terms of time and memory for larger datasets.
Sklearn KNN is the better choice for real-world applications due to its optimizations, flexibility, and ease of use. It is faster and more memory-efficient, particularly for large datasets.