In [12]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [13]:
# Load the Wine dataset
data = load_wine()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [14]:
# Custom k-NN implementation
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        predictions = [self._predict(x) for x in X]
        return np.array(predictions)

    def _predict(self, x):
        # Compute distances between x and all examples in the training set
        distances = [np.linalg.norm(x - x_train) for x_train in self.X_train]
        # Sort by distance and return indices of the first k neighbors
        k_indices = np.argsort(distances)[:self.k]
        # Extract the labels of the k nearest neighbor training samples
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        # Return the most common class label
        most_common = np.bincount(k_nearest_labels).argmax()
        return most_common

In [15]:
# Sklearn k-NN
knn_sklearn = KNeighborsClassifier(n_neighbors=3)
knn_sklearn.fit(X_train, y_train)

# Predictions
y_pred_sklearn = knn_sklearn.predict(X_test)
y_pred_train_sklearn = knn_sklearn.predict(X_train)

# Print accuracy
print("Sklearn k-NN Test Accuracy:", accuracy_score(y_test, y_pred_sklearn))
print("Sklearn k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_sklearn))

Sklearn k-NN Test Accuracy: 0.7407407407407407
Sklearn k-NN Train Accuracy: 0.8387096774193549


In [16]:
# Let us now try K = 1 -> Over Fitting
# Custom k-NN
knn_custom = KNN(k=1)
knn_custom.fit(X_train, y_train)

# Predictions
y_pred_custom = knn_custom.predict(X_test)
y_pred_train_custom = knn_custom.predict(X_train)

# Print accuracy
print("Trying k = 1:" + str(accuracy_score(y_train, y_pred_train_custom)) + " which is over fitting")
print("Custom k-NN Test Accuracy:", accuracy_score(y_test, y_pred_custom))
print("Custom k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_custom))

Trying k = 1:1.0 which is over fitting
Custom k-NN Test Accuracy: 0.7962962962962963
Custom k-NN Train Accuracy: 1.0


In [17]:
# Let us now try K = 100 -> under Fitting
# Custom k-NN
knn_custom = KNN(k=100)
knn_custom.fit(X_train, y_train)

# Predictions
y_pred_custom = knn_custom.predict(X_test)
y_pred_train_custom = knn_custom.predict(X_train)

# Print accuracy
print("Trying k = 100:" + str(accuracy_score(y_train, y_pred_train_custom)) + " which is under fitting")
print("Custom k-NN Test Accuracy:", accuracy_score(y_test, y_pred_custom))
print("Custom k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_custom))

Trying k = 100:0.6370967741935484 which is under fitting
Custom k-NN Test Accuracy: 0.7037037037037037
Custom k-NN Train Accuracy: 0.6370967741935484


In [26]:
# Let us now try K = 5 -> Good Fit
# Custom k-NN
knn_custom = KNN(k=2)
knn_custom.fit(X_train, y_train)

# Predictions
y_pred_custom = knn_custom.predict(X_test)
y_pred_train_custom = knn_custom.predict(X_train)

# Print accuracy
print("Trying k = 5:" + str(accuracy_score(y_train, y_pred_train_custom)) + " which is Good fit")
print("Custom k-NN Test Accuracy:", accuracy_score(y_test, y_pred_custom))
print("Custom k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_custom))

Trying k = 5:0.8548387096774194 which is Good fit
Custom k-NN Test Accuracy: 0.7037037037037037
Custom k-NN Train Accuracy: 0.8548387096774194


In [19]:
# Create a comparison table
results = {
    'Model': ['Sklearn k-NN', 'Custom k-NN'],
    'Train Accuracy': [accuracy_score(y_train, y_pred_train_sklearn), accuracy_score(y_train, y_pred_train_custom)],
    'Test Accuracy': [accuracy_score(y_test, y_pred_sklearn), accuracy_score(y_test, y_pred_custom)]
}

comparison_table = pd.DataFrame(results)

# Display the comparison table
print(comparison_table)

          Model  Train Accuracy  Test Accuracy
0  Sklearn k-NN        0.838710       0.740741
1   Custom k-NN        0.774194       0.740741
