In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

df = pd.read_csv("/kaggle/input/glass/glass.csv")

print(df.head())

X = df.iloc[:, 1:-1].values  
y = df.iloc[:, -1].values  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


        RI     Na    Mg    Al     Si     K    Ca   Ba   Fe  Type
0  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.0  0.0     1
1  1.51761  13.89  3.60  1.36  72.73  0.48  7.83  0.0  0.0     1
2  1.51618  13.53  3.55  1.54  72.99  0.39  7.78  0.0  0.0     1
3  1.51766  13.21  3.69  1.29  72.61  0.57  8.22  0.0  0.0     1
4  1.51742  13.27  3.62  1.24  73.08  0.55  8.07  0.0  0.0     1


In [8]:
def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2))

class CustomKNN:
    def __init__(self, n_neighbors=5, weighted=True):
        self.n_neighbors = n_neighbors
        self.weighted = weighted

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        y_pred = [self._predict(x) for x in X_test]
        return np.array(y_pred)

    def _predict(self, x):
        distances = [euclidean_distance(x, x_train) for x_train in self.X_train]
        k_indices = np.argsort(distances)[:self.n_neighbors]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        if self.weighted:
            k_distances = [distances[i] for i in k_indices]
            weight_sum = {}
            for label, distance in zip(k_nearest_labels, k_distances):
                weight = 1 / (distance + 1e-5)  
                if label in weight_sum:
                    weight_sum[label] += weight
                else:
                    weight_sum[label] = weight
            most_common = max(weight_sum, key=weight_sum.get)
        else:
            most_common = Counter(k_nearest_labels).most_common(1)[0][0]

        return most_common

X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

knn = CustomKNN(n_neighbors=5)

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

accuracy = (y_pred == y_test).mean()
print(f'KNN Accuracy: {accuracy * 100:.2f}%')


KNN Accuracy: 70.37%


In [10]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from collections import Counter

k_values = [3, 5, 7, 9]
custom_accuracies = {}

for k in k_values:
    knn = CustomKNN(n_neighbors=k, weighted=True)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    custom_accuracies[k] = accuracy
    print(f'Custom KNN Accuracy with k={k}: {accuracy * 100:.2f}%')

sklearn_accuracies = {}

for k in k_values:
    knn_sklearn = KNeighborsClassifier(n_neighbors=k)
    knn_sklearn.fit(X_train, y_train)
    y_pred_sklearn = knn_sklearn.predict(X_test)
    accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
    sklearn_accuracies[k] = accuracy_sklearn
    print(f'sklearn KNN Accuracy with k={k}: {accuracy_sklearn * 100:.2f}%')

best_custom_k = max(custom_accuracies, key=custom_accuracies.get)
best_sklearn_k = max(sklearn_accuracies, key=sklearn_accuracies.get)

print(f'Best Custom KNN Accuracy: {custom_accuracies[best_custom_k] * 100:.2f}% with k={best_custom_k}')
print(f'Best sklearn KNN Accuracy: {sklearn_accuracies[best_sklearn_k] * 100:.2f}% with k={best_sklearn_k}')


Custom KNN Accuracy with k=3: 70.37%
Custom KNN Accuracy with k=5: 70.37%
Custom KNN Accuracy with k=7: 68.52%
Custom KNN Accuracy with k=9: 68.52%
sklearn KNN Accuracy with k=3: 70.37%
sklearn KNN Accuracy with k=5: 62.96%
sklearn KNN Accuracy with k=7: 59.26%
sklearn KNN Accuracy with k=9: 59.26%
Best Custom KNN Accuracy: 70.37% with k=3
Best sklearn KNN Accuracy: 70.37% with k=3
