In [66]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [72]:
# Load Dataset
data = load_iris()
x = data['data']
y = data['target']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, train_size=0.8)

In [259]:
class KNNClassifier():
    def __init__(self, k):
        self.k = k

    def fit(self, x, y):
        assert x.shape[0] == y.shape[0], 'Number of data points in x is not the same as that in y'
        self.data = x
        self.labels = y

    def MinkowskiDistance(self, x, z, p):
        return np.power(np.sum(np.power(np.abs(np.tile(x,(z.shape[0])).reshape(x.shape[0],z.shape[0],z.shape[-1])-z),p), axis=-1),1/p)

    def predict(self, x):
        if len(x.shape)==2:
            distances=self.MinkowskiDistance(x, self.data, 2)
            indices = np.argsort(np.array(distances))
            labels = self.labels[indices[:,:10]]
            res = []
            for li in labels:
                unique, counts = np.unique(li, return_counts=True)
                res.append(unique[np.argmax(counts)])
        return np.array(res)


In [260]:
model  = KNNClassifier(k=10)
model.fit(x_train, y_train)

In [265]:
out = model.predict(x_test)

In [266]:
# Classification Report
from sklearn.metrics import classification_report
print(classification_report(y_test, out))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        12

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [267]:
# Using sklearn example
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X=x_train, y=y_train)
out = knn.predict(x_test)
print(classification_report(y_test, out))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        12

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [268]:
from sklearn.datasets import fetch_20newsgroups
newsgroups_train = fetch_20newsgroups(subset='train')