In [5]:
from abc import ABC, abstractmethod

import numpy as np
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [6]:
# Load the digits dataset and pull out the feature matrix and the label vector.
data = load_digits()
x, y = data['data'], data['target']

# Hold out 25% of the samples for evaluation. Shuffling is disabled, so the
# split does not depend on any random state.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, shuffle=False
)

In [7]:
class BaseKNN(ABC):
    """Abstract base for k-nearest-neighbour estimators.

    Stores the neighbour count and binds the distance function selected by
    ``metric`` to ``self.metric``. Subclasses implement ``fit`` and
    ``predict``.

    Parameters
    ----------
    k : int
        Number of neighbours; stored as ``self.k`` without validation.
    metric : {'minkowski', 'euclidean'}, default 'minkowski'
        Name of the distance function to use.
    p : int or float, default 2
        Exponent of the Minkowski distance; not used by ``'euclidean'``.

    Raises
    ------
    AssertionError
        If ``metric`` is not one of the supported names.
    """

    def __init__(self, k, metric='minkowski', p=2):
        # Raise explicitly instead of using an `assert` statement: asserts are
        # stripped under `python -O`, which would leave `self.metric` unset.
        # AssertionError is kept so callers catching it keep working.
        if metric not in ('euclidean', 'minkowski'):
            raise AssertionError('Wrong metric')
        self.k = k
        self.p = p
        if metric == 'euclidean':
            self.metric = self._euclidean
        else:
            self.metric = self._minkowski

    @staticmethod
    def _euclidean(x, y):
        """Return the Euclidean distance between ``x`` and ``y``.

        The inputs are broadcast against each other and reduced over the last
        (feature) axis, so a single sample can be compared with a matrix of
        samples (one distance per row) or with another single sample (scalar).
        """
        diff = np.asarray(x) - np.asarray(y)
        return np.sqrt(np.sum(diff ** 2, axis=-1))

    def _minkowski(self, x, y):
        """Return the Minkowski distance of order ``self.p``.

        Same broadcasting and last-axis reduction as ``_euclidean``.
        """
        diff = np.abs(np.asarray(x) - np.asarray(y))
        return np.sum(diff ** self.p, axis=-1) ** (1 / self.p)

    @abstractmethod
    def fit(self, x, y):
        """Fit the estimator on samples ``x`` with targets ``y``."""
        pass

    @abstractmethod
    def predict(self, x):
        """Return predictions for the samples in ``x``."""
        pass
    
class KNNClassifier(BaseKNN):
    """k-nearest-neighbours classifier with majority voting.

    ``fit`` memorises the training set. ``predict`` ranks the stored samples
    by ``self.metric`` distance to each query sample and returns the class
    label that occurs most often among the ``self.k`` closest ones. Vote ties
    go to the class that comes first in ``self.unique_classes``.
    """

    def fit(self, x, y):
        """Store the training samples and labels.

        Parameters
        ----------
        x : array-like
            Training samples, one per row.
        y : array-like
            Class label of each training sample.

        Returns
        -------
        KNNClassifier
            ``self``, to allow call chaining.
        """
        self._x = np.asarray(x)
        # Labels stay stored as a column vector, as before.
        self._y = np.asarray(y).reshape(-1, 1)
        self.unique_classes = np.unique(self._y)
        return self

    def predict(self, x):
        """Predict a class label for every sample in ``x``.

        Parameters
        ----------
        x : array-like
            Query samples, one per row.

        Returns
        -------
        numpy.ndarray
            Predicted labels, with the dtype of ``self.unique_classes``. The
            same predictions are also kept as a list in ``self.result``.
        """
        labels = self._y.ravel()
        predictions = []
        for sample in np.asarray(x):
            distances = self.metric(sample, self._x)
            # Index the labels by neighbour position instead of stacking them
            # next to the distances: stacking coerces both to one dtype, which
            # breaks for non-numeric labels.
            nearest = np.argsort(distances)[:self.k]
            neighbour_labels = labels[nearest]
            votes = [np.sum(neighbour_labels == cls) for cls in self.unique_classes]
            # argmax yields a position in `unique_classes`; map it back to the
            # actual label so arbitrary label values are returned correctly.
            predictions.append(self.unique_classes[np.argmax(votes)])
        self.result = predictions
        return np.array(predictions, dtype=self.unique_classes.dtype)

    def __repr__(self):
        return f'KNNClassifier(k={self.k})'

In [8]:
# 1-nearest-neighbour classifier; metric and p keep their defaults
# (metric='minkowski', p=2).
knn = KNNClassifier(k=1)

In [9]:
# fit() returns the estimator itself, so this cell displays its repr.
knn.fit(x_train, y_train)

KNNClassifier(k=1)

In [13]:
# Test error rate of the k-NN classifier: share of misclassified test samples.
np.mean(knn.predict(x_test) != y_test)

0.03777777777777778

In [14]:
def write_answer(answer, n):
    """Write ``str(answer)`` to ``knn_vs_rf_<n>.txt`` in the working directory.

    An existing file with that name is overwritten.
    """
    filename = f'knn_vs_rf_{n}.txt'
    text = str(answer)
    with open(filename, 'w') as out:
        out.write(text)

In [15]:
# Write the measured k-NN test error rate instead of a hand-copied literal, so
# the saved answer cannot drift from what the notebook actually computes.
knn_error_rate = np.mean(knn.predict(x_test) != y_test)
write_answer(knn_error_rate, 1)

In [16]:
# Random forests are stochastic; fixing random_state makes the fitted model,
# and therefore its test error rate, reproducible on Restart & Run All.
# Numbers recorded from earlier unseeded runs may differ slightly.
rf = RandomForestClassifier(n_estimators=1000, random_state=0)
rf.fit(x_train, y_train)

RandomForestClassifier(n_estimators=1000)

In [17]:
# Test error rate of the random forest: share of misclassified test samples.
np.mean(rf.predict(x_test) != y_test)

0.06888888888888889

In [18]:
# Write the error rate of the forest fitted in this session instead of a
# hand-copied literal, which goes stale whenever the forest is refitted.
rf_error_rate = np.mean(rf.predict(x_test) != y_test)
write_answer(rf_error_rate, 2)