In [25]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.base import ClassifierMixin

from sklearn.neighbors.base import NeighborsBase, KNeighborsMixin, SupervisedIntegerMixin 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

from sklearn.neighbors import KDTree

from sklearn.metrics import r2_score, f1_score
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

In [26]:
class MyKNeighborsClassifier(NeighborsBase, KNeighborsMixin, SupervisedIntegerMixin, ClassifierMixin):
    """Brute-force k-nearest-neighbours classifier with majority voting.

    Distances between query rows and every stored training row are computed
    with ``sklearn.metrics.pairwise_distances``; the label that occurs most
    often among the ``n_neighbors`` closest training rows wins. Ties in the
    vote are resolved in favour of the smallest class label.

    Parameters
    ----------
    n_neighbors : int
        Number of neighbours that take part in the vote.
    metric : str or callable, default="euclidean"
        Passed unchanged to ``pairwise_distances``.

    Attributes (set by ``fit``)
    ---------------------------
    X_train, y_train : copies of the training data.
    classes_ : sorted array of the distinct labels seen in ``fit``.
    unique : number of distinct labels (kept for backward compatibility).

    NOTE(review): the sklearn base classes are only inherited; none of their
    fitting machinery is initialised here, so inherited helpers such as
    ``kneighbors`` presumably do not work on this class - confirm before use.
    """

    def __init__(self, n_neighbors, metric="euclidean"):
        self.n_neighbors = n_neighbors
        self.metric = metric

    def fit(self, X, y):
        """Memorise the training set and return ``self``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` contain a different number of samples.
        """
        self.X_train = np.array(X)
        self.y_train = np.array(y)
        if self.X_train.shape[0] != self.y_train.shape[0]:
            raise ValueError(
                "X and y have inconsistent numbers of samples: %d != %d"
                % (self.X_train.shape[0], self.y_train.shape[0])
            )
        # Encode labels as positions in the sorted ``classes_`` array so that
        # vote counting works for arbitrary labels (strings, negative or
        # non-contiguous integers), not only for 0..C-1.
        self.classes_, codes = np.unique(self.y_train, return_inverse=True)
        self._y_codes = np.reshape(codes, -1)
        self.unique = self.classes_.shape[0]
        return self

    def _nearest_train_idx(self, X):
        """Return, per query row, the indices of its ``n_neighbors`` closest training rows.

        The indices inside a row are not sorted by distance.
        """
        X = np.asarray(X)
        n_train = self.X_train.shape[0]
        k = self.n_neighbors
        if not 1 <= k <= n_train:
            raise ValueError(
                "n_neighbors must be between 1 and the number of training "
                "samples (%d), got %r" % (n_train, k)
            )
        dist = metrics.pairwise_distances(X, self.X_train, metric=self.metric)
        # ``kth`` has to be a valid column index; clamping it keeps the
        # selection identical for k < n_train and makes k == n_train legal.
        kth = min(k, n_train - 1)
        return np.argpartition(dist, kth, axis=1)[:, :k]

    def _vote_counts(self, X):
        """Return an (n_queries, n_classes) array of neighbour votes per class."""
        neighbor_codes = self._y_codes[self._nearest_train_idx(X)]
        return np.array(
            [np.bincount(row, minlength=self.unique) for row in neighbor_codes]
        )

    def predict(self, X):
        """Return the majority label among the nearest neighbours of each row of ``X``."""
        votes = self._vote_counts(X)
        # argmax picks the first maximum, i.e. the smallest label on a tie.
        return self.classes_[votes.argmax(axis=1)]

    def predict_proba(self, X):
        """Return the fraction of neighbours voting for each class.

        Columns follow the order of ``classes_``. The same metric as in
        ``predict`` is used, so ``predict_proba(X).argmax(axis=1)`` agrees
        with ``predict(X)``.
        """
        return self._vote_counts(X) / self.n_neighbors

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``."""
        y = np.asarray(y)
        # Exact comparison: labels are categorical, so a tolerance-based
        # check (np.isclose) is wrong for them and fails on strings.
        return np.mean(self.predict(X) == y)

In [27]:
class MyKNeighborsRegressor(NeighborsBase, KNeighborsMixin, SupervisedIntegerMixin, ClassifierMixin):
    """Brute-force k-nearest-neighbours regressor (unweighted neighbour mean).

    Distances between query rows and every stored training row are computed
    with ``sklearn.metrics.pairwise_distances``; the prediction is the plain
    mean of the targets of the ``n_neighbors`` closest training rows.

    Parameters
    ----------
    n_neighbors : int
        Number of neighbours that are averaged.
    metric : str or callable, default="euclidean"
        Passed unchanged to ``pairwise_distances``.

    Attributes (set by ``fit``)
    ---------------------------
    X_train, y_train : copies of the training data.
    unique : number of distinct target values (only used by ``predict_proba``).

    NOTE(review): the class still lists ClassifierMixin among its bases
    although it is a regressor; RegressorMixin looks like the intended mixin -
    confirm and switch (this needs an extra import from sklearn.base).
    """

    def __init__(self, n_neighbors, metric="euclidean"):
        self.n_neighbors = n_neighbors
        self.metric = metric

    def fit(self, X, y):
        """Memorise the training set and return ``self``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` contain a different number of samples.
        """
        self.X_train = np.array(X)
        self.y_train = np.array(y)
        if self.X_train.shape[0] != self.y_train.shape[0]:
            raise ValueError(
                "X and y have inconsistent numbers of samples: %d != %d"
                % (self.X_train.shape[0], self.y_train.shape[0])
            )
        self.unique = np.unique(self.y_train).shape[0]
        return self

    def _nearest_train_idx(self, X):
        """Return, per query row, the indices of its ``n_neighbors`` closest training rows.

        The indices inside a row are not sorted by distance.
        """
        X = np.asarray(X)
        n_train = self.X_train.shape[0]
        k = self.n_neighbors
        if not 1 <= k <= n_train:
            raise ValueError(
                "n_neighbors must be between 1 and the number of training "
                "samples (%d), got %r" % (n_train, k)
            )
        dist = metrics.pairwise_distances(X, self.X_train, metric=self.metric)
        # ``kth`` has to be a valid column index; clamping it keeps the
        # selection identical for k < n_train and makes k == n_train legal.
        kth = min(k, n_train - 1)
        return np.argpartition(dist, kth, axis=1)[:, :k]

    def predict(self, X):
        """Return the mean target of the nearest neighbours of each row of ``X``."""
        neighbor_targets = self.y_train[self._nearest_train_idx(X)]
        # Axis 1 runs over the k neighbours of one query row.
        return neighbor_targets.mean(axis=1)

    def predict_proba(self, X):
        """Return, per query row, the share of neighbours having each integer target value.

        Kept for backward compatibility with the classifier this class was
        derived from. It is only usable when the training targets are
        non-negative integers, because ``np.bincount`` accepts nothing else;
        column ``j`` then corresponds to the target value ``j``.
        """
        neighbor_targets = self.y_train[self._nearest_train_idx(X)]
        counts = np.array(
            [np.bincount(row, minlength=self.unique) for row in neighbor_targets]
        )
        return counts / self.n_neighbors

    def score(self, X, y):
        """Return the coefficient of determination R^2 of ``predict(X)`` against ``y``.

        R^2 = 1 - SS_res / SS_tot. When ``y`` is constant (SS_tot == 0) the
        result is 1.0 for a perfect prediction and 0.0 otherwise, so the
        score is always finite.
        """
        y = np.asarray(y, dtype=float)
        residual = y - self.predict(X)
        ss_res = float(np.sum(residual ** 2))
        ss_tot = float(np.sum((y - y.mean()) ** 2))
        if ss_tot == 0.0:
            return 1.0 if ss_res == 0.0 else 0.0
        return 1.0 - ss_res / ss_tot

# KNN-классификация, датасет ирисов

In [28]:
# Load the iris dataset shipped with scikit-learn; the features and labels are
# read from it by key ("data" / "target") in the next cell.
data = datasets.load_iris()

In [29]:
# Features are standardised column-wise, labels are taken as they are.
# NOTE(review): the scaler is fitted on the whole dataset before the
# cross-validation split below, so the test folds influence the scaling
# statistics - fit it per fold if a leakage-free estimate is needed.
X = StandardScaler().fit_transform(data["data"])
y = data["target"]

In [32]:
# Sanity check: show (n_samples, n_features) of the standardised feature matrix.
X.shape

(150, 4)

In [33]:
%%time
y_ans = []
for train_index, test_index in KFold(n_splits=4).split(X):
    mdl = MyKNeighborsClassifier(3)
    X_train = X[train_index,:]
    y_train = y[train_index]

    X_test = X[test_index,:]
    y_test = y[test_index]
    
    mdl.fit(X_train, y_train)
    print(mdl.score(X_test, y_test))
    y_ans += list(mdl.predict(X_test))
    

1.0
0.9210526315789473
0.918918918918919
0.7567567567567568
Wall time: 28.4 ms


In [34]:
%%time
y_ans = []
for train_index, test_index in KFold(n_splits=4).split(X):
    mdl = KNeighborsClassifier(3)
    X_train = X[train_index,:]
    y_train = y[train_index]

    X_test = X[test_index,:]
    y_test = y[test_index]
    
    mdl.fit(X_train, y_train)
    print(mdl.score(X_test, y_test))
    y_ans += list(mdl.predict(X_test))
    

1.0
0.9210526315789473
0.918918918918919
0.7567567567567568
Wall time: 50.7 ms


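### Видим одинаковые точности на всех фолдах. Время работы одного порядка (28.4 мс у нашей реализации против 50.7 мс у sklearn); по одиночному замеру `%%time` на таком маленьком датасете более точных выводов о скорости сделать нельзя.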

# KNN-регрессия, датасет Бостон


In [36]:
# Boston housing data: standardised features in X, target values in y.
# NOTE(review): load_boston is no longer available in recent scikit-learn
# releases - confirm the pinned version before re-running this cell.
data = datasets.load_boston()
X = StandardScaler().fit_transform(data["data"])
y = data["target"]

In [37]:
%%time
y_ans = []
for train_index, test_index in KFold(n_splits=10).split(X):
    mdl = MyKNeighborsRegressor(10)
    X_train = X[train_index,:]
    y_train = y[train_index]

    X_test = X[test_index,:]
    y_test = y[test_index]
    
    mdl.fit(X_train, y_train)
    y_ans += list(mdl.predict(X_test))
    

Wall time: 55.9 ms


In [38]:
# R^2 of the out-of-fold predictions of the hand-written regressor.
# y_ans lines up with y only because the KFold above does not shuffle, so the
# test folds are consecutive slices of the data taken in order.
r2_score(y, y_ans)

0.6544193040015016

In [39]:
%%time
y_ans = []
for train_index, test_index in KFold(n_splits=10).split(X):
    mdl = KNeighborsRegressor(10, algorithm="brute", metric="euclidean")
    X_train = X[train_index,:]
    y_train = y[train_index]

    X_test = X[test_index,:]
    y_test = y[test_index]
    
    mdl.fit(X_train, y_train)
    y_ans += list(mdl.predict(X_test))
    

Wall time: 29.6 ms


In [40]:
# R^2 of the out-of-fold predictions of scikit-learn's regressor, for
# comparison with the value obtained for the hand-written one.
# y_ans lines up with y only because the KFold above does not shuffle.
r2_score(y, y_ans)

0.6544193040015016

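### Видим одинаковые точности (R² = 0.6544 в обоих случаях). Время работы одного порядка (55.9 мс у нашей реализации против 29.6 мс у sklearn); по одиночному замеру `%%time` на таком маленьком датасете более точных выводов о скорости сделать нельзя.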