Detailed instructions for question 1:
The nearest neighbor classifier predicts for a point X_i the target y_k of
the training sample X_k which is the closest to X_i. We measure proximity with
the Euclidean distance. The model will be evaluated with the accuracy (the
fraction of samples correctly classified). You need to implement the `fit`,
`predict` and `score` methods for this class. The code you write should pass
the tests we implemented. You can run the tests by calling
`pytest test_sklearn_questions.py` at the root of the repo. Note that to be
fully valid, a scikit-learn estimator needs to check that the inputs given to
`fit` and `predict` are correct using the `check_*` functions imported in the file.
You can find more information on how they should be used in the following doc:
https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator.
Make sure to use them to pass `test_nearest_neighbor_check_estimator`.

In [163]:
"""
Hints
-----
- You can use the function:

from sklearn.metrics.pairwise import pairwise_distances

to compute distances between 2 sets of samples.
"""
import numpy as np
import pandas as pd
from scipy import stats

from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin

from sklearn.model_selection import BaseCrossValidator

from sklearn.utils.validation import check_X_y, check_is_fitted
from sklearn.utils.validation import check_array
from sklearn.utils.multiclass import check_classification_targets
from sklearn.metrics.pairwise import pairwise_distances


class KNearestNeighbors(BaseEstimator, ClassifierMixin):
    """K-nearest-neighbors classifier using the Euclidean distance.

    Each sample is assigned the most frequent label among its
    ``n_neighbors`` closest training samples. When several labels are tied,
    the smallest one (in sorted order of ``classes_``) is returned.

    Parameters
    ----------
    n_neighbors : int, default=1
        Number of training samples that vote for each prediction.

    Attributes
    ----------
    X_train_ : ndarray, shape (n_samples, n_features)
        Validated training data stored by ``fit``.
    y_encoded_ : ndarray, shape (n_samples,)
        Training labels encoded as integer indices into ``classes_``.
    classes_ : ndarray, shape (n_classes,)
        Sorted unique labels seen during ``fit``.
    n_features_in_ : int
        Number of features seen during ``fit``.
    """

    def __init__(self, n_neighbors=1):  # noqa: D107
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        """Store the validated training set.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Data to train the model.
        y : ndarray, shape (n_samples,)
            Labels associated with the training data.

        Returns
        -------
        self : instance of KNearestNeighbors
            The current instance of the classifier

        Raises
        ------
        ValueError
            If the inputs are invalid, if ``y`` is not a classification
            target, or if ``n_neighbors`` is not a positive integer.
        """
        X, y = check_X_y(X, y)
        check_classification_targets(y)

        # Hyper-parameters are validated in ``fit`` (not ``__init__``) so the
        # estimator stays compatible with ``get_params`` / ``set_params``.
        if (
            not isinstance(self.n_neighbors, (int, np.integer))
            or self.n_neighbors < 1
        ):
            raise ValueError(
                "n_neighbors must be a positive integer, got "
                f"{self.n_neighbors!r}."
            )

        # Encode labels as integers so the vote in ``predict`` works for any
        # label dtype (strings included), not just numeric ones.
        self.classes_, self.y_encoded_ = np.unique(y, return_inverse=True)
        self.X_train_ = X
        self.n_features_in_ = X.shape[1]

        return self

    def predict(self, X):
        """Predict the majority label among the nearest training samples.

        Parameters
        ----------
        X : ndarray, shape (n_test_samples, n_features)
            Data to predict on.

        Returns
        -------
        y : ndarray, shape (n_test_samples,)
            Predicted class labels for each test data sample.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        ValueError
            If ``X`` is invalid or its number of features differs from the
            one seen during ``fit``.
        """
        check_is_fitted(self)
        X = check_array(X)
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but {type(self).__name__} is "
                f"expecting {self.n_features_in_} features as input."
            )

        distances = pairwise_distances(X, self.X_train_, metric='euclidean')
        # A stable sort makes the neighbour order deterministic when several
        # training samples are exactly equidistant from a query point.
        order = np.argsort(distances, axis=1, kind="stable")
        neighbor_codes = self.y_encoded_[order[:, :self.n_neighbors]]

        # Count the votes per class for every row; ``argmax`` returns the
        # first maximum, i.e. the smallest class wins a tie.
        votes = np.apply_along_axis(
            np.bincount, 1, neighbor_codes, minlength=len(self.classes_)
        )
        return self.classes_[np.argmax(votes, axis=1)]

    def score(self, X, y):
        """Compute the accuracy of the predictions on (X, y).

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Data to score on.
        y : ndarray, shape (n_samples,)
            target values.

        Returns
        -------
        score : float
            Accuracy of the model computed for the (X, y) pairs.
        """
        return np.mean(self.predict(X) == np.asarray(y))

In [6]:
from sklearn.utils.estimator_checks import check_estimator
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier

In [8]:
# Synthetic classification data: 200 samples, 20 features, fixed seed so the
# cell outputs below are reproducible.
X, y = make_classification(n_samples=200, n_features=20,
                               random_state=42)
# Default train/test split with a fixed seed; the shapes printed further down
# are (150, 20) for the training set and (50, 20) for the test set.
X_train, X_test, y_train, y_test = \
        train_test_split(X, y, random_state=42)

In [10]:
k = 3

In [11]:
# Reference predictions from scikit-learn's own k-NN classifier, using the k
# defined in the previous cell.
knn = KNeighborsClassifier(n_neighbors=k)
y_pred_sk = knn.fit(X_train, y_train).predict(X_test)

In [15]:
# Only X_test is passed, so this is the distance between every pair of test
# samples (the (50, 50) shape shown below), not the test-to-train distances
# that nearest-neighbour prediction needs.
distance = pairwise_distances(X_test, metric='euclidean')

In [16]:
X_test.shape

(50, 20)

In [19]:
X_train.shape

(150, 20)

In [18]:
distance.shape

(50, 50)

In [50]:
# Euclidean distance from each test sample (rows) to each training sample
# (columns).
a = pairwise_distances(X_test,X_train , metric='euclidean')

In [51]:
a.shape

(50, 150)

In [54]:
np.argmin(a[0])

95

In [83]:
np.argmin(a,axis=1)

array([ 95,  26,  83,  61,  77,  19, 111,  23,  17, 149,  28,   7, 122,
        16, 142,  26, 147,  54, 135,  74,  39,  90,  82,  90,  41, 111,
       132,  46, 129,  76,  96, 116,  61,  45, 112, 139,  63, 116,  92,
        13, 142,  90,  32,   3, 149,  72, 112,   3, 105,  21], dtype=int64)

In [36]:
a[0,37:45]

array([6.7143502 , 5.81182126, 6.42780856, 7.77513284, 6.66672997,
       6.48200465, 6.10086089, 8.03890805])

In [71]:
# For each test sample, the training-sample indices ordered from nearest to
# farthest.
b = np.argsort(a,axis=1)

In [72]:
b.shape

(50, 150)

In [80]:
b[0]

array([ 95,  50,  48, 114,  78, 142, 134,  45,  63,  85,  91,  17,  88,
        83, 105,  64, 110,  98, 146,  26, 109, 119, 111,  19, 116, 147,
        29, 108,  46,  59, 115,   1,  28, 135,  56,  38,  74,  33, 126,
       131,  73, 112,   8,  82,   2,  47,  84, 143, 140,  72,  16, 113,
       148,  43,  70,  18, 137,   6,  60,  71,  67,  30, 145,  25, 125,
         7,  39,  23,  90, 127, 130,  42,  66,  51,  14,   4,  69, 149,
        92,  58,  31,  97, 141,  41,  37,  10,  79,  34,  96,  20,  11,
        53, 124,  80,  77, 121,  15,  81, 129,  61, 138,  65,  49,  54,
         3, 102,  75,  36,  35,  87, 101, 139,   5, 100, 132, 122, 133,
        27, 117,  13,  12, 118,   9,   0,  76,  32, 144,  52,  55,  89,
        21,  24,  40,  22,  99, 120, 123, 103,  86,  44, 128, 106, 107,
        93, 136,  68,  62, 104,  57,  94], dtype=int64)

In [79]:
a[0,38]

5.811821257781309

In [86]:
np.unique(y)

array([0, 1])

In [87]:
n_neighbors = 3

In [97]:
# Keep only the indices of the n_neighbors closest training samples per row.
mask = b[:,:n_neighbors]

In [117]:
# Labels of the selected neighbours: one row per test sample, one column per
# neighbour.
elements = y_train[mask]

In [121]:
elements[5]

array([1, 0, 0])

In [122]:
from collections import Counter

In [123]:
Counter(elements[5])

Counter({1: 1, 0: 2})

In [127]:
values, counts = np.unique(elements[5], return_counts=True)

In [133]:
# With axis=1, np.unique deduplicates whole columns, so the counts below are
# per unique column (hence [1, 1, 1]) and not per-row label frequencies.
np.unique(elements, axis=1, return_counts=True)[1]

array([1, 1, 1], dtype=int64)

In [136]:
from scipy import stats

In [137]:
elements

array([[0, 0, 0],
       [0, 0, 0],
       [0, 1, 0],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 1],
       [1, 0, 0],
       [1, 0, 1],
       [0, 1, 0],
       [0, 0, 0],
       [1, 1, 0],
       [1, 1, 1],
       [0, 1, 1],
       [0, 1, 0],
       [1, 1, 1],
       [1, 1, 0],
       [0, 1, 1],
       [0, 0, 0],
       [0, 0, 0],
       [1, 1, 1],
       [1, 1, 0],
       [1, 1, 1],
       [1, 0, 0],
       [1, 1, 0],
       [0, 1, 0],
       [1, 0, 1],
       [1, 0, 1],
       [0, 0, 1],
       [1, 0, 0],
       [0, 1, 1],
       [1, 0, 0],
       [1, 1, 0],
       [1, 1, 0],
       [1, 1, 1],
       [0, 1, 1],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [0, 0, 1],
       [1, 0, 1],
       [1, 1, 0],
       [0, 0, 1],
       [1, 0, 1],
       [0, 0, 0],
       [1, 1, 1],
       [0, 0, 0],
       [0, 0, 0],
       [0, 1, 0]])

In [148]:
# Row-wise most frequent label; [0] selects the mode values (dropping the
# counts), giving one entry per test sample as the shape below confirms.
stats.mode(elements, keepdims=False, axis = 1)[0].ravel().shape

(50,)

In [164]:
k = 5

In [165]:
# Fit the custom classifier on the training split and predict the test split.
onn = KNearestNeighbors(k)
y_pred_me = onn.fit(X_train, y_train).predict(X_test)
y_pred_me

array([0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 0])

In [166]:
# Same experiment with scikit-learn's classifier and the same k, used as the
# reference to compare against.
knn = KNeighborsClassifier(n_neighbors=k)
y_pred_sk = knn.fit(X_train, y_train).predict(X_test)
y_pred_sk

array([0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 0, 0])

In [167]:
# Element-wise agreement between the custom and the scikit-learn predictions.
y_pred_me == y_pred_sk

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True])