In [1]:
import numpy as np
import pandas as pd

import random

from sklearn.datasets import make_classification

In [2]:
X, y = make_classification(n_samples=50, n_features=5, n_informative=2, random_state=42)
X = pd.DataFrame(X)
y = pd.Series(y)
X.columns = [f'col_{col}' for col in X.columns]

In [3]:
display(X.head())
print(X.shape)

Unnamed: 0,col_0,col_1,col_2,col_3,col_4
0,-1.169215,1.144863,-0.974682,1.583077,-0.694705
1,-0.848883,1.100919,-0.820682,1.215244,-0.532098
2,-0.48847,0.782047,-1.191303,0.735572,-0.321453
3,-0.799602,-0.339412,-0.919424,0.808465,-0.359732
4,0.540444,-0.783316,-0.034712,-0.793821,0.347233


(50, 5)


In [4]:
display(y.head())
print(y.shape)

0    1
1    1
2    1
3    1
4    0
dtype: int32

(50,)


In [5]:
class MyKNNClf():

    def __init__(self, k = 3, metric = 'euclidean'):
        self.k = k
        self.metric = metric
        self.X, self.y = None, None
        self.train_size = None

    
    def __repr__(self):
        return f'MyKNNClf class: k={self.k}'
    

    def fit(self, X: pd.DataFrame, y: pd.Series):
        self.X, self.y = X.copy().reset_index(drop=True), y.copy().reset_index(drop=True)
        self.train_size = self.X.shape


    def predict_proba(self, X: pd.DataFrame):
        X = X.copy().reset_index(drop=True)
        res = []
        for i in range(X.shape[0]):
            var = pd.Series(np.sum((self.X - X.values[i])**2, axis=1)**(1/2)).sort_values().index[:self.k]
            res.append(np.mean([self.y[i] for i in var]))
        return pd.Series(res)
    
    
    def predict(self, X):
        return self.predict_proba(X).apply(lambda x: 1 if x >= 0.5 else 0)

In [6]:
knn = MyKNNClf(k = 3, metric= 'cosine')
print(knn)

MyKNNClf class: k=3


In [7]:
knn.fit(X, y)

In [8]:
knn.predict_proba(X).head()

0    1.000000
1    1.000000
2    1.000000
3    0.666667
4    0.000000
dtype: float64