In [560]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
import random
from sklearn.model_selection import train_test_split

In [297]:
# Build a small synthetic classification problem and hold out 20% of it for testing.
X, y = make_classification(
    n_samples=50,
    n_features=5,
    n_informative=2,
    random_state=42,
)
# Wrap the raw arrays in pandas objects; feature columns become col_0 ... col_4.
X = pd.DataFrame(X).add_prefix('col_')
y = pd.Series(y)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [530]:
def euclidean(X_train, X_test):
    """Return the Euclidean (L2) distance between two 1-D feature vectors.

    Parameters
    ----------
    X_train, X_test : array-like of numbers
        Two vectors of equal length (lists, tuples, numpy arrays or
        pandas Series).

    Returns
    -------
    numpy.float64
        ``sqrt(sum((X_test - X_train) ** 2))``. NaN in either input
        propagates to the result.
    """
    # np.subtract (instead of the ``-`` operator) also accepts plain
    # lists/tuples; the difference is then reduced with vectorised numpy
    # calls rather than Python's element-by-element built-in ``sum``.
    diff = np.asarray(np.subtract(X_test, X_train), dtype=float)
    return np.sqrt(np.sum(diff ** 2))

In [531]:
def manhattan(X_train, X_test):
    """Return the Manhattan (L1) distance between two 1-D feature vectors.

    Parameters
    ----------
    X_train, X_test : array-like of numbers
        Two vectors of equal length (lists, tuples, numpy arrays or
        pandas Series).

    Returns
    -------
    numpy.float64
        ``sum(|X_test - X_train|)``. NaN in either input propagates to
        the result.
    """
    # np.subtract (instead of the ``-`` operator) also accepts plain
    # lists/tuples; np.sum replaces Python's built-in ``sum``.
    diff = np.asarray(np.subtract(X_test, X_train), dtype=float)
    return np.sum(np.abs(diff))

In [541]:
def chebyshev(X_train, X_test):
    """Return the Chebyshev (L-infinity) distance between two 1-D vectors.

    Parameters
    ----------
    X_train, X_test : array-like of numbers
        Two non-empty vectors of equal length (lists, tuples, numpy
        arrays or pandas Series).

    Returns
    -------
    numpy.float64
        ``max(|X_train - X_test|)``; NaN if any component is NaN.

    Raises
    ------
    ValueError
        If the vectors are empty (there is no maximum to take).
    """
    diff = np.asarray(np.subtract(X_train, X_test), dtype=float)
    # np.max always propagates NaN. Python's built-in ``max`` compares
    # element by element, so its result with NaN present depends on where
    # the NaN happens to sit in the vector.
    return np.max(np.abs(diff))

In [551]:
def cosine(X_train, X_test):
    """Return the cosine distance between two 1-D feature vectors.

    Parameters
    ----------
    X_train, X_test : array-like of numbers
        Two vectors of equal length (lists, tuples, numpy arrays or
        pandas Series).

    Returns
    -------
    numpy.float64
        ``1 - (X_train . X_test) / (||X_train|| * ||X_test||)``.
        If either vector has zero norm the division is 0/0, so the result
        is NaN (NumPy emits a RuntimeWarning in that case).
    """
    # np.multiply (instead of ``*``) also accepts plain lists/tuples.
    dot_product = np.sum(np.asarray(np.multiply(X_train, X_test), dtype=float))
    norm_train = np.sqrt(np.sum(np.asarray(X_train, dtype=float) ** 2))
    norm_test = np.sqrt(np.sum(np.asarray(X_test, dtype=float) ** 2))

    return 1 - dot_product / (norm_train * norm_test)

In [556]:
class MyKNNClf:
    """Brute-force k-nearest-neighbours classifier for 0/1 labels.

    Training data is kept as given (a pandas DataFrame of features and a
    pandas Series of labels) and every prediction compares the query row
    against every training row with ``metric``.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training rows consulted per prediction.
    metric : str or callable, default 'euclidean'
        Either the name of a module-level function (looked up in this
        module's globals) or a callable ``metric(query_row, train_row)``
        returning a number.
    weight : str, default 'uniform'
        Stored on the instance; ``predict`` and ``predict_proba`` do not
        currently use it (every neighbour counts equally).

    Raises
    ------
    KeyError
        If ``metric`` is a name with no matching module-level object.
    """

    def __init__(self, k=3, metric='euclidean', weight='uniform'):
        self.k = k
        self.train_size = None
        if callable(metric):
            self.metric = metric
        else:
            try:
                self.metric = globals()[metric]
            except KeyError:
                raise KeyError(
                    f"unknown metric {metric!r}: no module-level function with that name"
                ) from None
        self.weight = weight

    def __str__(self):
        return f"MyKNNClf class: k={self.k}"

    def fit(self, X, y):
        """Memorise the training set.

        X is expected to be a DataFrame and y a Series (both are accessed
        positionally through ``.iloc`` at prediction time).
        """
        self.X = X
        self.y = y
        self.train_size = X.shape

    def _neighbor_labels(self, X):
        """Yield, per row of X, the labels (a Series) of its nearest training rows.

        At most ``self.k`` labels are yielded per row; fewer when the
        training set has fewer than ``k`` rows.
        """
        n_train = self.train_size[0]
        # Extract the training rows once instead of once per query row.
        train_rows = [self.X.iloc[j] for j in range(n_train)]
        for i in range(X.shape[0]):
            query_row = X.iloc[i]
            distances = np.array(
                [self.metric(query_row, train_row) for train_row in train_rows],
                dtype=float,
            )
            nearest = np.argsort(distances)[:self.k]
            yield self.y.iloc[nearest]

    def predict(self, X):
        """Return the predicted class (int array) for each row of X.

        The prediction is the most frequent label among the nearest
        neighbours. When there is no single most frequent label (a tie,
        or no neighbours at all) the prediction is 1.
        """
        # Start from 1 so that ties fall through to class 1.
        y_pred = np.ones(X.shape[0])
        for i, labels in enumerate(self._neighbor_labels(X)):
            modes = labels.mode().values
            if len(modes) == 1:
                # Store the scalar, not the length-1 array: assigning an
                # array to a single element is deprecated in recent NumPy.
                y_pred[i] = modes[0]
        return y_pred.astype(int)

    def predict_proba(self, X):
        """Return, for each row of X, the mean label of its nearest neighbours.

        With 0/1 labels this is the fraction of neighbours in class 1.
        The mean is taken over the neighbours actually found, so it stays
        correct when ``k`` exceeds the number of training rows. Rows with
        no neighbours get 0.0.
        """
        y_proba = np.zeros(X.shape[0])
        for i, labels in enumerate(self._neighbor_labels(X)):
            n_found = len(labels)
            if n_found:
                y_proba[i] = labels.values.sum() / n_found
        return y_proba

In [557]:
# Classifier with 3 neighbours; metric and weight keep their defaults.
knn = MyKNNClf(k=3)

In [558]:
# Store the training split on the classifier.
knn.fit(X=X_train, y=y_train)

In [559]:
# Predicted class for every row of the held-out split.
knn.predict(X=X_test)

array([0, 0, 0, 1, 0, 1, 1, 1, 1, 1])

In [555]:
# Mean neighbour label for every row of the held-out split.
# NOTE(review): this cell's saved execution count (555) is lower than that of
# the class-definition cell (556), so the stored output may come from an
# earlier version of the class; re-run after the cells above to refresh it.
knn.predict_proba(X=X_test)

array([0.        , 0.        , 0.        , 1.        , 0.        ,
       1.        , 1.        , 1.        , 1.        , 0.33333333])