# In [1]:
import  numpy as np
import pandas as pd

# In [2]:
class MyKNNClf():
    """k-nearest-neighbours classifier with selectable metric and vote weighting.

    Parameters
    ----------
    k : int
        Number of nearest training rows consulted for every query row.
    metric : {'euclidean', 'chebyshev', 'manhattan', 'cosine'}
        Distance used to rank the training rows.
    weight : {'uniform', 'rank', 'distance'}
        How neighbour votes are weighted: equally, by 1/rank, or by
        1/distance.

    Notes
    -----
    ``predict_proba`` always reports the score of the label ``1``.  With
    ``weight='uniform'`` ``predict`` is a binary 0/1 decision (threshold 0.5,
    ties go to 1); with ``'rank'``/``'distance'`` it returns the training
    label that has the highest score.
    """

    # Public metric name -> name of the method implementing it.
    _METRICS = {
        'euclidean': 'euclidian',
        'chebyshev': 'chebyshev',
        'manhattan': 'manhattan',
        'cosine': 'cosine',
    }

    def __init__(self, k=3, metric = 'euclidean', weight = 'uniform'):
        """Store the hyper-parameters; nothing is computed until ``fit``."""
        self.k = k
        self.train_size = None  # shape of the training matrix, set by fit()
        self.X_train = None     # copy of the training features, set by fit()
        self.y_train = None     # copy of the training labels, set by fit()
        self.metric = metric
        self.weight = weight

    def __repr__(self):
        return f'MyKNNClf class: k={self.k}'

    def fit(self, X, y):
        """Memorise copies of the training features ``X`` and labels ``y``.

        Both arguments must provide ``.copy()`` (pandas objects and numpy
        arrays do); ``X`` must also expose ``.shape``.
        """
        self.X_train = X.copy()
        self.y_train = y.copy()
        self.train_size = X.shape

    # ------------------------------------------------------------------
    # Distance metrics: each takes 2-D arrays (n_test, n_features) and
    # (n_train, n_features) and returns an (n_test, n_train) matrix.
    # ------------------------------------------------------------------
    def euclidian(self, Xtest, Xtrain):
        """Pairwise Euclidean (L2) distances."""
        diff = Xtest[:, np.newaxis] - Xtrain
        return np.sqrt(np.sum(diff ** 2, axis=2))

    def chebyshev(self, Xtest, Xtrain):
        """Pairwise Chebyshev (max absolute coordinate difference) distances."""
        return np.max(np.abs(Xtest[:, np.newaxis] - Xtrain), axis=2)

    def manhattan(self, Xtest, Xtrain):
        """Pairwise Manhattan (L1) distances."""
        return np.sum(np.abs(Xtest[:, np.newaxis] - Xtrain), axis=2)

    def cosine(self, Xtest, Xtrain):
        """Pairwise cosine distances, ``1 - cos(angle)``.

        Rows with zero norm divide by zero and therefore produce NaN.
        """
        Xtest_norm = np.linalg.norm(Xtest, axis=1)
        Xtrain_norm = np.linalg.norm(Xtrain, axis=1)
        dist = 1 - np.dot(Xtest, Xtrain.T) / (Xtest_norm[:, np.newaxis] * Xtrain_norm)
        # Rounding can push the distance of (nearly) parallel vectors slightly
        # below zero, which would turn into a huge negative 1/distance weight.
        return np.maximum(dist, 0)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _train_arrays(self):
        """Return the stored training features and labels as numpy arrays."""
        if self.X_train is None or self.y_train is None:
            # AttributeError is the type the unfitted model raised before
            # (``None.values``); only the message is made explicit here.
            raise AttributeError('MyKNNClf is not fitted yet; call fit(X, y) first')
        return np.asarray(self.X_train), np.asarray(self.y_train)

    def _classes(self):
        """Return the sorted distinct training labels."""
        _, y = self._train_arrays()
        return np.unique(y)

    def _neighbours(self, X):
        """Return labels and distances of the nearest training rows.

        Both arrays have shape (n_test, min(k, n_train)) and are ordered from
        the closest neighbour to the farthest.
        """
        if self.k < 1:
            raise ValueError(f'k must be at least 1, got {self.k!r}')
        D = self.calculate_metric(X)
        _, y = self._train_arrays()
        # Slice the ordering before indexing so only k columns are gathered.
        order = np.argsort(D, axis=1)[:, :self.k]
        return y[order], np.take_along_axis(D, order, axis=1)

    def _class_scores(self, labels, weights):
        """Sum ``weights`` per class and normalise every row to total 1.

        Column ``j`` of the result belongs to ``self._classes()[j]``.
        """
        classes = self._classes()
        total = weights.sum(axis=1)
        Q = np.zeros((labels.shape[0], classes.size))
        for j, label in enumerate(classes):
            Q[:, j] = np.sum(weights * (labels == label), axis=1) / total
        return Q

    def _scores(self, X):
        """Return the class-score matrix for the configured ``weight``."""
        if self.weight == 'uniform':
            labels, _ = self._neighbours(X)
            return self._class_scores(labels, np.ones(labels.shape))
        if self.weight == 'rank':
            return self.get_rank(X)
        if self.weight == 'distance':
            return self.get_dist(X)
        raise ValueError(
            f"unknown weight {self.weight!r}; expected 'uniform', 'rank' or 'distance'"
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def calculate_metric(self, X):
        """Return the (n_test, n_train) distance matrix between ``X`` and the
        training rows, using ``self.metric``.

        Raises
        ------
        ValueError
            If ``self.metric`` is not one of the supported names.
        """
        method_name = self._METRICS.get(self.metric)
        if method_name is None:
            raise ValueError(
                f'unknown metric {self.metric!r}; expected one of {sorted(self._METRICS)}'
            )
        X_train, _ = self._train_arrays()
        return getattr(self, method_name)(np.asarray(X), X_train)

    def get_rank(self, X):
        """Return rank-weighted class scores, shape (n_test, n_classes).

        The i-th nearest neighbour (1-based) votes with weight ``1/i``; rows
        are normalised by the sum of the weights of the neighbours used.
        Columns follow the sorted distinct training labels.
        """
        labels, _ = self._neighbours(X)
        rank_weights = 1 / np.arange(1, labels.shape[1] + 1)
        return self._class_scores(labels, np.broadcast_to(rank_weights, labels.shape))

    def get_dist(self, X):
        """Return inverse-distance-weighted class scores, shape (n_test, n_classes).

        Every neighbour votes with weight ``1/distance``.  When a query row
        coincides with one or more training rows (distance 0), those exact
        matches share the whole vote; otherwise ``inf/inf`` would give NaN.
        Columns follow the sorted distinct training labels.
        """
        labels, dist = self._neighbours(X)
        with np.errstate(divide='ignore'):
            weights = 1.0 / dist
        exact = np.isinf(weights)
        rows_with_exact = exact.any(axis=1)
        # In those rows: weight 1 for zero-distance neighbours, 0 for the rest.
        weights[rows_with_exact] = exact[rows_with_exact]
        return self._class_scores(labels, weights)

    def predict(self, X):
        """Predict a label for every row of ``X``.

        ``weight='uniform'`` yields 0/1 integers: 1 when at least half of the
        neighbours carry label 1.  ``'rank'`` and ``'distance'`` yield the
        training label with the highest score; ties go to the smallest label.

        Raises
        ------
        ValueError
            If ``self.weight`` or ``self.metric`` is not supported.
        """
        if self.weight == 'uniform':
            return (self.predict_proba(X) >= 0.5) * 1
        Q = self._scores(X)
        return self._classes()[np.argmax(Q, axis=1)]

    def predict_proba(self, X):
        """Return, for every row of ``X``, the score of the label ``1``.

        The score is the (weighted) share of neighbours labelled 1; it is 0
        for every row when label 1 does not occur in the training data.

        Raises
        ------
        ValueError
            If ``self.weight`` or ``self.metric`` is not supported.
        """
        Q = self._scores(X)
        positive = np.flatnonzero(self._classes() == 1)
        if positive.size == 0:
            return np.zeros(Q.shape[0])
        return Q[:, positive[0]]