In [1]:
import numpy as np

from pandas import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Seed NumPy's global RNG first: every draw below depends on it, so the order
# of these statements must not change.
np.random.seed(42)

# Synthetic data set: 10 rows x 2 random features, plus one random 0/1 label per row.
X = DataFrame(data=np.random.rand(10, 2))
y = Series(data=np.random.randint(low=0, high=2, size=10))

# Hold out 20% of the rows (2 of 10) as the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

class MyKNNClf():
    """k-nearest-neighbours classifier for pandas data.

    Parameters
    ----------
    k : int
        Number of nearest training rows consulted for each query row.
    metric : str
        One of ``"euclidean"``, ``"chebyshev"``, ``"manhattan"``, ``"cosine"``.
        Cosine distance is ``1 - cos(angle)``; it is NaN for an all-zero row
        because the norm product in the denominator is zero.
    weight : str
        How neighbours vote: ``"uniform"`` (one vote each), ``"rank"``
        (``1 / rank``, nearest neighbour has rank 1) or ``"distance"``
        (``1 / distance``; exact matches take all the weight).

    ``predict_proba`` reports the weighted share of neighbours labelled ``1``,
    and a two-class tie under uniform voting is resolved in favour of ``1``.
    """

    _METRICS = ("euclidean", "chebyshev", "manhattan", "cosine")
    _WEIGHTS = ("uniform", "rank", "distance")

    def __init__(self, k=3, metric="euclidean", weight="uniform"):
        self.k = k
        self.train_size = None  # set by fit() to the training frame's shape
        self.metric = metric
        self.weight = weight

    def __str__(self):
        # Lists every instance attribute in insertion order; after fit() this
        # includes the stored training data (X, y).
        params = ", ".join(f"{name}={value}" for name, value in self.__dict__.items())
        return "MyKNNClf class: " + params

    def __repr__(self):
        return self.__str__()

    def fit(self, X, y):
        """Memorise the training data.

        ``X`` is a DataFrame of features and ``y`` a Series of labels.  Both
        are stored as re-indexed copies (0..n-1), so the caller's objects are
        left untouched.
        """
        self.X = X.reset_index(drop=True)
        self.y = y.reset_index(drop=True)
        self.train_size = self.X.shape

    def predict(self, X):
        """Return a Series holding the predicted label for each row of ``X``.

        Raises ``RuntimeError`` if called before ``fit`` and ``ValueError``
        for an unknown metric/weight or ``k < 1``.
        """
        return Series([self._vote(labels, dist) for labels, dist in self._neighbours(X)])

    def predict_proba(self, X):
        """Return a Series with the weighted share of label ``1`` among the
        neighbours of each row of ``X``.

        Raises the same errors as ``predict``.
        """
        probabilities = []
        for labels, dist in self._neighbours(X):
            weights = self._neighbour_weights(dist)
            probabilities.append(np.sum(weights[labels == 1]) / np.sum(weights))
        return Series(probabilities)

    def _validate(self):
        """Fail early, with a clear message, on an unusable configuration."""
        if self.train_size is None:
            raise RuntimeError("MyKNNClf is not fitted yet; call fit(X, y) first")
        if self.metric not in self._METRICS:
            raise ValueError(
                f"No such metric was found: {self.metric!r} (expected one of {self._METRICS})"
            )
        if self.weight not in self._WEIGHTS:
            raise ValueError(
                f"No such weight was found: {self.weight!r} (expected one of {self._WEIGHTS})"
            )
        if self.k < 1:
            raise ValueError(f"k must be at least 1, got {self.k!r}")

    def _query_matrix(self, X):
        """Return ``X`` as a float array whose columns line up with the training columns."""
        train_cols = self.X.columns
        if (
            isinstance(X, DataFrame)
            and X.shape[1] == len(train_cols)
            and X.columns.is_unique
            and set(X.columns) == set(train_cols)
        ):
            # Same labels, possibly in another order: match by label.
            X = X.reindex(columns=train_cols)
        # Otherwise columns are matched by position.
        return np.asarray(X, dtype=float)

    def _distances_to_train(self, row, train):
        """Return the distance from one query ``row`` to every row of ``train``."""
        if self.metric == "cosine":
            norms = np.linalg.norm(row) * np.linalg.norm(train, axis=1)
            return 1 - (train @ row) / norms
        gaps = np.abs(train - row)
        if self.metric == "euclidean":
            return np.sqrt(np.sum(gaps ** 2, axis=1))
        if self.metric == "chebyshev":
            return np.max(gaps, axis=1)
        return np.sum(gaps, axis=1)  # "manhattan"; _validate() rules out anything else

    def _neighbours(self, X):
        """Yield ``(labels, distances)`` of the nearest training rows, nearest
        first, for each row of ``X``.  At most ``k`` rows are returned (fewer
        when the training set is smaller than ``k``).
        """
        self._validate()
        train = np.asarray(self.X, dtype=float)
        labels = np.asarray(self.y)
        for row in self._query_matrix(X):
            dist = self._distances_to_train(row, train)
            # Stable sort: equidistant rows keep their training order.
            nearest = np.argsort(dist, kind="stable")[:self.k]
            yield labels[nearest], dist[nearest]

    def _neighbour_weights(self, distances):
        """Return one voting weight per neighbour (neighbours are ordered nearest first)."""
        count = len(distances)
        if self.weight == "uniform":
            return np.ones(count)
        if self.weight == "rank":
            return 1.0 / np.arange(1, count + 1)
        # "distance": 1/d would be infinite for an exact match (and inf/inf is
        # NaN in predict_proba), so exact matches share all of the weight.
        exact = distances == 0
        if exact.any():
            return exact.astype(float)
        return 1.0 / distances

    def _vote(self, labels, distances):
        """Pick the winning label among one query row's neighbours."""
        if self.weight == "uniform":
            counts = Series(labels).value_counts()
            # Positional comparison so the tie rule works for any pair of labels.
            if len(counts) == 2 and counts.iloc[0] == counts.iloc[1]:
                return 1
            return counts.idxmax()
        weights = self._neighbour_weights(distances)
        # unique() keeps first-appearance order, so a weight tie goes to the
        # label of the nearer neighbour.
        totals = {
            label: np.sum(weights[labels == label])
            for label in Series(labels).unique()
        }
        return max(totals, key=totals.get)



# Demo run: 4 neighbours, cosine metric, weighting left at its default.
a = MyKNNClf(k=4, metric="cosine")
a.fit(X=X_train, y=y_train)
a.predict(X=X_test)







Unnamed: 0,0
0,1
1,1
