In [49]:
import numpy as np
from collections import Counter

In [50]:
from sklearn import datasets
# Load the Iris dataset that ships with scikit-learn; the cells below read
# its "data" (features) and "target" (labels) entries.
iris = datasets.load_iris()

In [51]:
# Feature matrix (every row, every column) and the matching label vector.
X, y = iris["data"][:, :], iris["target"]

# Preview the first five samples and their labels, one array per line.
print(X[:5], y[:5], sep="\n")

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[0 0 0 0 0]


In [52]:
# Sanity check: number of feature columns and the shape of the label vector.
X.shape[1], y.shape

(4, (150,))

In [53]:
# Stack features and labels side by side: one row per sample, with the label
# in the last column. The column count is read from X instead of being
# hard-coded to 4, so the cell keeps working if the feature set changes.
np.concatenate((X.reshape(len(X), X.shape[1]), y.reshape(len(y), 1)), axis=1)

array([[5.1, 3.5, 1.4, 0.2, 0. ],
       [4.9, 3. , 1.4, 0.2, 0. ],
       [4.7, 3.2, 1.3, 0.2, 0. ],
       [4.6, 3.1, 1.5, 0.2, 0. ],
       [5. , 3.6, 1.4, 0.2, 0. ],
       [5.4, 3.9, 1.7, 0.4, 0. ],
       [4.6, 3.4, 1.4, 0.3, 0. ],
       [5. , 3.4, 1.5, 0.2, 0. ],
       [4.4, 2.9, 1.4, 0.2, 0. ],
       [4.9, 3.1, 1.5, 0.1, 0. ],
       [5.4, 3.7, 1.5, 0.2, 0. ],
       [4.8, 3.4, 1.6, 0.2, 0. ],
       [4.8, 3. , 1.4, 0.1, 0. ],
       [4.3, 3. , 1.1, 0.1, 0. ],
       [5.8, 4. , 1.2, 0.2, 0. ],
       [5.7, 4.4, 1.5, 0.4, 0. ],
       [5.4, 3.9, 1.3, 0.4, 0. ],
       [5.1, 3.5, 1.4, 0.3, 0. ],
       [5.7, 3.8, 1.7, 0.3, 0. ],
       [5.1, 3.8, 1.5, 0.3, 0. ],
       [5.4, 3.4, 1.7, 0.2, 0. ],
       [5.1, 3.7, 1.5, 0.4, 0. ],
       [4.6, 3.6, 1. , 0.2, 0. ],
       [5.1, 3.3, 1.7, 0.5, 0. ],
       [4.8, 3.4, 1.9, 0.2, 0. ],
       [5. , 3. , 1.6, 0.2, 0. ],
       [5. , 3.4, 1.6, 0.4, 0. ],
       [5.2, 3.5, 1.5, 0.2, 0. ],
       [5.2, 3.4, 1.4, 0.2, 0. ],
       [4.7, 3

In [54]:
def euclidean_distance(v, w):
  """Return the Euclidean (L2) distance between ``v`` and ``w``.

  Coordinates are paired with ``zip``, so if the inputs differ in length the
  extra trailing entries of the longer one are ignored.
  """
  squared_total = 0
  for a, b in zip(v, w):
    squared_total = squared_total + (a - b) ** 2
  return np.sqrt(squared_total)
  

def manhattan_distance(v, w):
  """Return the Manhattan (L1) distance between ``v`` and ``w``.

  Coordinates are paired with ``zip``, so if the inputs differ in length the
  extra trailing entries of the longer one are ignored.
  """
  total = 0
  for a, b in zip(v, w):
    total = total + abs(a - b)
  return total

In [55]:
def max_neigbor(li):
  """Return the majority label among ``li``, breaking ties by proximity.

  ``li`` holds the labels of the nearest neighbours ordered from closest to
  farthest. When several labels share the top vote count, the farthest
  neighbour (the last element) is dropped and the vote is repeated until one
  label wins outright.

  Args:
    li: sequence of hashable labels, closest neighbour first.

  Returns:
    The winning label.

  Raises:
    IndexError: if ``li`` is empty.
  """
  votes = list(li)
  while True:
    tally = Counter(votes)
    label, counts = tally.most_common(1)[0]
    # How many distinct labels reach the top count; more than one is a tie.
    leaders = sum(1 for n_n in tally.values() if n_n == counts)
    if leaders == 1:
      return label
    # Tie: discard the farthest neighbour. The original passed ``li[-1]``
    # (a single label, not a list) here, which made Counter raise TypeError.
    votes = votes[:-1]

In [56]:
class knn:
  """Brute-force k-nearest-neighbours classifier.

  Training samples are stored as rows of ``[features..., label]``. Each
  prediction sorts every stored row by its distance to the query point and
  takes a vote over the ``k_val`` closest labels via ``max_neigbor``.
  """

  def __init__(self, k_val=1, distance_metric=None):
    """Configure the classifier.

    Args:
      k_val: number of nearest neighbours that vote on each prediction.
      distance_metric: ``'euclidean'`` selects ``euclidean_distance``; any
        other value (including the default ``None``) selects
        ``manhattan_distance``.
    """
    self.k_val = k_val
    self.distance_metric = distance_metric
    # Training rows with the label appended as the last column; set by fit().
    self.combined = None

  def fit(self, X, y):
    """Store the training data.

    Args:
      X: 2-D array of training features, one row per sample.
      y: 1-D array of labels with one entry per row of ``X``.
    """
    features = X.reshape(len(X), X.shape[1])
    labels = y.reshape(len(y), 1)
    self.combined = np.concatenate((features, labels), 1)

  def _distance(self, v, w):
    """Distance between ``v`` and ``w`` under the configured metric."""
    if self.distance_metric == 'euclidean':
      return euclidean_distance(v, w)
    return manhattan_distance(v, w)

  def predict(self, X, y=None):
    """Predict a label for every row of ``X``.

    ``fit`` must have been called first so that training rows are available.

    Args:
      X: iterable of query points with the same features used in ``fit``.
      y: unused; kept (now optional) so existing ``predict(X, y)`` calls
        continue to work.

    Returns:
      A list with one predicted label per query point, taken from the last
      column of the stored training rows.
    """
    pred = []
    for query in X:
      # Rank all training rows by distance from the query (features only).
      ranked = sorted(
          self.combined,
          key=lambda point: self._distance(point[:-1], query),
      )
      k_nearest_neighbors = [point[-1] for point in ranked[:self.k_val]]
      pred.append(max_neigbor(k_nearest_neighbors))
    return pred

In [57]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; random_state is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [58]:
# 5-nearest-neighbour classifier using Manhattan distance, fitted on the
# training split.
knn_classifier = knn(distance_metric='manhattan', k_val=5)
knn_classifier.fit(X_train, y_train)

In [59]:
# Predict a label for every test sample and collect them in an array.
# y_test is passed only because predict() declares a `y` parameter; the
# method does not read it.
pred = np.array(knn_classifier.predict(X_test, y_test))
pred

array([1., 0., 2., 1., 1., 0., 1., 2., 1., 1., 2., 0., 0., 0., 0., 1., 2.,
       1., 1., 2., 0., 2., 0., 2., 2., 2., 2., 2., 0., 0.])

In [60]:
# Accuracy: fraction of test samples whose predicted label equals the true one.
accuracy = (pred == y_test).mean()
print(accuracy)

1.0
