In [1]:
import numpy as np
import pandas as pd
import statistics

In [15]:
class KNN_Classifier:
  def __init__(self, distance_metric):
    self.distance_metric = distance_metric

  def get_distance_metric(self, x1, x2):
    dist = np.inf
    if self.distance_metric == 'euclidean':
      dist = np.sqrt(np.sum(np.square(x1-x2)))
    elif self.distance_metric == 'manhatten':
      dist = np.sum(np.abs(x1-x2))
    return dist

  def nearest_neighbors(self, X, test_data, k):
    distances = []
    for x in X:
      distance = self.get_distance_metric(x, test_data)
      distances.append((distance, x))

    distances.sort(key=lambda x: x[0])
    neighbors = []
    for i in range(k):
      neighbors.append(distances[i][1])
    return neighbors


  def predict(self, X, test_data, k):
    neighbors = self.nearest_neighbors(X, test_data, k)
    labels = [neighbor[-1] for neighbor in neighbors]
    return statistics.mode(labels)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
diabetes_dataset = pd.read_csv('/content/diabetes.csv')
X = diabetes_dataset.to_numpy()
X_train, X_test= train_test_split(X, test_size=0.2, random_state=2)

In [16]:
model = KNN_Classifier('euclidean')
test_pred = np.array([model.predict(X_train, test_data, 5) for test_data in X_test])
print(accuracy_score(X_test[:,-1], test_pred))

0.7272727272727273
