In [32]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from collections import Counter

from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
# Discrete three-colour palette (pure red, green, blue) for matplotlib.
# NOTE(review): presumably one colour per class in a later scatter plot — confirm where it is used.
cmap = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])


In [33]:
# Load the Iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
X = iris.data    # feature matrix
y = iris.target  # class label for each row of X

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)


In [34]:
# Sanity-check the split, one item per output line:
#   training feature matrix shape (samples, features), the first training sample,
#   test feature matrix shape, then the shapes of the two 1-D label vectors.
print(
    X_train.shape,
    X_train[0],
    X_test.shape,
    y_train.shape,
    y_test.shape,
    sep="\n",
)

(120, 4)
[5.1 2.5 3.  1.1]
(30, 4)
(120,)
(30,)


**KNN Algorithm**

In [35]:

class KNN:
  """k-nearest-neighbours classifier (Euclidean distance, majority vote).

  Parameters
  ----------
  k : int, default=3
      Number of nearest training samples that vote on each prediction.
      Must be a positive integer. If ``k`` is larger than the number of
      training samples, every training sample votes.

  Attributes
  ----------
  X_train : numpy.ndarray
      Training samples stored by ``fit`` (one sample per row).
  y_train : numpy.ndarray
      Training labels stored by ``fit``, aligned with ``X_train``.
  """

  def __init__(self, k=3):
    # Fail early: a non-positive k would otherwise surface as an opaque
    # IndexError (k == 0) or silently drop neighbours (k < 0) at predict time.
    if not isinstance(k, (int, np.integer)):
      raise TypeError(f"k must be an integer, got {type(k).__name__}")
    if k < 1:
      raise ValueError(f"k must be a positive integer, got {k}")
    self.k = k

  def fit(self, X, y):
    """Store the training data; KNN is a lazy learner, so nothing is computed here.

    Parameters
    ----------
    X : array-like
        Training samples, one per row (lists, arrays and DataFrames are accepted).
    y : array-like
        Label of each training sample, same length as ``X``.

    Raises
    ------
    ValueError
        If ``X`` is empty or ``X`` and ``y`` have different lengths.
    """
    # Coerce to arrays so arithmetic and positional indexing behave the same
    # for plain lists, NumPy arrays and pandas objects.
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) == 0:
      raise ValueError("cannot fit on an empty training set")
    if len(X) != len(y):
      raise ValueError(
          f"X and y have inconsistent lengths: {len(X)} != {len(y)}"
      )
    self.X_train = X
    self.y_train = y

  def predict(self, X):
    """Predict a label for every sample in ``X``.

    Parameters
    ----------
    X : array-like
        Samples to classify, one per row.

    Returns
    -------
    numpy.ndarray
        Predicted labels, one per row of ``X``.
    """
    predicted_labels = [self._predict(x) for x in np.asarray(X)]
    return np.array(predicted_labels)

  def _predict(self, x):
    """Return the majority label among the ``k`` training samples closest to ``x``."""
    # Euclidean distance from x to every training sample in one vectorised step.
    # Summing over every axis except the first collapses each sample to a single
    # distance whatever the per-sample shape is.
    diffs = x - self.X_train
    sample_axes = tuple(range(1, diffs.ndim))
    distances = np.sqrt(np.sum(diffs ** 2, axis=sample_axes))

    # Indices of the k nearest training samples, closest first.
    k_indices = np.argsort(distances)[:self.k]

    # Majority vote. On a tied vote Counter returns the label that was
    # encountered first, i.e. the one whose neighbour is nearest.
    votes = Counter(self.y_train[k_indices])
    return votes.most_common(1)[0][0]

  def _euclidean_distance(self, x1, x2):
    """Return the Euclidean distance between two samples.

    ``_predict`` uses a vectorised equivalent; this helper is kept for
    single-pair use.
    """
    return np.sqrt(np.sum((x1 - x2) ** 2))

In [36]:
# Build a classifier that lets the 5 nearest neighbours vote, give it the
# training split, then predict a label for every held-out test sample.
clf = KNN(k=5)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

In [37]:
# Accuracy = fraction of test samples whose predicted label equals the true label.
is_correct = predictions == y_test
acc = np.mean(is_correct)
print(acc)

0.9666666666666667
