# Task 2: K-Nearest Neighbors (KNN) Classifier — From Scratch


## Importing Libraries and data

In [None]:
import numpy as np

# Labelled fruit samples: each row holds three numeric values followed by
# the fruit name.
fruit_rows = [
    [150, 7.0, 1, 'Apple'],
    [120, 6.5, 0, 'Banana'],
    [180, 7.5, 2, 'Orange'],
    [155, 7.2, 1, 'Apple'],
    [110, 6.0, 0, 'Banana'],
    [190, 7.8, 2, 'Orange'],
    [145, 7.1, 1, 'Apple'],
    [115, 6.3, 0, 'Banana'],
]

# Rows mix numbers and strings, so NumPy stores every entry as text here;
# the numeric conversion happens after the labels are encoded.
data = np.array(fruit_rows)

## Data Pre‐Processing and Cleaning

In [None]:
# Integer code assigned to each fruit name.
encode = dict(Apple=0, Banana=1, Orange=2)

# The last column still holds fruit names; overwrite each one with its code.
# NOTE: this cell rebinds `data` to a float array below, so re-running it
# without re-creating `data` first raises KeyError (the names are gone).
fruit_codes = [encode[fruit_name] for fruit_name in data[:, 3]]
data[:, 3] = fruit_codes

# With no fruit names left, the whole table can be converted to floats.
data = data.astype(float)
data

array([[150. ,   7. ,   1. ,   0. ],
       [120. ,   6.5,   0. ,   1. ],
       [180. ,   7.5,   2. ,   2. ],
       [155. ,   7.2,   1. ,   0. ],
       [110. ,   6. ,   0. ,   1. ],
       [190. ,   7.8,   2. ,   2. ],
       [145. ,   7.1,   1. ,   0. ],
       [115. ,   6.3,   0. ,   1. ]])

Dividing the data into Features and labels


In [None]:
# Columns 0-2 are the features; column 3 is the encoded label.
X, y = data[:, :3], data[:, 3]

## Creating distance Function


In [None]:
def EuclideanDistance(vec1, vec2, dim=None):
  """Return the Euclidean distance between two 1-D vectors.

  Parameters
  ----------
  vec1, vec2 : sequence of numbers or 1-D np.ndarray
      The two points to compare.
  dim : int, optional
      Number of leading components to include. When omitted, every
      component is used and both vectors must have the same length.

  Returns
  -------
  np.float64
      Square root of the summed squared differences over the first
      `dim` components (0.0 when `dim` is 0).

  Raises
  ------
  ValueError
      If `dim` is omitted and the vectors differ in length, or if `dim`
      is negative.
  IndexError
      If `dim` is larger than the length of either vector.
  """
  a = np.asarray(vec1, dtype=float)
  b = np.asarray(vec2, dtype=float)

  if dim is None:
    if a.shape != b.shape:
      raise ValueError("vec1 and vec2 must have the same length when dim is omitted")
    dim = a.shape[0]
  elif dim < 0:
    raise ValueError("dim must be non-negative")
  elif dim > min(a.shape[0], b.shape[0]):
    # Slicing would silently truncate, so reject an oversized dim explicitly.
    raise IndexError("dim exceeds the length of the input vectors")

  # One vectorised subtraction replaces the element-by-element Python loop.
  difference = a[:dim] - b[:dim]
  return np.sqrt(np.sum(np.square(difference)))

In [None]:
class KNN:
  """K-nearest-neighbours classifier using a majority vote.

  `fit` only stores the training data; all distance computation happens
  in `predict_one` at query time.

  Parameters
  ----------
  distance : callable, optional
      Function called as `distance(vec1, vec2, dim)` that returns the
      distance between two samples over their first `dim` features.
      When omitted, the notebook's `EuclideanDistance` is used.
  seed : int, optional
      Seed for the private random generator that breaks voting ties.
      When omitted, ties are broken with fresh, unseeded randomness.
  """

  def __init__(self, distance=None, seed=None):
    self.k = 0
    self.X_train = None
    self.Y_train = None
    # The default metric is looked up at prediction time, so the class can
    # be defined before EuclideanDistance exists.
    self.distance = distance
    # A private generator keeps tie-breaking from touching NumPy's global
    # random state.
    self._rng = np.random.default_rng(seed)

  def fit(self, k, X_train, Y_train):
    """Store the training set and the number of neighbours to consult.

    Parameters
    ----------
    k : int
        Number of nearest neighbours that vote; must be at least 1.
        If `k` exceeds the number of training samples, all samples vote.
    X_train : array-like of shape (n_samples, n_features)
        Training features.
    Y_train : array-like with n_samples entries
        Training labels, aligned with the rows of `X_train`.

    Raises
    ------
    ValueError
        If `k` is not a positive integer, `X_train` is not a non-empty
        2-D array, or the two inputs differ in sample count.
    """
    if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
      raise ValueError("k must be a positive integer, got %r" % (k,))

    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)
    if X_train.ndim != 2 or X_train.shape[0] == 0:
      raise ValueError("X_train must be a non-empty 2-D array of shape (n_samples, n_features)")
    if Y_train.shape[:1] != X_train.shape[:1]:
      raise ValueError("X_train and Y_train must contain the same number of samples")

    self.k = k
    self.X_train = X_train
    self.Y_train = Y_train

  def predict(self, X_test):
    """Predict a label for every row of `X_test`.

    Returns a 1-D array holding one predicted label per row, in order.
    """
    # Build the result once instead of re-allocating with np.append per row.
    return np.array([self.predict_one(x) for x in X_test])

  def predict_one(self, x):
    """Predict the label of the single sample `x`.

    The k training samples closest to `x` vote; the most frequent label
    wins. If several labels share the highest count, one of them is
    picked at random.

    Raises
    ------
    AttributeError
        If called before `fit`.
    """
    if self.X_train is None or self.Y_train is None:
      raise AttributeError("KNN has not been fitted; call fit() before predicting")

    metric = EuclideanDistance if self.distance is None else self.distance
    n_features = self.X_train.shape[1]

    # Distance from x to every training sample.
    distances = np.array([metric(x, row, n_features) for row in self.X_train])

    # A stable sort keeps equidistant samples in training order, so the
    # chosen neighbours do not depend on the sort implementation.
    k_nn_indices = np.argsort(distances, kind="stable")[:self.k]
    k_nn_labels = self.Y_train[k_nn_indices]

    # Every label that reaches the top count is a candidate, which covers
    # ties such as 2-vs-2 as well as the all-different case.
    unique_labels, counts = np.unique(k_nn_labels, return_counts=True)
    tied_labels = unique_labels[counts == counts.max()]
    if tied_labels.size == 1:
      return tied_labels[0]
    return self._rng.choice(tied_labels)

## Testing the Classifier

Using the given test samples to test the classifier.

In [None]:
# Three unlabelled query samples, one per row, with three feature values each.
test_data = np.array(
    [[118, 6.2, 0], [160, 7.3, 1], [185, 7.7, 2]],
    dtype=float,
)

Creating an instance of class `KNN`, fitting it on the training data, and predicting the labels of the test samples.

In [None]:
  # Fit a 3-nearest-neighbour model on the training data and classify the
  # test samples; `predictions` holds the numeric label codes.
  model = KNN()
  model.fit(k=3, X_train=X, Y_train=y)
  predictions = model.predict(test_data)

  # Invert the name -> code mapping so the codes can be shown as fruit names.
  decode = dict(zip(encode.values(), encode.keys()))

  predicted_labels = np.array([decode[label_code] for label_code in predictions])
  print(predicted_labels)

['Banana' 'Apple' 'Orange']


In [None]:
# Expected label codes for the three test samples, in order
# (1 = Banana, 0 = Apple, 2 = Orange under the `encode` mapping).
test_y = np.asarray([1, 0, 2])

## Evaluating the output

The expected labels for the test samples are Banana, Apple and Orange, and the KNN classifier predicted exactly these labels.

In [None]:
# Accuracy (the fraction of exactly matching labels) is the meaningful score
# for a classifier: the integer codes are arbitrary names, so the size of
# (test_y - predictions) says nothing about how wrong a prediction is.
accuracy = np.mean(predictions == test_y)
print("Accuracy:", accuracy)

# Mean squared error over the label codes, kept as in the original exercise.
# It is 0 exactly when every prediction is correct; any other value depends
# on the arbitrary code assignment and should not be interpreted.
mse = np.mean((test_y - predictions) ** 2)
print("Mean Squared Error:", mse)

Mean Squared Error: 0.0
