In [None]:
from collections import Counter

import numpy as np
import pandas as pd
import random

In [None]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both arguments must support element-wise subtraction with each other,
    e.g. NumPy arrays of matching (or broadcastable) shape. All squared
    differences are summed into a single scalar before the square root.
    """
    delta = x1 - x2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

In [None]:
class KNN:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    A sample is assigned the most frequent label among the ``k`` training
    samples closest to it.

    Parameters
    ----------
    k : int, default 3
        Number of neighbours that vote on each prediction. Must be a
        positive integer. If ``k`` exceeds the number of training samples,
        every training sample votes.

    Raises
    ------
    TypeError
        If ``k`` is not an integer (for example a list of candidate values;
        build one ``KNN`` per candidate instead).
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        # k is used as a slice bound in _predict, so it has to be a single
        # integer; a list would only fail later, at predict time.
        if not isinstance(k, (int, np.integer)):
            raise TypeError(f"k must be an integer, got {type(k).__name__}")
        # A zero or negative bound would silently select the wrong neighbours.
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = int(k)

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like
            Training samples, one per entry along the first axis.
        y : array-like
            Labels, one per training sample.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Converting to arrays guarantees positional indexing later on; a
        # pandas Series with a shuffled index would otherwise be looked up
        # by label instead of by position.
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)
        if len(self.X_train) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(self.X_train) != len(self.y_train):
            raise ValueError(
                f"X and y have different lengths: "
                f"{len(self.X_train)} != {len(self.y_train)}"
            )

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Returns
        -------
        numpy.ndarray
            One predicted label per entry along the first axis of ``X``.
        """
        # np.asarray makes iteration go over rows even when X is a DataFrame
        # (iterating a DataFrame directly yields its column names).
        return np.array([self._predict(x) for x in np.asarray(X)])

    def _predict(self, x):
        """Return the majority label among the k training samples nearest to ``x``."""
        # Distance from x to every training sample in one vectorised step:
        # each training sample is flattened to a row and x is broadcast
        # against all rows.
        flat_train = self.X_train.reshape(len(self.X_train), -1)
        diffs = flat_train - np.ravel(x)
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))
        # A stable sort keeps equidistant neighbours in training order, so
        # tie-breaking is deterministic.
        k_idx = np.argsort(distances, kind="stable")[: self.k]
        # Labels of the selected neighbours, nearest first.
        k_neighbor_labels = self.y_train[k_idx].tolist()
        # On a vote tie, Counter reports the label that was seen first,
        # i.e. the one belonging to the nearer neighbour.
        most_common = Counter(k_neighbor_labels).most_common(1)
        return most_common[0][0]

In [None]:
# Load the Iris data set. 'Iris.csv' is a relative path, so the file has to be
# present in the notebook's working directory.
df = pd.read_csv('Iris.csv')

In [None]:
# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
# Preview the last rows as well, to see the end of the file.
df.tail()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica
149,150,5.9,3.0,5.1,1.8,Iris-virginica


In [None]:
# scikit-learn's preprocessing module provides LabelEncoder.
from sklearn import preprocessing

# Encoder that maps each distinct class name to an integer code.
label_encoder = preprocessing.LabelEncoder()

# Fit the encoder on the 'Species' column and overwrite the column with the
# resulting integer codes.
species_codes = label_encoder.fit_transform(df['Species'])
df['Species'] = species_codes

# Show the distinct codes now stored in the column.
df['Species'].unique()

array([0, 1, 2])

In [None]:
if __name__ == "__main__":
    # Imports
    from matplotlib.colors import ListedColormap
    from sklearn import datasets

    # Three-colour map (red, green, blue); defined here, not used in this cell.
    cmap = ListedColormap(["#FF0000", "#00FF00", "#0000FF"])

    def accuracy(y_true, y_pred):
        """Return the fraction of predictions that match the true labels.

        Parameters
        ----------
        y_true, y_pred : array-like
            True and predicted labels; both must have the same shape.

        Returns
        -------
        Number of matching entries divided by ``len(y_true)``.

        Raises
        ------
        ValueError
            If the two inputs differ in shape.
        """
        # Convert first: comparing two plain lists with == yields a single
        # bool instead of an element-wise result.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"shape mismatch: y_true {y_true.shape} vs y_pred {y_pred.shape}"
            )
        return np.sum(y_true == y_pred) / len(y_true)
    

In [None]:
 
    # Shuffle the rows so the split below is random. The fixed random_state
    # makes the shuffle, and every metric derived from it, reproducible
    # across kernel restarts.
    shuffle_df = df.sample(frac=1, random_state=42)

    # Features are all columns except the target ('Species') and 'Id'.
    X = shuffle_df.drop(['Species','Id'], axis = 1)
    y = shuffle_df['Species']

    # Fraction of rows used for training; the rest is held out for testing.
    ratio = 0.8

    total_rows = shuffle_df.shape[0]
    train_size = int(total_rows*ratio)

    # Split by position (iloc): after shuffling, the index labels are no
    # longer in order, so the split must not depend on them.
    X_train = X.iloc[:train_size].values
    X_test = X.iloc[train_size:].values

    y_train = y.iloc[:train_size].values
    y_test = y.iloc[train_size:].values

In [None]:
 # Fit a baseline model in this cell so that `predictions` is defined when
 # the notebook runs top to bottom on a fresh kernel.
 baseline_clf = KNN(k=3)
 baseline_clf.fit(X_train, y_train)
 predictions = baseline_clf.predict(X_test)
 # NOTE(review): this is the mean squared difference between integer class
 # codes, so its size depends on how the classes were numbered; accuracy is
 # the more usual score for a classifier.
 mse = np.mean((predictions - y_test)**2)
 print("Mean Squared Error:", mse)

Mean Squared Error: 1.2


In [None]:

    # Candidate neighbour counts to evaluate: 1 through 20.
    k_values = range(1, 21)

    best_k = None
    best_mse = float('inf')
    for k in k_values:
        clf = KNN(k=k)
        clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)
        print("KNN classification accuracy", accuracy(y_test, predictions))

        # Score this k on its own predictions; without recomputing the
        # error here, every k would be compared against one stale value.
        mse = np.mean((predictions - y_test) ** 2)
        # Strict '<' keeps the smallest k when several k tie on error.
        if mse < best_mse:
            best_k = k
            best_mse = mse

    # NOTE(review): k is selected on the same split that is reported as the
    # test result; a separate validation split would avoid that optimism.
    print("Best k:", best_k)
    print("Best Mean Squared Error:", best_mse)

KNN classification accuracy 0.9666666666666667
KNN classification accuracy 0.9666666666666667
KNN classification accuracy 0.9666666666666667
KNN classification accuracy 0.9666666666666667
KNN classification accuracy 0.9666666666666667
KNN classification accuracy 0.9666666666666667
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9666666666666667
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
KNN classification accuracy 0.9333333333333333
Best k: 1
Best Mean Squared Error: 1.2
