##### Import Required Libraries

In [None]:
import pandas as pd
import numpy as np 
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
%matplotlib inline
from pandas.plotting import parallel_coordinates
from pandas.plotting import andrews_curves
from mpl_toolkits.mplot3d import Axes3D
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import cross_val_score
import scipy as sp
from collections import Counter
from math import sqrt


##### Load Dataset

In [None]:
# Read the Iris measurements from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='Iris.csv')

In [None]:
# Show the dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Preview the first five rows of the dataset.
df.head(n=5)

In [None]:
# Summary statistics for the columns of the frame.
df.describe()

In [None]:
# Number of rows per species, to check how balanced the classes are.
df.groupby(by='Species').size()

##### Dividing features and labels

In [None]:
# Columns of the frame that are used as model inputs.
# NOTE(review): the standard Iris CSV also carries a SepalLengthCm column,
# which is not listed here - confirm that leaving it out is intentional.
feature_columns = [
    'SepalWidthCm',
    'PetalLengthCm',
    'PetalWidthCm',
]

In [None]:
# Feature matrix (selected columns) and label vector (species names),
# both pulled out of the frame as plain arrays.
X, y = df[feature_columns].values, df['Species'].values

##### Label Encoding

In [None]:
# Map the species names in y to integer class codes.
lr = LabelEncoder()
lr.fit(y)
y = lr.transform(y)

##### Split Dataset into Training set and Test set

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

##### Data Visualization

In [None]:
# Parallel-coordinates view of every column except Id, coloured by species.
plt.figure(figsize=(15, 10))
parallel_coordinates(df.drop(columns="Id"), class_column="Species")
plt.title('Parallel Coordinates Plot', fontsize=20, fontweight='bold')
plt.xlabel('Features', fontsize=15)
plt.ylabel('Features Values', fontsize=15)
plt.legend(
    loc='upper right',
    prop={'size': 15},
    frameon=True,
    shadow=True,
    facecolor="white",
    edgecolor="black",
)
plt.show()

In [None]:
# Andrews-curves view of every column except Id, coloured by species.
plt.figure(figsize=(15, 10))
andrews_curves(df.drop(columns="Id"), class_column="Species")
plt.title('Andrews Curves Plot', fontsize=20, fontweight='bold')
plt.legend(
    loc='upper right',
    prop={'size': 15},
    frameon=True,
    shadow=True,
    facecolor="white",
    edgecolor="black",
)
plt.show()

In [None]:
# Pairwise scatter plots of every feature pair, coloured by species.
# sns.pairplot builds its own figure, so plt.figure() is not called first:
# doing so only leaves an extra, empty figure behind in the output.
sns.pairplot(df.drop("Id", axis=1), hue="Species", height=3, markers=["o", "s", "D"])
plt.show()

In [None]:
# Box plots of each feature grouped by species.
# DataFrame.boxplot creates its own figure (sized by figsize) when no axes
# are passed, so plt.figure() is not called first: doing so only leaves an
# extra, empty figure behind in the output.
df.drop("Id", axis=1).boxplot(by="Species", figsize=(15, 10))
plt.show()

##### KNN for Classification

In [None]:
# scikit-learn k-NN classifier that votes among the 3 nearest neighbours.
classifier = KNeighborsClassifier(n_neighbors=3)

In [None]:
# Fit the classifier on the training split.
classifier.fit(X_train, y_train)

In [None]:
# Predict class codes for the held-out test split.
y_pred = classifier.predict(X_test)

In [None]:
# Confusion matrix of true test labels against the predicted labels;
# the bare name on the last line displays it as the cell output.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [None]:
# Test-set accuracy of the scikit-learn model, expressed as a percentage.
accuracy = 100 * accuracy_score(y_test, y_pred)
print(''.join(['Accuracy of our model is equal ', str(round(accuracy, 2)), ' %.']))

In [None]:
# Candidate neighbour counts: the odd numbers from 1 to 49.
k_list = [candidate for candidate in range(1, 50) if candidate % 2 == 1]

In [None]:
# Will hold one mean cross-validation accuracy per candidate k.
cv_scores = list()

In [None]:
# Estimate the accuracy of every candidate k with 10-fold cross-validation
# on the training split.
# The list is rebuilt from scratch here so that re-running this cell cannot
# append a second batch of scores and leave cv_scores longer than k_list.
cv_scores = []
for k in k_list:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, cv=10, scoring='accuracy')
    # Keep the mean over the folds as the score for this k.
    cv_scores.append(scores.mean())

In [None]:
# Misclassification rate (1 - mean CV accuracy) for each candidate k.
# NOTE: despite its name, MSE is not a mean squared error; the name is kept
# because the cell that picks the best k reads it.
MSE = [1 - x for x in cv_scores]

# The style must be set before the figure and axes are created, otherwise it
# does not apply to this plot.
sns.set_style("whitegrid")
# A single figure is created; an extra bare plt.figure() call would only
# leave an empty figure behind in the output.
plt.figure(figsize=(15, 10))
plt.title('The optimal number of neighbors', fontsize=20, fontweight='bold')
plt.xlabel('Number of Neighbors K', fontsize=15)
plt.ylabel('Misclassification Error', fontsize=15)
plt.plot(k_list, MSE)

plt.show()

In [None]:
# Pick the k whose misclassification error is lowest; when several k tie,
# the first (smallest index) one wins.
best_k = k_list[min(range(len(MSE)), key=MSE.__getitem__)]
print("The optimal number of neighbors is %d." % (best_k,))

In [None]:
class KNN:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    The model stores the training samples as given and, for each query
    point, predicts the label that occurs most often among the ``k`` closest
    training samples.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest samples.

        Args:
            k: Number of neighbours that take part in the vote (>= 1).

        Raises:
            ValueError: If ``k`` is smaller than 1. A negative value would
                otherwise be used as a slice bound and silently drop only
                the farthest samples instead of selecting neighbours.
        """
        if k < 1:
            raise ValueError("k must be at least 1, got %r" % (k,))
        self.k = k

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Sequence of feature vectors (indexable, with a length).
            y: Sequence of labels; ``y[i]`` is the label of ``X[i]``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length or are empty.
        """
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same length, got %d and %d"
                % (len(X), len(y))
            )
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return a list with one predicted label per row of ``X``."""
        return [
            self._get_most_common_label(self._get_neighbors(sample))
            for sample in X
        ]

    def _get_neighbors(self, x):
        """Return the ``k`` closest training samples to ``x``.

        The result is a list of ``(training_index, distance)`` pairs ordered
        from nearest to farthest. If fewer than ``k`` training samples exist,
        all of them are returned.
        """
        distances = [
            (index, self._euclidean_distance(x, sample))
            for index, sample in enumerate(self.X_train)
        ]
        # list.sort is stable, so equally distant samples keep their
        # training-set order.
        distances.sort(key=lambda pair: pair[1])
        return distances[:self.k]

    def _get_most_common_label(self, neighbors):
        """Return the label that occurs most often among ``neighbors``.

        ``neighbors`` is a list of ``(training_index, distance)`` pairs as
        produced by ``_get_neighbors``.
        """
        labels = [self.y_train[index] for index, _ in neighbors]
        return Counter(labels).most_common(1)[0][0]

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two feature vectors.

        Raises:
            ValueError: If the vectors differ in length; comparing only a
                common prefix would give a meaningless distance.
        """
        if len(x1) != len(x2):
            raise ValueError(
                "feature vectors must have the same length, got %d and %d"
                % (len(x1), len(x2))
            )
        return sqrt(sum((a - b) ** 2 for a, b in zip(x1, x2)))


In [None]:
# Instance of the hand-written KNN class, voting among 3 neighbours.
my_classifier = KNN(k=3)

In [None]:
# Hand the training split to the hand-written classifier.
my_classifier.fit(X_train, y_train)

In [None]:
# Predict labels for the test split with the hand-written classifier.
my_y_pred = my_classifier.predict(X_test)

In [None]:
# Test-set accuracy of the hand-written KNN model, as a percentage.
accuracy = 100 * accuracy_score(y_test, my_y_pred)
print(''.join(['Accuracy of our model is equal ', str(round(accuracy, 2)), ' %.']))