# KNN: Iris Dataset

## Loading Packages

In [1]:
import pandas as pd
from sklearn.datasets import load_iris

# KNN
from sklearn.neighbors import KNeighborsClassifier

from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

## Loading Iris Dataset

In [2]:
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data,
                       columns=iris.feature_names)

iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


## Data Preparation

We split the data into two: train and test set.


#### Training Set

In [3]:
iris_train_df = iris_df.sample(frac = 0.75)
iris_train_y = iris.target[iris_train_df.index]

iris_train_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
91,6.1,3.0,4.6,1.4
36,5.5,3.5,1.3,0.2
85,6.0,3.4,4.5,1.6
1,4.9,3.0,1.4,0.2
51,6.4,3.2,4.5,1.5


#### Test Set

In [4]:
iris_test_df = iris_df.drop(iris_train_df.index)
iris_test_y = iris.target[iris_test_df.index]

iris_test_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
3,4.6,3.1,1.5,0.2
15,5.7,4.4,1.5,0.4
19,5.1,3.8,1.5,0.3
24,4.8,3.4,1.9,0.2
26,5.0,3.4,1.6,0.4


## Creating and Training the Model

#### k = 1

In [5]:
knn1 = KNeighborsClassifier(n_neighbors=1) # creating model where k = 1
knn1.fit(iris_train_df, iris_train_y) # training the model using the train set

#### k = 3

In [6]:
knn3 = KNeighborsClassifier(n_neighbors=3) # creating model where k = 3
knn3.fit(iris_train_df, iris_train_y) # training the model using the train set

## Predicting Labels for Test Set

In [7]:
# k = 1
knn1_prediction = knn1.predict(iris_test_df)
knn1_prediction

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
       2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [8]:
# k = 3
knn3_prediction = knn3.predict(iris_test_df)
knn3_prediction

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
       2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

## Evaluating the KNN Models

We use accuracy and confusion matrix

#### Results for k = 1

In [9]:
# Accuracy
acc1 = knn1.score(iris_test_df, iris_test_y)
acc1

0.9210526315789473

In [10]:
# Confusion Matrix
cm1 = confusion_matrix(iris_test_y,knn1_prediction)
cm1

array([[12,  0,  0],
       [ 0, 11,  2],
       [ 0,  1, 12]], dtype=int64)

#### Results for k = 3

In [11]:
# Accuracy
acc3 = knn3.score(iris_test_df, iris_test_y)
acc3

0.9210526315789473

In [12]:
# Confusion Matrix
cm3 = confusion_matrix(iris_test_y,knn3_prediction)
cm3

array([[12,  0,  0],
       [ 0, 11,  2],
       [ 0,  1, 12]], dtype=int64)

In [13]:
print("kNN, k = 1 accuracy:", acc1)
print(cm1)

print("kNN, k = 3 accuracy:", acc3)
print(cm3)

kNN, k = 1 accuracy: 0.9210526315789473
[[12  0  0]
 [ 0 11  2]
 [ 0  1 12]]
kNN, k = 3 accuracy: 0.9210526315789473
[[12  0  0]
 [ 0 11  2]
 [ 0  1 12]]
