# KNN: Iris Dataset

## Loading Packages

In [1]:
import pandas as pd
from sklearn.datasets import load_iris

# KNN
from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import confusion_matrix

## Loading Iris Dataset

In [2]:
# Load the Iris dataset that ships with scikit-learn
iris = load_iris()

# Wrap the feature matrix in a DataFrame, one named column per measurement
iris_df = pd.DataFrame(iris["data"], columns=iris["feature_names"])

# Preview the first five rows
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


## Data Preparation

We split the data into two disjoint subsets: a training set (a random 75% of the rows) and a test set (the remaining 25%).


#### Training Set

In [3]:
# Draw a random 75% of the rows for training; the fixed seed makes the draw repeatable
iris_train_df = iris_df.sample(frac=0.75, random_state=0)

# Look up the class label of every sampled row through its index value
iris_train_y = iris.target[iris_train_df.index.to_numpy()]

# Preview the first five training rows
iris_train_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
114,5.8,2.8,5.1,2.4
62,6.0,2.2,4.0,1.0
33,5.5,4.2,1.4,0.2
107,7.3,2.9,6.3,1.8
7,5.0,3.4,1.5,0.2


In [4]:
# Class labels of the training rows, in the same order as iris_train_df
iris_train_y

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 0, 1, 1, 1, 2, 0, 2, 0,
       0, 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 0, 2, 1, 1, 1,
       1, 2, 0, 0, 2, 1, 0, 0, 1, 0, 2, 1, 0, 1, 2, 1, 0, 2, 2, 2, 2, 0,
       0, 2, 2, 0, 2, 0, 2, 2, 0, 0, 2, 0, 0, 0, 1, 2, 2, 0, 0, 0, 1, 1,
       0, 0])

#### Test Set

In [5]:
# Every row that was not sampled into the training set becomes part of the test set
iris_test_df = iris_df.drop(iris_train_df.index)
iris_test_y = iris.target[iris_test_df.index]

# Sanity checks on the split: train and test must not share any row, together they
# must account for every row of the full dataset, and the label array must line up
# with the test rows. A failure here means the evaluation below cannot be trusted.
assert iris_test_df.index.intersection(iris_train_df.index).empty
assert len(iris_train_df) + len(iris_test_df) == len(iris_df)
assert len(iris_test_y) == len(iris_test_df)

# Preview the first five test rows
iris_test_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
9,4.9,3.1,1.5,0.1
14,5.8,4.0,1.2,0.2
19,5.1,3.8,1.5,0.3
21,5.1,3.7,1.5,0.4
25,5.0,3.0,1.6,0.2


## Creating and Training the Model

#### k = 1

In [6]:
# Model with k = 1, trained on the training set.
# fit() returns the estimator itself, so construction and training are chained.
knn1 = KNeighborsClassifier(n_neighbors=1).fit(iris_train_df, iris_train_y)
knn1

#### k = 3

In [7]:
# Model with k = 3, trained on the training set.
# fit() returns the estimator itself, so construction and training are chained.
knn3 = KNeighborsClassifier(n_neighbors=3).fit(iris_train_df, iris_train_y)
knn3

## Predicting Labels for Test Set

In [8]:
# Predicted class labels from the k = 1 model, one per test row
knn1_prediction = knn1.predict(X=iris_test_df)
knn1_prediction

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [9]:
# Predicted class labels from the k = 3 model, one per test row
knn3_prediction = knn3.predict(X=iris_test_df)
knn3_prediction

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

## Evaluating the KNN Models

We evaluate each model on the test set using its accuracy and a confusion matrix.

#### Results for k = 1

In [10]:
# Accuracy of the k = 1 model: share of test rows whose label it predicts correctly
acc1 = knn1.score(X=iris_test_df, y=iris_test_y)
acc1

0.9473684210526315

In [None]:
# Confusion matrix for k = 1: rows are the true classes, columns the predicted classes
cm1 = confusion_matrix(y_true=iris_test_y, y_pred=knn1_prediction)
cm1

#### Results for k = 3

In [None]:
# Accuracy of the k = 3 model: share of test rows whose label it predicts correctly
acc3 = knn3.score(X=iris_test_df, y=iris_test_y)
acc3

In [None]:
# Confusion matrix for k = 3: rows are the true classes, columns the predicted classes
cm3 = confusion_matrix(y_true=iris_test_y, y_pred=knn3_prediction)
cm3

In [None]:
# Side-by-side summary: one (heading, accuracy, confusion matrix) entry per model
report = (
    ("kNN, k = 1 accuracy:", acc1, cm1),
    ("kNN, k = 3 accuracy:", acc3, cm3),
)

# Print the accuracy line followed by the matrix for each model in turn
for heading, accuracy, matrix in report:
    print(heading, accuracy)
    print(matrix)