In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("iris.csv")

In [3]:
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
data.shape

(150, 6)

In [5]:
data.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [6]:
x = data.drop(columns = ["Species"])
y = data["Species"]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y, test_size = 0.2)

In [9]:
xtrain.shape , xtest.shape

((120, 5), (30, 5))

In [10]:
ytrain.shape , ytest.shape

((120,), (30,))

In [11]:
from sklearn.neighbors import KNeighborsClassifier

In [12]:
model = KNeighborsClassifier(n_neighbors = 5)
model.fit(xtrain,ytrain)

KNeighborsClassifier()

In [13]:
ypred = model.predict(xtest)
ypred

array(['Iris-setosa', 'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-setosa', 'Iris-versicolor',
       'Iris-virginica', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-versicolor',
       'Iris-virginica', 'Iris-setosa', 'Iris-virginica',
       'Iris-virginica', 'Iris-virginica', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-virginica', 'Iris-setosa',
       'Iris-virginica', 'Iris-versicolor'], dtype=object)

In [14]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
acc = accuracy_score(ytest,ypred)
acc

1.0

In [15]:
cm = confusion_matrix(ytest,ypred)
cm

array([[ 6,  0,  0],
       [ 0, 11,  0],
       [ 0,  0, 13]], dtype=int64)

#### HyperParameter Tunning

In [16]:
#estimator
model = KNeighborsClassifier()

In [17]:
#parameter
n_neighbors = range(1,31)
metric = ['euclidean',"manhattan","minkowski"]
weights = ["uniform","distance"]

In [18]:
#parameter grid
grid={"n_neighbors":n_neighbors,"metric":metric,"weights":weights}

In [19]:
from sklearn.model_selection import RepeatedStratifiedKFold

In [20]:
kfold_cv = RepeatedStratifiedKFold (n_splits = 5, n_repeats = 3, random_state = 1)

In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
grid_cv = GridSearchCV(estimator = model, param_grid = grid, cv = kfold_cv, scoring = "accuracy",n_jobs = 1)

In [23]:
res = grid_cv.fit(xtrain,ytrain)

In [24]:
res.best_params_

{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}

In [25]:
res.best_score_

1.0