In [4]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier as knc
from sklearn.model_selection import cross_val_score 
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [5]:
iris = load_iris()

In [7]:
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

:Number of Instances: 150 (50 in each of three classes)
:Number of Attributes: 4 numeric, predictive attributes and the class
:Attribute Information:
    - sepal length in cm
    - sepal width in cm
    - petal length in cm
    - petal width in cm
    - class:
            - Iris-Setosa
            - Iris-Versicolour
            - Iris-Virginica

:Summary Statistics:

                Min  Max   Mean    SD   Class Correlation
sepal length:   4.3  7.9   5.84   0.83    0.7826
sepal width:    2.0  4.4   3.05   0.43   -0.4194
petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

:Missing Attribute Values: None
:Class Distribution: 33.3% for each of 3 classes.
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fis

In [8]:
X = iris.data

In [9]:
y = iris.target

In [10]:
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [12]:
y[:5]

array([0, 0, 0, 0, 0])

In [13]:
type(y)

numpy.ndarray

In [14]:
type(X)

numpy.ndarray

## Using train test split

In [15]:
from sklearn.model_selection import train_test_split as tts

In [20]:
x_train,x_test,y_train,y_test = tts(X,y,train_size=0.80)

In [22]:
x_train.shape

(120, 4)

In [23]:
x_test.shape

(30, 4)

In [24]:
y_train.shape

(120,)

In [27]:
y_test.shape

(30,)

In [28]:
x_train = pd.DataFrame(x_train)

In [29]:
x_test = pd.DataFrame(x_test)

In [19]:
knn = knc(n_neighbors=5)

In [31]:
knn.fit(x_train,y_train)

## using Cross-Validation

In [34]:
knns = knc(n_neighbors=5)
scores=cross_val_score(knns,X,y,cv=10,scoring='accuracy')
print(scores)

[1.         0.93333333 1.         1.         0.86666667 0.93333333
 0.93333333 1.         1.         1.        ]


In [35]:
scores.mean()

0.9666666666666668

In [36]:
k_scores=[]
for i in range(1,31):
    knnstest = knc(n_neighbors=i)
    scoretest=cross_val_score(knnstest, X, y, cv=10, scoring="accuracy")
    k_scores.append(scoretest.mean())
print(k_scores)

[0.96, 0.9533333333333334, 0.9666666666666666, 0.9666666666666666, 0.9666666666666668, 0.9666666666666668, 0.9666666666666668, 0.9666666666666668, 0.9733333333333334, 0.9666666666666668, 0.9666666666666668, 0.9733333333333334, 0.9800000000000001, 0.9733333333333334, 0.9733333333333334, 0.9733333333333334, 0.9733333333333334, 0.9800000000000001, 0.9733333333333334, 0.9800000000000001, 0.9666666666666666, 0.9666666666666666, 0.9733333333333334, 0.96, 0.9666666666666666, 0.96, 0.9666666666666666, 0.9533333333333334, 0.9533333333333334, 0.9533333333333334]


## Using 1. Grid Search CV

In [39]:
from sklearn.model_selection import GridSearchCV as GSCV

In [40]:
k_value = list(range(0,31))

In [61]:
param_grid = dict(n_neighbors=k_value)

In [62]:
kngscv = knc()

In [63]:
grid = GSCV(kngscv , param_grid , scoring = 'accuracy', cv = 10  , verbose = 1)

In [64]:
grid

In [65]:
grid.fit(X,y)

Fitting 10 folds for each of 31 candidates, totalling 310 fits


In [67]:
print(grid.best_estimator_)
print(grid.best_params_)
print(grid.best_score_)


KNeighborsClassifier(n_neighbors=13)
{'n_neighbors': 13}
0.9800000000000001


## Testing GridSearchCv with multiple Parameter

In [69]:
param_grid = dict(n_neighbors=k_value,weights=['uniform', 'distance'])

In [70]:
grids = GSCV(kngscv , param_grid , scoring = 'accuracy', cv = 10  , verbose = 3)

In [71]:
grids

In [72]:
grids.fit(X,y)

Fitting 10 folds for each of 62 candidates, totalling 620 fits
[CV 1/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 2/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 3/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 4/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 5/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 6/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 7/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 8/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 9/10] END .....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 10/10] END ....n_neighbors=0, weights=uniform;, score=nan total time=   0.0s
[CV 1/10] END ....n_neighbors=0, weights=distance;, score=nan total time=   0.0s
[CV 2/10] END ....n_neighbors=0, weights=dista

In [73]:
grids.best_estimator_

In [74]:
grids.best_index_

26

In [75]:
grids.best_params_

{'n_neighbors': 13, 'weights': 'uniform'}

In [76]:
grids.best_score_

0.9800000000000001

## Using 2.RandomizedSearchCV

In [78]:
from sklearn.model_selection import RandomizedSearchCV as rscv 

In [79]:
k_value = list(range(0,31))

In [83]:
param_dist = dict(n_neighbors=k_value,weights=['uniform', 'distance'])

In [82]:
kngscv = knc()

In [85]:
rand = rscv(kngscv,param_dist,cv=10, scoring="accuracy" , n_iter=10, verbose=True)

In [86]:
rand

In [87]:
rand.fit(X,y)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [90]:
pd.DataFrame(rand.cv_results_)[['mean_test_score','params']]

Unnamed: 0,mean_test_score,params
0,0.973333,"{'weights': 'uniform', 'n_neighbors': 14}"
1,0.966667,"{'weights': 'distance', 'n_neighbors': 22}"
2,0.973333,"{'weights': 'uniform', 'n_neighbors': 17}"
3,0.953333,"{'weights': 'uniform', 'n_neighbors': 2}"
4,0.98,"{'weights': 'distance', 'n_neighbors': 27}"
5,0.96,"{'weights': 'uniform', 'n_neighbors': 24}"
6,0.966667,"{'weights': 'distance', 'n_neighbors': 20}"
7,0.966667,"{'weights': 'uniform', 'n_neighbors': 27}"
8,0.973333,"{'weights': 'uniform', 'n_neighbors': 16}"
9,0.973333,"{'weights': 'distance', 'n_neighbors': 29}"


In [91]:
rand.best_estimator_

In [92]:
rand.best_params_

{'weights': 'distance', 'n_neighbors': 27}

In [93]:
rand.best_score_

0.9800000000000001

In [94]:
rand.best_index_

4