In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import recall_score, precision_score, accuracy_score
         

In [4]:
load_iris()
# load_iris(return_X_y = True)

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [3]:
X,y = load_iris(return_X_y = True)

In [4]:
X.shape

(150, 4)

In [5]:
y.shape

(150,)

In [5]:
iris = load_iris()
X = iris.data
y = iris.target

In [6]:
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [7]:
iris_df = pd.DataFrame(data = X,columns = iris.feature_names)

iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [8]:
iris_df.loc[:,'target'] = y

iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [9]:
iris_df.sample(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
90,5.5,2.6,4.4,1.2,1
87,6.3,2.3,4.4,1.3,1
3,4.6,3.1,1.5,0.2,0
74,6.4,2.9,4.3,1.3,1
118,7.7,2.6,6.9,2.3,2


## train_test_split

In [None]:
 train_test_split()

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2)

In [12]:
X_train.shape

(120, 4)

In [13]:
X_test.shape

(30, 4)

In [14]:
y_train.shape

(120,)

In [15]:
y_test.shape

(30,)

In [36]:
nrm = MinMaxScaler()
nrm.fit(X_train)
X_train_nrm = nrm.transform(X_train)
X_test_nrm = nrm.transform(X_test)

In [37]:
X_train_nrm

array([[0.35294118, 0.90909091, 0.06779661, 0.04166667],
       [0.55882353, 0.54545455, 0.74576271, 0.91666667],
       [0.52941176, 0.36363636, 0.61016949, 0.54166667],
       [0.52941176, 0.27272727, 0.50847458, 0.5       ],
       [0.32352941, 0.68181818, 0.08474576, 0.04166667],
       [0.44117647, 0.81818182, 0.03389831, 0.04166667],
       [0.64705882, 0.36363636, 0.81355932, 0.875     ],
       [0.70588235, 0.40909091, 0.62711864, 0.58333333],
       [0.38235294, 0.36363636, 0.59322034, 0.58333333],
       [0.02941176, 0.45454545, 0.05084746, 0.04166667],
       [0.08823529, 0.45454545, 0.06779661, 0.04166667],
       [0.14705882, 0.36363636, 0.06779661, 0.08333333],
       [0.58823529, 0.5       , 0.62711864, 0.625     ],
       [0.41176471, 0.27272727, 0.52542373, 0.5       ],
       [0.38235294, 0.22727273, 0.54237288, 0.5       ],
       [0.38235294, 0.36363636, 0.52542373, 0.5       ],
       [0.41176471, 1.        , 0.08474576, 0.125     ],
       [0.70588235, 0.5       ,

In [27]:
# nrm = MinMaxScaler()
# X_train = nrm.fit_transform(X_train)
# X_test_nrm = nrm.transform(X_test)

In [28]:
# nrm = StandardScaler()
# nrm.fit(X_train)
# X_train_nrm = nrm.transform(X_train)
# X_test_nrm = nrm.transform(X_test)

In [38]:
# knn = KNeighborsClassifier()
# knn.fit(X_train_nrm, y_train)

KNeighborsClassifier()

In [16]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

KNeighborsClassifier()

In [17]:
y_pred = knn.predict(X_test)
y_pred

array([1, 1, 0, 0, 2, 1, 2, 2, 2, 2, 0, 1, 0, 1, 2, 2, 2, 1, 0, 0, 0, 1,
       1, 2, 0, 2, 2, 0, 2, 0])

In [19]:
y_test

array([1, 1, 0, 0, 2, 1, 2, 2, 2, 2, 0, 1, 0, 1, 2, 2, 2, 1, 0, 0, 0, 1,
       1, 1, 0, 2, 2, 0, 2, 0])

In [20]:
knn.score(X_train, y_train)

0.975

In [21]:
knn.score(X_test, y_test)

0.9666666666666667

## Evaluation

In [25]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [26]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

