In [8]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

In [4]:
# Load the iris example dataset through seaborn's dataset loader.
iris_df = sns.load_dataset('iris')

In [5]:
# Display the column labels so the feature columns and the `species` target are visible.
iris_df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [7]:
# Baseline split: every column except `species` is a feature, `species` is
# the label. `test_size` is not passed, so the library default applies; the
# seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    iris_df.drop(columns='species'),
    iris_df['species'],
    random_state=1224,
)

In [9]:
# Create a k-nearest-neighbours classifier with default hyperparameters,
# then train it on the training split. `fit` returns the estimator itself,
# so re-binding `cls` keeps the cell free of displayed output.
cls = KNeighborsClassifier()
cls = cls.fit(X_train, y_train)

In [10]:
# Score the fitted classifier on the data it was trained on.
cls.score(X_train, y_train)

0.9821428571428571

In [11]:
# Score the fitted classifier on the held-out test split.
cls.score(X_test, y_test)

0.9210526315789473

In [12]:
# Predicted species labels for the held-out test rows.
predict = cls.predict(X=X_test)

In [14]:
# Per-class precision / recall / F1 for the test predictions.
print(
    classification_report(y_test, predict)
)

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       0.92      0.86      0.89        14
   virginica       0.86      0.92      0.89        13

    accuracy                           0.92        38
   macro avg       0.93      0.93      0.93        38
weighted avg       0.92      0.92      0.92        38



# Effect of the train/test split distribution (`random_state`)

In [18]:
# Repeat the split / fit / report cycle with several seeds to see how much
# the scores depend on which rows end up in the training set.
# Note: each iteration overwrites the notebook-level split, `cls` and `predict`.
for seed in (0, 42, 13, 100, 2315):
    X_train, X_test, y_train, y_test = train_test_split(
        iris_df.drop(columns='species'),
        iris_df['species'],
        random_state=seed,
    )
    cls = KNeighborsClassifier()
    cls = cls.fit(X_train, y_train)
    predict = cls.predict(X_test)
    print('random_state = ', seed)
    print(classification_report(y_test, predict))
    print('_' * 100)

random_state =  0
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       1.00      0.94      0.97        16
   virginica       0.90      1.00      0.95         9

    accuracy                           0.97        38
   macro avg       0.97      0.98      0.97        38
weighted avg       0.98      0.97      0.97        38

____________________________________________________________________________________________________
random_state =  42
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       1.00      1.00      1.00        11
   virginica       1.00      1.00      1.00        12

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38

____________________________________________________________________________________________________

# Effect of training-set size (`test_size`)

In [23]:
# Shrink the held-out fraction step by step (i.e. grow the training set)
# while keeping the seed fixed and the split stratified by species, so only
# the amount of training data changes between runs.
# Note: each iteration overwrites the notebook-level split, `cls` and `predict`.
for test_fraction in (0.9, 0.7, 0.3, 0.1):
    X_train, X_test, y_train, y_test = train_test_split(
        iris_df.drop(columns='species'),
        iris_df['species'],
        test_size=test_fraction,
        random_state=13,
        stratify=iris_df['species'],
        shuffle=True,
    )
    cls = KNeighborsClassifier()
    cls = cls.fit(X_train, y_train)
    predict = cls.predict(X_test)
    print('test_size = ', test_fraction)
    # zero_division=0 is forwarded unchanged to the report.
    print(classification_report(y_test, predict, zero_division=0))
    print('_' * 100)

test_size =  0.9
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        45
  versicolor       0.97      0.69      0.81        45
   virginica       0.76      0.98      0.85        45

    accuracy                           0.89       135
   macro avg       0.91      0.89      0.89       135
weighted avg       0.91      0.89      0.89       135

____________________________________________________________________________________________________
test_size =  0.7
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        35
  versicolor       0.97      0.91      0.94        35
   virginica       0.92      0.97      0.94        35

    accuracy                           0.96       105
   macro avg       0.96      0.96      0.96       105
weighted avg       0.96      0.96      0.96       105

____________________________________________________________________________________________________
te

# Effect of `n_neighbors`

In [25]:
# One fixed, stratified 70/30 split shared by every run below, so that the
# only thing varying between reports is the number of neighbours.
X_train, X_test, y_train, y_test = train_test_split(
    iris_df.drop(columns='species'),
    iris_df['species'],
    test_size=0.3,
    random_state=13,
    stratify=iris_df['species'],
    shuffle=True,
)

# Refit the classifier for each neighbour count and report on the same test rows.
for k in (1, 3, 5, 10, 15):
    cls = KNeighborsClassifier(n_neighbors=k)
    cls = cls.fit(X_train, y_train)
    predict = cls.predict(X_test)
    print('n_neighbors = ', k)
    print(classification_report(y_test, predict, zero_division=0))
    print('_' * 100)

n_neighbors =  1
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       1.00      0.93      0.97        15
   virginica       0.94      1.00      0.97        15

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

____________________________________________________________________________________________________
n_neighbors =  3
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       1.00      0.93      0.97        15
   virginica       0.94      1.00      0.97        15

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

____________________________________________________________________________________________________
n_