# KNN


In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Get Datasets
iris = load_iris()
print(iris.data, iris.target)

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.2]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.6 1.4 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.

In [3]:
# Handle Data & Data Split
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=42, test_size=0.3
)

In [4]:
# Feature Preprocessing
transfer = StandardScaler()
x_train = transfer.fit_transform(X_train)
x_test = transfer.fit_transform(X_test)

In [5]:
# Train Model and help to choose best parameter
knn = KNeighborsClassifier(n_neighbors=5)

param_grid = {"n_neighbors": [1, 3, 5, 7, 9]}
knn = GridSearchCV(knn, param_grid=param_grid, cv=10)

knn.fit(x_train, y_train)

In [6]:
# # Cross Validation
# cross_val_scores = cross_val_score(knn, X_train, y_train, cv=5)
# print(f"Cross-Validation Accuracy Scores: {cross_val_scores}")

# # Get Mean Score
# mean_cv_score = cross_val_scores.mean()
# print(f"Mean Cross-Validation Score: {mean_cv_score}")

In [7]:
# Evaluate Model

# Output predict value
y_pre = knn.predict(x_test)
print(f"Predict value: {y_pre}\n")
print(f"Predict VS Actual: {y_pre == y_test}\n")

# Output accuracy rate
rate = knn.score(x_test, y_test)
print(f"Accuracy rate: {rate}\n")

# Other
print(f"Best modal: {knn.best_estimator_}\n")
print(f"Best result: {knn.best_score_}\n")
print(f"Best model result: {knn.cv_results_}\n")

Predict value: [2 0 2 2 1 0 1 2 1 1 2 0 0 0 0 2 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 2 1 0 0]

Predict VS Actual: [False  True  True False  True  True  True  True  True  True  True  True
  True  True  True False  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True False  True  True  True]

Accuracy rate: 0.9111111111111111

Best modal: KNeighborsClassifier(n_neighbors=3)

Best result: 0.9263636363636364

Best model result: {'mean_fit_time': array([0.00020337, 0.00014966, 0.00016396, 0.00014281, 0.00015428]), 'std_fit_time': array([1.10218204e-04, 3.08482686e-06, 1.32226910e-05, 1.02069887e-05,
       2.41910751e-05]), 'mean_score_time': array([0.00060272, 0.00051575, 0.0005311 , 0.00047584, 0.00052352]), 'std_score_time': array([1.27094144e-04, 4.19197632e-05, 3.80253642e-05, 2.97297165e-05,
       7.68750235e-05]), 'param_n_neighbors': masked_array(data=[1, 3, 5, 7, 9],
   