<a href="https://colab.research.google.com/github/pydevcasts/Dr.RahimPour_ml_practise/blob/main/k_fold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## K-fold CV — Hyper-parameter tuning in Python

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_validate
from prettytable import PrettyTable

# Load the iris data set and show its first rows as a quick sanity check.
# A bare `data.head()` in the middle of a cell is evaluated and thrown away
# (only the last expression of a cell is displayed), so print it explicitly.
data = pd.read_csv('/content/sample_data/iris.csv')
print(data.head())

# Features and label are selected by position: the first four columns are the
# model inputs and the last column is the class label.
# NOTE(review): assumes the CSV has no leading index/Id column -- confirm.
characteristics = data.iloc[:, :4]
target = data.iloc[:, -1]

# Hold out 20% of the rows for the final test; the fixed random_state keeps
# the split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    characteristics, target, test_size=0.2, random_state=2727
)

# Grid search over the KNN hyper-parameters with 10-fold cross validation,
# using the training split only so the test split stays untouched.
# Every entry collected is [p, n_neighbors, mean accuracy across the folds].
hyperparameter_score_list = []
for minkowski_p in range(1, 4):
    for k in range(2, 7):
        candidate = KNeighborsClassifier(n_neighbors=k, p=minkowski_p)
        cv_result = cross_validate(
            candidate, x_train, y_train, scoring='accuracy', cv=10
        )
        hyperparameter_score_list.append(
            [minkowski_p, k, np.mean(cv_result['test_score'])]
        )

# Tabulate the mean cross-validation accuracy of every (p, n_neighbors)
# combination so the best-scoring setting can be read off the printout.
myTable = PrettyTable()
myTable.field_names = ["p (distance)", "Number of neighbors", "Avg accuracy"]
for dist_p, n_nb, avg_acc in hyperparameter_score_list:
    # Accuracy is rounded to three decimals for display only.
    myTable.add_row([dist_p, n_nb, round(avg_acc, 3)])
print(myTable)

# Evaluate the model with the best hyper-parameters on the held-out test set.
# The best setting is taken from the cross-validation results instead of being
# hard-coded, so it stays correct if the data, the split or the grid changes.
# Scores are rounded before comparison so that float noise in the fold average
# cannot reorder ties; among tied entries max() keeps the first one in grid
# order (smallest p, then fewest neighbors).
best_p, best_neighbors, best_cv_score = max(
    hyperparameter_score_list, key=lambda entry: round(entry[2], 6)
)
print(
    f"Selected hyper-parameters: p={best_p}, n_neighbors={best_neighbors} "
    f"(CV accuracy {best_cv_score:.3f})"
)

# Refit on the full training split, then score once on the unseen test split.
knn = KNeighborsClassifier(p=best_p, n_neighbors=best_neighbors)
knn_best_model = knn.fit(x_train, y_train)
print("Best Model Testing Score: ", knn_best_model.score(x_test, y_test))


+--------------+---------------------+--------------+
| p (distance) | Number of neighbors | Avg accuracy |
+--------------+---------------------+--------------+
|      1       |          2          |    0.958     |
|      1       |          3          |    0.967     |
|      1       |          4          |    0.967     |
|      1       |          5          |    0.975     |
|      1       |          6          |    0.967     |
|      2       |          2          |    0.967     |
|      2       |          3          |    0.983     |
|      2       |          4          |    0.975     |
|      2       |          5          |    0.983     |
|      2       |          6          |    0.975     |
|      3       |          2          |    0.967     |
|      3       |          3          |    0.975     |
|      3       |          4          |    0.975     |
|      3       |          5          |    0.983     |
|      3       |          6          |    0.975     |
+--------------+------------