## Grid search for K-Nearest Neighbor (KNN) hyperparameters

### Prepare lists of hyperparameters for grid search

In [None]:
# Candidate values for each KNN hyperparameter explored by the grid search
param_weight = ['uniform', 'distance']       # passed as `weights`
param_n_neighbors = [3, 9, 15]               # passed as `n_neighbors`
param_metric = ['euclidean', 'manhattan']    # passed as `metric`

# The grid size is the product of the lengths of the candidate lists
NoOfCases = 1
for _candidates in (param_weight, param_n_neighbors, param_metric):
    NoOfCases *= len(_candidates)
NoOfCases

In [None]:
# Create a placeholder dataframe (one row per hyperparameter combination)
# to store the accuracy results.
# Each column is created with the dtype of the values it will hold: text for
# 'weight' and 'metric', integer for 'n_neighbors', float for 'Accuracy'.
# An all-float placeholder would need silent upcasting when strings are
# written into it, which recent pandas versions deprecate.
Accuracy_df = pd.DataFrame({
    'weight': pd.Series([''] * NoOfCases, dtype=object),
    'n_neighbors': np.zeros(NoOfCases, dtype=int),
    'metric': pd.Series([''] * NoOfCases, dtype=object),
    'Accuracy': np.zeros(NoOfCases, dtype=float),
})
Accuracy_df

### Train the KNN models with different combinations of hyperparameters and save them

In [None]:
# Import necessary packages for the KNN grid search
from itertools import product

from sklearn.neighbors import KNeighborsClassifier
import joblib

# Destination folder for the saved models (one file per combination)
model_dir = '/content/drive/MyDrive/Colab Notebooks/SavedFiles/ML_Models/GridSearch_KNN/'

# One [weight, n_neighbors, metric, accuracy] row is collected per trained model
result_rows = []

# product() yields the combinations in the same order as three nested loops
# over weight (outermost), n_neighbors, and metric (innermost)
for temp_weight, temp_n_neighbors, temp_metric in product(param_weight, param_n_neighbors, param_metric):

    # Create and train a temporary KNN model with the current combination,
    # then score it on the test data
    # NOTE(review): hyperparameters are compared on TestData; if that is the
    # final hold-out set, the selected accuracy will be optimistic — consider
    # a separate validation split or cross-validation.
    tempknnModel = KNeighborsClassifier(weights=temp_weight, n_neighbors=temp_n_neighbors, metric=temp_metric)
    tempknnModel.fit(TrainData, TrainLabel)
    tempAccuracy = tempknnModel.score(TestData, TestLabel)

    # Save the temporary model to a file named after its hyperparameters
    # NOTE(review): '.plk' looks like a typo for '.pkl'; it is kept unchanged
    # so that previously saved files and any loading code keep matching.
    tempknnModel_name = f'KNN_{temp_weight}_N{temp_n_neighbors}_M{temp_metric}.plk'
    joblib.dump(tempknnModel, model_dir + tempknnModel_name)

    # Remember the performance (accuracy) of this combination
    result_rows.append([temp_weight, temp_n_neighbors, temp_metric, tempAccuracy])

# Number of evaluated combinations (kept in case later cells read this counter)
cnt = len(result_rows)

# Build the results table in one step so every column gets its natural dtype
# (text / integer / float) instead of writing mixed values row by row into a
# preallocated numeric frame
Accuracy_df = pd.DataFrame(result_rows, columns=['weight', 'n_neighbors', 'metric', 'Accuracy'])

# Display the resulting dataframe with model performances
Accuracy_df

### Confirm the grid search results

In [None]:
# Rank all evaluated combinations from most to least accurate and renumber rows from 0
Accuracy_df_sorted = (
    Accuracy_df
    .sort_values(by='Accuracy', ascending=False)
    .reset_index(drop=True)
)

# Row 0 now holds the top-scoring combination;
# columns are ordered weight, n_neighbors, metric, Accuracy
best_weight = Accuracy_df_sorted.iloc[0, 0]
best_n_neighbors = Accuracy_df_sorted.iloc[0, 1]
best_metric = Accuracy_df_sorted.iloc[0, 2]
best_accuracy = Accuracy_df_sorted.iloc[0, 3]

# Report the best case (n_neighbors printed as an integer, accuracy with 2 decimals)
print("[Best case]\nWeight      : " + best_weight +
      f"\nn_neighbors : {int(best_n_neighbors)}\nMetric      : {best_metric!s}\n\nAccuracy: {best_accuracy:.2f}")

In [None]:
# Group the accuracy scores by weight type ...
accuracy_by_weight = Accuracy_df.groupby(['weight'])['Accuracy']

# ... and summarize each group with its mean and standard deviation
# (reset_index turns the group key back into a regular first column)
mean_accuracy_Weight = accuracy_by_weight.agg(['mean', 'std']).reset_index()
mean_accuracy_Weight

In [None]:
# Group the accuracy scores by number of neighbors ...
accuracy_by_n_neighbors = Accuracy_df.groupby(['n_neighbors'])['Accuracy']

# ... and summarize each group with its mean and standard deviation
# (reset_index turns the group key back into a regular first column)
mean_accuracy_n_neighbors = accuracy_by_n_neighbors.agg(['mean', 'std']).reset_index()
mean_accuracy_n_neighbors

In [None]:
# Group the accuracy scores by distance metric ...
accuracy_by_metric = Accuracy_df.groupby(['metric'])['Accuracy']

# ... and summarize each group with its mean and standard deviation
# (reset_index turns the group key back into a regular first column)
mean_accuracy_Metric = accuracy_by_metric.agg(['mean', 'std']).reset_index()
mean_accuracy_Metric

### Visualize the performance comparison for the selected hyperparameter

In [None]:
# Set an index to select a hyperparameter
# 0: weight // 1: n_neighbors // 2: metric
idx = 1

# Resolve the selected hyperparameter name and its summary table
H_Param  = ['Weight', 'n_neighbors', 'Metric']
Selected = H_Param[idx]

# Explicit lookup table instead of exec() on a constructed variable name:
# easier to read, visible to linters, and no code is built from strings.
# All three summary tables must already have been computed.
summary_by_param = {
    'Weight': mean_accuracy_Weight,
    'n_neighbors': mean_accuracy_n_neighbors,
    'Metric': mean_accuracy_Metric,
}
Result = summary_by_param[Selected]

xLabel = Result.iloc[:, 0]            # first column holds the hyperparameter values
x_pos = np.arange(Result.shape[0])    # one bar position per hyperparameter value
y_val = Result['mean']                # bar height: mean accuracy
y_err = Result['std']                 # error bar: standard deviation of accuracy

# Draw a bar chart to compare the model performance (accuracy) for each value
# of the selected hyperparameter
fig, ax = plt.subplots(figsize=(10, 5))

# Create a bar plot with error bars
ax.bar(x_pos, y_val, yerr=y_err, align='center', alpha=0.5, ecolor='black', capsize=10,
       color=['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple'])
ax.set_ylabel('Accuracy (mean)', fontsize=15)
ax.set_title(f"Model performance comparison by '{Selected}'", fontsize=20)
ax.set_xticks(x_pos)
ax.set_xticklabels(xLabel, fontsize=15)
ax.yaxis.grid()

plt.tight_layout()
plt.show()