In [1]:
import matplotlib.pyplot as plt
import optuna
import pandas as pd
from sklearn.model_selection import cross_val_score, StratifiedKFold

from model_creation import save_model_params, create_knn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
plt.style.use("default")

In [3]:
TARGET_COL_NAME = "Expert Diagnose"

dataset_train = pd.read_csv("../dataset/train.csv")
dataset_test = pd.read_csv("../dataset/test.csv")

X_train, y_train = dataset_train.drop(columns=[TARGET_COL_NAME, "Patient Number"], axis=1), dataset_train[TARGET_COL_NAME]
X_test, y_test = dataset_test.drop(columns=[TARGET_COL_NAME, "Patient Number"], axis=1), dataset_test[TARGET_COL_NAME]
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((90, 13), (90,), (30, 13), (30,))

In [4]:
def objective_knn(trial: optuna.trial.Trial):
    params = dict(
        scaler_name = trial.suggest_categorical("scaler_name", ["StandardScaler", "MinMaxScaler"]),
        n_features_to_select = trial.suggest_int("n_features_to_select", 5, len(X_train.columns)),
        n_neighbors = trial.suggest_int("n_neighbors", 1, 20),
        weights = trial.suggest_categorical("weights", ["uniform", "distance"]),
        metric = trial.suggest_categorical("metric", ["euclidean", "manhattan"]),
    )

    pipe = create_knn(params)

    cv = StratifiedKFold(n_splits=5)
    score = cross_val_score(pipe, X_train, y_train, cv=cv, scoring="roc_auc_ovr").mean()
    return score


In [5]:
study = optuna.create_study(study_name="KNN_Study", direction="maximize")
study.optimize(objective_knn, n_trials=300)

[I 2025-12-29 20:34:28,177] A new study created in memory with name: KNN_Study
[I 2025-12-29 20:34:28,200] Trial 0 finished with value: 0.9387087912087912 and parameters: {'scaler_name': 'MinMaxScaler', 'n_features_to_select': 5, 'n_neighbors': 11, 'weights': 'distance', 'metric': 'manhattan'}. Best is trial 0 with value: 0.9387087912087912.
[I 2025-12-29 20:34:28,219] Trial 1 finished with value: 0.9493269230769231 and parameters: {'scaler_name': 'MinMaxScaler', 'n_features_to_select': 8, 'n_neighbors': 15, 'weights': 'uniform', 'metric': 'manhattan'}. Best is trial 1 with value: 0.9493269230769231.
[I 2025-12-29 20:34:28,239] Trial 2 finished with value: 0.9561126373626374 and parameters: {'scaler_name': 'StandardScaler', 'n_features_to_select': 13, 'n_neighbors': 8, 'weights': 'uniform', 'metric': 'manhattan'}. Best is trial 2 with value: 0.9561126373626374.
[I 2025-12-29 20:34:28,259] Trial 3 finished with value: 0.9254739010989012 and parameters: {'scaler_name': 'StandardScaler', 

In [6]:
save_model_params("knn", study.best_params)
study.best_params

{'scaler_name': 'StandardScaler',
 'n_features_to_select': 10,
 'n_neighbors': 11,
 'weights': 'distance',
 'metric': 'manhattan'}