In [5]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score  
import optuna
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from joblib import load, dump


combined_data = load('../../DataDumps/3classfft.joblib')
combined_labels = load('../../DataDumps/3class_labels.joblib')
def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels) 


combined_data = reshape_data(combined_data) 


# Split data into training/validation and test sets (using same split as in RF)
X_train_val, X_test, y_train_val, y_test = train_test_split(combined_data, combined_labels, test_size=0.2, random_state=42)

def objective(trial):
    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan', 'minkowski'])
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)  

    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=metric, n_jobs=-1)

    cv_scores = cross_val_score(knn, X_train_val, y_train_val, cv=skf, scoring='accuracy')
    return cv_scores.mean()

# Hyperparameter tuning with Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)  

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train final model with best hyperparameters
best_knn = KNeighborsClassifier(**best_params)
best_knn.fit(X_train_val, y_train_val)

# Save model if needed
dump(best_knn, 'KNN-Base-2.joblib') 

# Evaluation on test set
predictions_test = best_knn.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

[I 2024-04-25 03:50:18,851] A new study created in memory with name: no-name-a5402d4b-f5ba-4595-9f3d-70ece0db31ec
[I 2024-04-25 03:50:24,894] Trial 11 finished with value: 0.6383928571428571 and parameters: {'n_neighbors': 6, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 11 with value: 0.6383928571428571.
[I 2024-04-25 03:50:25,274] Trial 19 finished with value: 0.6214285714285713 and parameters: {'n_neighbors': 9, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 11 with value: 0.6383928571428571.
[I 2024-04-25 03:50:25,289] Trial 10 finished with value: 0.6267857142857144 and parameters: {'n_neighbors': 2, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 11 with value: 0.6383928571428571.
[I 2024-04-25 03:50:25,309] Trial 12 finished with value: 0.63125 and parameters: {'n_neighbors': 11, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 11 with value: 0.6383928571428571.
[I 2024-04-25 03:50:25,329] Trial 20 finished with value: 0.6375 and

In [1]:
# Evaluation on test set
predictions_test = best_knn.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

NameError: name 'best_knn' is not defined