In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import pickle
from pprint import pprint
from tqdm import tqdm
from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold, GridSearchCV
import optuna

from utils import load_data_from_directory, split_into_K_folds, get_metrics
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Use a custom function to read images from the directory and assign labels based on the name of the subfolder they are in
training_files, training_labels = load_data_from_directory('../MIT_split/train/')
test_files, test_labels = load_data_from_directory('../MIT_split/test/')

In [3]:
# Since we want to do k-fold cross validation, we need to merge the training and test data and then re-split it into k folds

all_data = training_files + test_files
all_labels = training_labels + test_labels

k = 5  # number of folds
# split data into 5 folds using custom function
folds = split_into_K_folds(all_data, all_labels, k)

# Now each fold in the 'folds' list contains the training and test data for that fold in the format: (train_data, train_labels, test_data, test_labels)
for fold_index, fold in enumerate(folds):
    train_data, train_labels, test_data, test_labels = fold
    print(f"Fold {fold_index + 1}:")
    print(f"Number of samples in train set: {len(train_data)}")
    print(f"Number of samples in test set: {len(test_data)}")
    print(f"Number of labels in train set: {len(train_labels)}")
    print(f"Number of labels in test set: {len(test_labels)}")
    print()

Fold 1:
Number of samples in train set: 2150
Number of samples in test set: 538
Number of labels in train set: 2150
Number of labels in test set: 538

Fold 2:
Number of samples in train set: 2150
Number of samples in test set: 538
Number of labels in train set: 2150
Number of labels in test set: 538

Fold 3:
Number of samples in train set: 2150
Number of samples in test set: 538
Number of labels in train set: 2150
Number of labels in test set: 538

Fold 4:
Number of samples in train set: 2151
Number of samples in test set: 537
Number of labels in train set: 2151
Number of labels in test set: 537

Fold 5:
Number of samples in train set: 2151
Number of samples in test set: 537
Number of labels in train set: 2151
Number of labels in test set: 537



In [4]:
# Define Dense SIFT and Dense KAZE descriptors
def dense_sift(image, step_size=8, kp_scale=16):
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    dense_sift = cv2.SIFT_create()
    keypoints = [cv2.KeyPoint(x, y, kp_scale) for y in range(0, image.shape[0], step_size) for x in range(0, image.shape[1], step_size)]
    _, descriptor = dense_sift.compute(image, keypoints)
    return descriptor

# generate descriptors for all images and store in dictionary to access them later in each fold without having to recompute them
def generate_descriptors(image_paths, descriptor_type, **kwargs):
    descriptors = {}
    for image_path in tqdm(image_paths):
        image = cv2.imread(image_path)
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if descriptor_type == 'sift':
            sift = cv2.SIFT_create()
            _, descriptor = sift.detectAndCompute(gray_image, None)
        elif descriptor_type == 'kaze':
            kaze = cv2.KAZE_create(threshold=kwargs['threshold'])
            _, descriptor = kaze.detectAndCompute(gray_image, None)
        elif descriptor_type == 'dense_sift':
            descriptor = dense_sift(image, step_size=kwargs['step_size'], kp_scale=kwargs['kp_scale'])
        else:
            raise Exception('Unknown descriptor type')
        descriptors[image_path] = descriptor
    return descriptors
# we generate the descriptors for all images in the dataset and store them in a dictionary
# we use dense-SIFT with a step size of 8 and a keypoint scale of 12 as these values gave the best results in our experiments
descriptors_dictionary = generate_descriptors(all_data, 'dense_sift', step_size=8, kp_scale=12)


100%|██████████| 2688/2688 [02:31<00:00, 17.78it/s]


In [5]:
# Define a function to create a dictionary linking a histogram of visual words to the corresponding images images:
def create_visual_words(image_paths, descriptors_dictionary, codebook):
    visual_words = {}
    for image_path in image_paths:
        descriptor = descriptors_dictionary[image_path]
        labels = codebook.predict(descriptor)
        visual_words[image_path] = np.bincount(labels, minlength=codebook.n_clusters)
    return np.array(list(visual_words.values()))


In [6]:
# initialize a dictionary to store the codebooks for each fold
codebooks = {'fold_0': {}, 'fold_1': {}, 'fold_2': {}, 'fold_3': {}, 'fold_4': {}}

# define the codebook sizes we want to test
codebook_sizes = [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]

# loop over folds
for i, fold in enumerate(folds):
    train_data, train_labels, test_data, test_labels = fold
    train_descriptors = []

    # fetch the descriptors for the training images in the current fold
    for file in train_data:
        train_descriptors.append(descriptors_dictionary[file])

    # stack the descriptors vertically to create a single numpy array for clustering
    train_D = np.vstack(list(train_descriptors)) 

    # loop over codebook sizes
    for k in tqdm(codebook_sizes, desc=f'Creating Codebooks for fold {i}'):
        # initialize the codebook
        codebook = MiniBatchKMeans(n_clusters=k, 
                           verbose=False, 
                           batch_size=2150,
                           compute_labels=False,
                           reassignment_ratio=0.01,
                           random_state=42)
        # fit the codebook
        codebook.fit(train_D)
        # store the codebook in the dictionary
        codebooks[f'fold_{i}'][k] = codebook


Creating Codebooks for fold 0: 100%|██████████| 10/10 [1:20:10<00:00, 481.08s/it] 
Creating Codebooks for fold 1: 100%|██████████| 10/10 [1:17:58<00:00, 467.81s/it] 
Creating Codebooks for fold 2: 100%|██████████| 10/10 [1:19:52<00:00, 479.27s/it] 
Creating Codebooks for fold 3: 100%|██████████| 10/10 [1:19:23<00:00, 476.39s/it] 
Creating Codebooks for fold 4: 100%|██████████| 10/10 [1:19:34<00:00, 477.50s/it] 


### Gridsearch for KNN parameters ###

In [12]:
def objective(trial):

    n_clusters = trial.suggest_categorical('n_clusters', codebook_sizes)
    n_neighbors = trial.suggest_int('n_neighbors', 1, 25)
    metric = trial.suggest_categorical('metric', ['euclidean', 
                                                    'minkowski', 
                                                    'cosine', 
                                                    'manhattan', 
                                                    'hamming'])
    weight = trial.suggest_categorical('weight', ['uniform', 'distance'])

    metrics = []
    # Run k-fold cross validation per hyperparameter combination
    for i, fold in enumerate(folds, 0):

        train_data, train_labels, test_data, test_labels = fold
        
        codebook = codebooks[f'fold_{i}'][n_clusters]
        # create visual words for training data
        train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
        # create visual words for test data
        test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)
        
        # fit kNN classifier
        print(f'Fold {i}: fitting knn with n_neighbors={n_neighbors}, metric={metric}, weights={weight}')
        knn = KNeighborsClassifier(n_neighbors=n_neighbors,n_jobs=-1,metric=metric, weights=weight)
        knn.fit(train_visual_words, train_labels)
        # predict labels for test data
        pred_labels = knn.predict(test_visual_words)

        # get accuracy metric for current run
        metrics.append(get_metrics(test_labels, pred_labels)[0])

    return sum(metrics)/len(metrics)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

study.best_params

[I 2024-01-05 12:31:08,102] A new study created in memory with name: no-name-23b00054-5a9d-4b97-ba40-888bc8564a10


Fold 0: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform


[I 2024-01-05 12:31:19,950] Trial 0 finished with value: 0.8117498425093282 and parameters: {'classifier': 'knn', 'n_clusters': 256, 'n_neighbors': 12, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 0 with value: 0.8117498425093282.


Fold 4: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=21, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=21, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=21, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=21, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=21, metric=manhattan, weights=uniform


[I 2024-01-05 12:39:58,096] Trial 1 finished with value: 0.8039466123929582 and parameters: {'classifier': 'knn', 'n_clusters': 16384, 'n_neighbors': 21, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 0 with value: 0.8117498425093282.


Fold 0: fitting knn with n_neighbors=23, metric=euclidean, weights=distance
Fold 1: fitting knn with n_neighbors=23, metric=euclidean, weights=distance
Fold 2: fitting knn with n_neighbors=23, metric=euclidean, weights=distance
Fold 3: fitting knn with n_neighbors=23, metric=euclidean, weights=distance
Fold 4: fitting knn with n_neighbors=23, metric=euclidean, weights=distance


[I 2024-01-05 12:42:06,956] Trial 2 finished with value: 0.6614587443666797 and parameters: {'classifier': 'knn', 'n_clusters': 4096, 'n_neighbors': 23, 'metric': 'euclidean', 'weight': 'distance'}. Best is trial 0 with value: 0.8117498425093282.


Fold 0: fitting knn with n_neighbors=23, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=23, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=23, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=23, metric=manhattan, weights=distance


[I 2024-01-05 12:42:13,018] Trial 3 finished with value: 0.7890628785833455 and parameters: {'classifier': 'knn', 'n_clusters': 64, 'n_neighbors': 23, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 0 with value: 0.8117498425093282.


Fold 4: fitting knn with n_neighbors=23, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=9, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=9, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=9, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=9, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=9, metric=manhattan, weights=uniform


[I 2024-01-05 12:43:24,509] Trial 4 finished with value: 0.814358303392799 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 9, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 4 with value: 0.814358303392799.


Fold 0: fitting knn with n_neighbors=7, metric=cosine, weights=distance
Fold 1: fitting knn with n_neighbors=7, metric=cosine, weights=distance
Fold 2: fitting knn with n_neighbors=7, metric=cosine, weights=distance
Fold 3: fitting knn with n_neighbors=7, metric=cosine, weights=distance
Fold 4: fitting knn with n_neighbors=7, metric=cosine, weights=distance


[I 2024-01-05 12:47:34,508] Trial 5 finished with value: 0.7689656843402353 and parameters: {'classifier': 'knn', 'n_clusters': 8192, 'n_neighbors': 7, 'metric': 'cosine', 'weight': 'distance'}. Best is trial 4 with value: 0.814358303392799.


Fold 0: fitting knn with n_neighbors=19, metric=hamming, weights=distance
Fold 1: fitting knn with n_neighbors=19, metric=hamming, weights=distance
Fold 2: fitting knn with n_neighbors=19, metric=hamming, weights=distance
Fold 3: fitting knn with n_neighbors=19, metric=hamming, weights=distance
Fold 4: fitting knn with n_neighbors=19, metric=hamming, weights=distance


[I 2024-01-05 12:56:17,687] Trial 6 finished with value: 0.22470976719071256 and parameters: {'classifier': 'knn', 'n_clusters': 16384, 'n_neighbors': 19, 'metric': 'hamming', 'weight': 'distance'}. Best is trial 4 with value: 0.814358303392799.


Fold 0: fitting knn with n_neighbors=7, metric=hamming, weights=uniform
Fold 1: fitting knn with n_neighbors=7, metric=hamming, weights=uniform
Fold 2: fitting knn with n_neighbors=7, metric=hamming, weights=uniform
Fold 3: fitting knn with n_neighbors=7, metric=hamming, weights=uniform
Fold 4: fitting knn with n_neighbors=7, metric=hamming, weights=uniform


[I 2024-01-05 12:58:30,916] Trial 7 finished with value: 0.23808643641876595 and parameters: {'classifier': 'knn', 'n_clusters': 4096, 'n_neighbors': 7, 'metric': 'hamming', 'weight': 'uniform'}. Best is trial 4 with value: 0.814358303392799.


Fold 0: fitting knn with n_neighbors=25, metric=minkowski, weights=uniform
Fold 1: fitting knn with n_neighbors=25, metric=minkowski, weights=uniform
Fold 2: fitting knn with n_neighbors=25, metric=minkowski, weights=uniform
Fold 3: fitting knn with n_neighbors=25, metric=minkowski, weights=uniform
Fold 4: fitting knn with n_neighbors=25, metric=minkowski, weights=uniform


[I 2024-01-05 13:02:33,789] Trial 8 finished with value: 0.563606155635397 and parameters: {'classifier': 'knn', 'n_clusters': 8192, 'n_neighbors': 25, 'metric': 'minkowski', 'weight': 'uniform'}. Best is trial 4 with value: 0.814358303392799.


Fold 0: fitting knn with n_neighbors=14, metric=euclidean, weights=uniform
Fold 1: fitting knn with n_neighbors=14, metric=euclidean, weights=uniform
Fold 2: fitting knn with n_neighbors=14, metric=euclidean, weights=uniform
Fold 3: fitting knn with n_neighbors=14, metric=euclidean, weights=uniform


[I 2024-01-05 13:02:39,595] Trial 9 finished with value: 0.7325033055734391 and parameters: {'classifier': 'knn', 'n_clusters': 32, 'n_neighbors': 14, 'metric': 'euclidean', 'weight': 'uniform'}. Best is trial 4 with value: 0.814358303392799.


Fold 4: fitting knn with n_neighbors=14, metric=euclidean, weights=uniform
Fold 0: fitting knn with n_neighbors=2, metric=minkowski, weights=uniform
Fold 1: fitting knn with n_neighbors=2, metric=minkowski, weights=uniform
Fold 2: fitting knn with n_neighbors=2, metric=minkowski, weights=uniform
Fold 3: fitting knn with n_neighbors=2, metric=minkowski, weights=uniform
Fold 4: fitting knn with n_neighbors=2, metric=minkowski, weights=uniform


[I 2024-01-05 13:03:47,449] Trial 10 finished with value: 0.7280471848975101 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 2, 'metric': 'minkowski', 'weight': 'uniform'}. Best is trial 4 with value: 0.814358303392799.


Fold 0: fitting knn with n_neighbors=11, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=11, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=11, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=11, metric=manhattan, weights=uniform


[I 2024-01-05 13:03:59,896] Trial 11 finished with value: 0.8151017978165909 and parameters: {'classifier': 'knn', 'n_clusters': 256, 'n_neighbors': 11, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 11 with value: 0.8151017978165909.


Fold 4: fitting knn with n_neighbors=11, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform


[I 2024-01-05 13:04:11,333] Trial 12 finished with value: 0.8117498425093282 and parameters: {'classifier': 'knn', 'n_clusters': 256, 'n_neighbors': 12, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 11 with value: 0.8151017978165909.


Fold 4: fitting knn with n_neighbors=12, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform


[I 2024-01-05 13:05:20,690] Trial 13 finished with value: 0.816214962652212 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 7, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 13 with value: 0.816214962652212.


Fold 0: fitting knn with n_neighbors=1, metric=cosine, weights=uniform
Fold 1: fitting knn with n_neighbors=1, metric=cosine, weights=uniform
Fold 2: fitting knn with n_neighbors=1, metric=cosine, weights=uniform
Fold 3: fitting knn with n_neighbors=1, metric=cosine, weights=uniform


[I 2024-01-05 13:05:40,305] Trial 14 finished with value: 0.723590371954892 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 1, 'metric': 'cosine', 'weight': 'uniform'}. Best is trial 13 with value: 0.816214962652212.


Fold 4: fitting knn with n_neighbors=1, metric=cosine, weights=uniform
Fold 0: fitting knn with n_neighbors=16, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=16, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=16, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=16, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=16, metric=manhattan, weights=uniform


[I 2024-01-05 13:06:16,259] Trial 15 finished with value: 0.8147279738046285 and parameters: {'classifier': 'knn', 'n_clusters': 1024, 'n_neighbors': 16, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 13 with value: 0.816214962652212.


Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 13:06:24,171] Trial 16 finished with value: 0.7942687240832658 and parameters: {'classifier': 'knn', 'n_clusters': 128, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 13 with value: 0.816214962652212.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform


[I 2024-01-05 13:07:33,836] Trial 17 finished with value: 0.8165894789308634 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 10, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 17 with value: 0.8165894789308634.


Fold 0: fitting knn with n_neighbors=4, metric=minkowski, weights=distance
Fold 1: fitting knn with n_neighbors=4, metric=minkowski, weights=distance
Fold 2: fitting knn with n_neighbors=4, metric=minkowski, weights=distance
Fold 3: fitting knn with n_neighbors=4, metric=minkowski, weights=distance
Fold 4: fitting knn with n_neighbors=4, metric=minkowski, weights=distance


[I 2024-01-05 13:08:41,417] Trial 18 finished with value: 0.7448014233003122 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 4, 'metric': 'minkowski', 'weight': 'distance'}. Best is trial 17 with value: 0.8165894789308634.


Fold 0: fitting knn with n_neighbors=16, metric=cosine, weights=uniform
Fold 1: fitting knn with n_neighbors=16, metric=cosine, weights=uniform
Fold 2: fitting knn with n_neighbors=16, metric=cosine, weights=uniform
Fold 3: fitting knn with n_neighbors=16, metric=cosine, weights=uniform
Fold 4: fitting knn with n_neighbors=16, metric=cosine, weights=uniform


[I 2024-01-05 13:09:47,978] Trial 19 finished with value: 0.7764172429786851 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 16, 'metric': 'cosine', 'weight': 'uniform'}. Best is trial 17 with value: 0.8165894789308634.


Fold 0: fitting knn with n_neighbors=9, metric=hamming, weights=uniform
Fold 1: fitting knn with n_neighbors=9, metric=hamming, weights=uniform
Fold 2: fitting knn with n_neighbors=9, metric=hamming, weights=uniform
Fold 3: fitting knn with n_neighbors=9, metric=hamming, weights=uniform
Fold 4: fitting knn with n_neighbors=9, metric=hamming, weights=uniform


[I 2024-01-05 13:10:57,696] Trial 20 finished with value: 0.26264044360449423 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 9, 'metric': 'hamming', 'weight': 'uniform'}. Best is trial 17 with value: 0.8165894789308634.


Fold 0: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform


[I 2024-01-05 13:11:09,156] Trial 21 finished with value: 0.8139865561809032 and parameters: {'classifier': 'knn', 'n_clusters': 256, 'n_neighbors': 10, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 17 with value: 0.8165894789308634.


Fold 4: fitting knn with n_neighbors=10, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform


[I 2024-01-05 13:11:14,227] Trial 22 finished with value: 0.747394654316629 and parameters: {'classifier': 'knn', 'n_clusters': 32, 'n_neighbors': 7, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 17 with value: 0.8165894789308634.


Fold 4: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=14, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=14, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=14, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=14, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=14, metric=manhattan, weights=uniform


[I 2024-01-05 13:11:50,540] Trial 23 finished with value: 0.8229140273999155 and parameters: {'classifier': 'knn', 'n_clusters': 1024, 'n_neighbors': 14, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 0: fitting knn with n_neighbors=15, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=15, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=15, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=15, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=15, metric=manhattan, weights=uniform


[I 2024-01-05 13:12:26,549] Trial 24 finished with value: 0.8151031823499686 and parameters: {'classifier': 'knn', 'n_clusters': 1024, 'n_neighbors': 15, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 0: fitting knn with n_neighbors=18, metric=euclidean, weights=uniform
Fold 1: fitting knn with n_neighbors=18, metric=euclidean, weights=uniform
Fold 2: fitting knn with n_neighbors=18, metric=euclidean, weights=uniform
Fold 3: fitting knn with n_neighbors=18, metric=euclidean, weights=uniform
Fold 4: fitting knn with n_neighbors=18, metric=euclidean, weights=uniform


[I 2024-01-05 13:13:01,931] Trial 25 finished with value: 0.7633846302949749 and parameters: {'classifier': 'knn', 'n_clusters': 1024, 'n_neighbors': 18, 'metric': 'euclidean', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 0: fitting knn with n_neighbors=13, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=13, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=13, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=13, metric=manhattan, weights=distance


[I 2024-01-05 13:13:07,825] Trial 26 finished with value: 0.7894325489951749 and parameters: {'classifier': 'knn', 'n_clusters': 64, 'n_neighbors': 13, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 23 with value: 0.8229140273999155.


Fold 4: fitting knn with n_neighbors=13, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=8, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=8, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=8, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=8, metric=manhattan, weights=uniform


[I 2024-01-05 13:13:15,763] Trial 27 finished with value: 0.8035589430472194 and parameters: {'classifier': 'knn', 'n_clusters': 128, 'n_neighbors': 8, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 4: fitting knn with n_neighbors=8, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform


[I 2024-01-05 13:13:35,063] Trial 28 finished with value: 0.8169605338760704 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform


[I 2024-01-05 13:13:54,608] Trial 29 finished with value: 0.8173315888212775 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=4, metric=euclidean, weights=uniform
Fold 1: fitting knn with n_neighbors=4, metric=euclidean, weights=uniform
Fold 2: fitting knn with n_neighbors=4, metric=euclidean, weights=uniform
Fold 3: fitting knn with n_neighbors=4, metric=euclidean, weights=uniform


[I 2024-01-05 13:14:14,187] Trial 30 finished with value: 0.7648736959426249 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'euclidean', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 4: fitting knn with n_neighbors=4, metric=euclidean, weights=uniform
Fold 0: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform


[I 2024-01-05 13:14:33,649] Trial 31 finished with value: 0.7775290232809287 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 2, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 23 with value: 0.8229140273999155.


Fold 4: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 13:14:52,906] Trial 32 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 32 with value: 0.8247699943926399.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 13:15:12,287] Trial 33 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 32 with value: 0.8247699943926399.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 13:15:31,634] Trial 34 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 32 with value: 0.8247699943926399.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=distance


[I 2024-01-05 13:15:51,212] Trial 35 finished with value: 0.824773455726084 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 35 with value: 0.824773455726084.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=cosine, weights=distance
Fold 1: fitting knn with n_neighbors=5, metric=cosine, weights=distance
Fold 2: fitting knn with n_neighbors=5, metric=cosine, weights=distance
Fold 3: fitting knn with n_neighbors=5, metric=cosine, weights=distance


[I 2024-01-05 13:16:11,423] Trial 36 finished with value: 0.7544530054758295 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'cosine', 'weight': 'distance'}. Best is trial 35 with value: 0.824773455726084.


Fold 4: fitting knn with n_neighbors=5, metric=cosine, weights=distance
Fold 0: fitting knn with n_neighbors=6, metric=hamming, weights=distance
Fold 1: fitting knn with n_neighbors=6, metric=hamming, weights=distance
Fold 2: fitting knn with n_neighbors=6, metric=hamming, weights=distance
Fold 3: fitting knn with n_neighbors=6, metric=hamming, weights=distance


[I 2024-01-05 13:16:30,635] Trial 37 finished with value: 0.30989595231667044 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 6, 'metric': 'hamming', 'weight': 'distance'}. Best is trial 35 with value: 0.824773455726084.


Fold 4: fitting knn with n_neighbors=6, metric=hamming, weights=distance
Fold 0: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=1, metric=manhattan, weights=distance


[I 2024-01-05 13:16:50,017] Trial 38 finished with value: 0.7912961309214762 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 1, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 35 with value: 0.824773455726084.


Fold 4: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=minkowski, weights=distance
Fold 1: fitting knn with n_neighbors=5, metric=minkowski, weights=distance
Fold 2: fitting knn with n_neighbors=5, metric=minkowski, weights=distance
Fold 3: fitting knn with n_neighbors=5, metric=minkowski, weights=distance
Fold 4: fitting knn with n_neighbors=5, metric=minkowski, weights=distance


[I 2024-01-05 13:24:46,622] Trial 39 finished with value: 0.4959301641364319 and parameters: {'classifier': 'knn', 'n_clusters': 16384, 'n_neighbors': 5, 'metric': 'minkowski', 'weight': 'distance'}. Best is trial 35 with value: 0.824773455726084.


Fold 0: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=8, metric=manhattan, weights=distance


[I 2024-01-05 13:25:05,959] Trial 40 finished with value: 0.823655445023641 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 8, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 35 with value: 0.824773455726084.


Fold 4: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=6, metric=manhattan, weights=distance


[I 2024-01-05 13:25:25,231] Trial 41 finished with value: 0.8258873128283941 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 6, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=distance


[I 2024-01-05 13:25:44,731] Trial 42 finished with value: 0.8180778523118246 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=6, metric=manhattan, weights=distance
Fold 4: fitting knn with n_neighbors=6, metric=manhattan, weights=distance


[I 2024-01-05 13:27:56,907] Trial 43 finished with value: 0.8180792368452021 and parameters: {'classifier': 'knn', 'n_clusters': 4096, 'n_neighbors': 6, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=6, metric=euclidean, weights=distance
Fold 1: fitting knn with n_neighbors=6, metric=euclidean, weights=distance
Fold 2: fitting knn with n_neighbors=6, metric=euclidean, weights=distance
Fold 3: fitting knn with n_neighbors=6, metric=euclidean, weights=distance
Fold 4: fitting knn with n_neighbors=6, metric=euclidean, weights=distance


[I 2024-01-05 13:31:59,850] Trial 44 finished with value: 0.6268634088596292 and parameters: {'classifier': 'knn', 'n_clusters': 8192, 'n_neighbors': 6, 'metric': 'euclidean', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=distance


[I 2024-01-05 13:32:19,300] Trial 45 finished with value: 0.8180778523118246 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=8, metric=hamming, weights=distance
Fold 1: fitting knn with n_neighbors=8, metric=hamming, weights=distance
Fold 2: fitting knn with n_neighbors=8, metric=hamming, weights=distance
Fold 3: fitting knn with n_neighbors=8, metric=hamming, weights=distance


[I 2024-01-05 13:32:25,175] Trial 46 finished with value: 0.3705350529237884 and parameters: {'classifier': 'knn', 'n_clusters': 64, 'n_neighbors': 8, 'metric': 'hamming', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=8, metric=hamming, weights=distance
Fold 0: fitting knn with n_neighbors=2, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=2, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=2, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=2, metric=manhattan, weights=distance


[I 2024-01-05 13:32:44,584] Trial 47 finished with value: 0.7898084498072038 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 2, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=2, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=9, metric=minkowski, weights=distance
Fold 1: fitting knn with n_neighbors=9, metric=minkowski, weights=distance
Fold 2: fitting knn with n_neighbors=9, metric=minkowski, weights=distance
Fold 3: fitting knn with n_neighbors=9, metric=minkowski, weights=distance


[I 2024-01-05 13:33:03,983] Trial 48 finished with value: 0.7782725177047205 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 9, 'metric': 'minkowski', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=9, metric=minkowski, weights=distance
Fold 0: fitting knn with n_neighbors=6, metric=cosine, weights=distance
Fold 1: fitting knn with n_neighbors=6, metric=cosine, weights=distance
Fold 2: fitting knn with n_neighbors=6, metric=cosine, weights=distance
Fold 3: fitting knn with n_neighbors=6, metric=cosine, weights=distance
Fold 4: fitting knn with n_neighbors=6, metric=cosine, weights=distance


[I 2024-01-05 13:41:00,892] Trial 49 finished with value: 0.7678504427045476 and parameters: {'classifier': 'knn', 'n_clusters': 16384, 'n_neighbors': 6, 'metric': 'cosine', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=22, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=22, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=22, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=22, metric=manhattan, weights=uniform


[I 2024-01-05 13:41:06,080] Trial 50 finished with value: 0.7473918852498735 and parameters: {'classifier': 'knn', 'n_clusters': 32, 'n_neighbors': 22, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=22, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=8, metric=manhattan, weights=distance


[I 2024-01-05 13:41:25,285] Trial 51 finished with value: 0.823655445023641 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 8, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=distance


[I 2024-01-05 13:41:44,698] Trial 52 finished with value: 0.824773455726084 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=distance


[I 2024-01-05 13:42:04,244] Trial 53 finished with value: 0.824773455726084 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=distance


[I 2024-01-05 13:44:15,963] Trial 54 finished with value: 0.8139879407142807 and parameters: {'classifier': 'knn', 'n_clusters': 4096, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=distance


[I 2024-01-05 13:44:35,374] Trial 55 finished with value: 0.8180778523118246 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=11, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=11, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=11, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=11, metric=manhattan, weights=distance
Fold 4: fitting knn with n_neighbors=11, metric=manhattan, weights=distance


[I 2024-01-05 13:48:51,208] Trial 56 finished with value: 0.8124912601330536 and parameters: {'classifier': 'knn', 'n_clusters': 8192, 'n_neighbors': 11, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=7, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=7, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=7, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=7, metric=manhattan, weights=distance


[I 2024-01-05 13:48:58,871] Trial 57 finished with value: 0.8046838764165507 and parameters: {'classifier': 'knn', 'n_clusters': 128, 'n_neighbors': 7, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=7, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=1, metric=manhattan, weights=distance


[I 2024-01-05 13:49:18,337] Trial 58 finished with value: 0.7912961309214762 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 1, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=1, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=hamming, weights=distance
Fold 1: fitting knn with n_neighbors=5, metric=hamming, weights=distance
Fold 2: fitting knn with n_neighbors=5, metric=hamming, weights=distance
Fold 3: fitting knn with n_neighbors=5, metric=hamming, weights=distance


[I 2024-01-05 13:49:29,963] Trial 59 finished with value: 0.3374336289312095 and parameters: {'classifier': 'knn', 'n_clusters': 256, 'n_neighbors': 5, 'metric': 'hamming', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=hamming, weights=distance
Fold 0: fitting knn with n_neighbors=10, metric=euclidean, weights=distance
Fold 1: fitting knn with n_neighbors=10, metric=euclidean, weights=distance
Fold 2: fitting knn with n_neighbors=10, metric=euclidean, weights=distance
Fold 3: fitting knn with n_neighbors=10, metric=euclidean, weights=distance


[I 2024-01-05 13:49:49,161] Trial 60 finished with value: 0.7771503534021447 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 10, 'metric': 'euclidean', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=10, metric=euclidean, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 13:50:08,936] Trial 61 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform


[I 2024-01-05 13:50:28,360] Trial 62 finished with value: 0.8214194236187549 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 6, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform


[I 2024-01-05 13:50:47,766] Trial 63 finished with value: 0.8169605338760704 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform


[I 2024-01-05 13:51:07,220] Trial 64 finished with value: 0.8214187313520661 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 7, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=2, metric=minkowski, weights=distance
Fold 1: fitting knn with n_neighbors=2, metric=minkowski, weights=distance
Fold 2: fitting knn with n_neighbors=2, metric=minkowski, weights=distance
Fold 3: fitting knn with n_neighbors=2, metric=minkowski, weights=distance


[I 2024-01-05 13:51:13,509] Trial 65 finished with value: 0.7090721549569755 and parameters: {'classifier': 'knn', 'n_clusters': 64, 'n_neighbors': 2, 'metric': 'minkowski', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=2, metric=minkowski, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=cosine, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=cosine, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=cosine, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=cosine, weights=uniform


[I 2024-01-05 13:51:19,301] Trial 66 finished with value: 0.7176348016309804 and parameters: {'classifier': 'knn', 'n_clusters': 32, 'n_neighbors': 5, 'metric': 'cosine', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=cosine, weights=uniform
Fold 0: fitting knn with n_neighbors=25, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=25, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=25, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=25, metric=manhattan, weights=uniform


[I 2024-01-05 13:51:38,646] Trial 67 finished with value: 0.8121202051878464 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 25, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=25, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=distance


[I 2024-01-05 13:51:58,157] Trial 68 finished with value: 0.8203117969166442 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform


[I 2024-01-05 14:00:22,563] Trial 69 finished with value: 0.7976158335237067 and parameters: {'classifier': 'knn', 'n_clusters': 16384, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=9, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=9, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=9, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=9, metric=manhattan, weights=distance


[I 2024-01-05 14:00:30,450] Trial 70 finished with value: 0.8031948107689006 and parameters: {'classifier': 'knn', 'n_clusters': 128, 'n_neighbors': 9, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=9, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 14:00:49,925] Trial 71 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform


[I 2024-01-05 14:01:09,456] Trial 72 finished with value: 0.8169605338760704 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform


[I 2024-01-05 14:01:28,609] Trial 73 finished with value: 0.8214194236187549 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 6, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform


[I 2024-01-05 14:01:48,040] Trial 74 finished with value: 0.8214187313520661 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 7, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=19, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=19, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=19, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=19, metric=manhattan, weights=uniform


[I 2024-01-05 14:01:59,664] Trial 75 finished with value: 0.8098917987165375 and parameters: {'classifier': 'knn', 'n_clusters': 256, 'n_neighbors': 19, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=19, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 14:02:36,454] Trial 76 finished with value: 0.815465237828221 and parameters: {'classifier': 'knn', 'n_clusters': 1024, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=2, metric=euclidean, weights=uniform
Fold 1: fitting knn with n_neighbors=2, metric=euclidean, weights=uniform
Fold 2: fitting knn with n_neighbors=2, metric=euclidean, weights=uniform
Fold 3: fitting knn with n_neighbors=2, metric=euclidean, weights=uniform


[I 2024-01-05 14:02:56,056] Trial 77 finished with value: 0.7235896796882031 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 2, 'metric': 'euclidean', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=2, metric=euclidean, weights=uniform
Fold 0: fitting knn with n_neighbors=7, metric=hamming, weights=distance
Fold 1: fitting knn with n_neighbors=7, metric=hamming, weights=distance
Fold 2: fitting knn with n_neighbors=7, metric=hamming, weights=distance
Fold 3: fitting knn with n_neighbors=7, metric=hamming, weights=distance
Fold 4: fitting knn with n_neighbors=7, metric=hamming, weights=distance


[I 2024-01-05 14:05:07,436] Trial 78 finished with value: 0.23027766816888534 and parameters: {'classifier': 'knn', 'n_clusters': 4096, 'n_neighbors': 7, 'metric': 'hamming', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform


[I 2024-01-05 14:05:26,817] Trial 79 finished with value: 0.8173315888212775 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=6, metric=minkowski, weights=distance
Fold 1: fitting knn with n_neighbors=6, metric=minkowski, weights=distance
Fold 2: fitting knn with n_neighbors=6, metric=minkowski, weights=distance
Fold 3: fitting knn with n_neighbors=6, metric=minkowski, weights=distance
Fold 4: fitting knn with n_neighbors=6, metric=minkowski, weights=distance


[I 2024-01-05 14:09:29,575] Trial 80 finished with value: 0.6268634088596292 and parameters: {'classifier': 'knn', 'n_clusters': 8192, 'n_neighbors': 6, 'metric': 'minkowski', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 14:09:48,996] Trial 81 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 14:10:08,550] Trial 82 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform


[I 2024-01-05 14:10:28,323] Trial 83 finished with value: 0.8169605338760704 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform


[I 2024-01-05 14:10:47,814] Trial 84 finished with value: 0.8214194236187549 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 6, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=8, metric=cosine, weights=uniform
Fold 1: fitting knn with n_neighbors=8, metric=cosine, weights=uniform
Fold 2: fitting knn with n_neighbors=8, metric=cosine, weights=uniform
Fold 3: fitting knn with n_neighbors=8, metric=cosine, weights=uniform


[I 2024-01-05 14:11:07,179] Trial 85 finished with value: 0.7462738745474307 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 8, 'metric': 'cosine', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=8, metric=cosine, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 14:12:16,442] Trial 86 finished with value: 0.813607886302119 and parameters: {'classifier': 'knn', 'n_clusters': 2048, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=distance


[I 2024-01-05 14:12:35,825] Trial 87 finished with value: 0.8203117969166442 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform


[I 2024-01-05 14:12:41,750] Trial 88 finished with value: 0.782360352502198 and parameters: {'classifier': 'knn', 'n_clusters': 64, 'n_neighbors': 7, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=7, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=distance


[I 2024-01-05 14:13:01,152] Trial 89 finished with value: 0.8180778523118246 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform


[I 2024-01-05 14:13:20,839] Trial 90 finished with value: 0.8214194236187549 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 6, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 14:13:40,039] Trial 91 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform


[I 2024-01-05 14:13:59,462] Trial 92 finished with value: 0.8247699943926399 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 5, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=5, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform


[I 2024-01-05 14:14:18,833] Trial 93 finished with value: 0.8169605338760704 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 4, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=4, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform


[I 2024-01-05 14:14:24,045] Trial 94 finished with value: 0.7403245346237185 and parameters: {'classifier': 'knn', 'n_clusters': 32, 'n_neighbors': 6, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=6, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 1: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 2: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 3: fitting knn with n_neighbors=8, metric=manhattan, weights=distance


[I 2024-01-05 14:14:43,731] Trial 95 finished with value: 0.823655445023641 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 8, 'metric': 'manhattan', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=8, metric=manhattan, weights=distance
Fold 0: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform


[I 2024-01-05 14:15:03,133] Trial 96 finished with value: 0.7775290232809287 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 2, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=2, metric=manhattan, weights=uniform
Fold 0: fitting knn with n_neighbors=5, metric=euclidean, weights=distance
Fold 1: fitting knn with n_neighbors=5, metric=euclidean, weights=distance
Fold 2: fitting knn with n_neighbors=5, metric=euclidean, weights=distance
Fold 3: fitting knn with n_neighbors=5, metric=euclidean, weights=distance
Fold 4: fitting knn with n_neighbors=5, metric=euclidean, weights=distance


[I 2024-01-05 14:22:58,051] Trial 97 finished with value: 0.4959301641364319 and parameters: {'classifier': 'knn', 'n_clusters': 16384, 'n_neighbors': 5, 'metric': 'euclidean', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 1: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 2: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 3: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform
Fold 4: fitting knn with n_neighbors=3, metric=manhattan, weights=uniform


[I 2024-01-05 14:23:34,275] Trial 98 finished with value: 0.808771711214028 and parameters: {'classifier': 'knn', 'n_clusters': 1024, 'n_neighbors': 3, 'metric': 'manhattan', 'weight': 'uniform'}. Best is trial 41 with value: 0.8258873128283941.


Fold 0: fitting knn with n_neighbors=7, metric=hamming, weights=distance
Fold 1: fitting knn with n_neighbors=7, metric=hamming, weights=distance
Fold 2: fitting knn with n_neighbors=7, metric=hamming, weights=distance
Fold 3: fitting knn with n_neighbors=7, metric=hamming, weights=distance


[I 2024-01-05 14:23:53,384] Trial 99 finished with value: 0.30952351283808577 and parameters: {'classifier': 'knn', 'n_clusters': 512, 'n_neighbors': 7, 'metric': 'hamming', 'weight': 'distance'}. Best is trial 41 with value: 0.8258873128283941.


Fold 4: fitting knn with n_neighbors=7, metric=hamming, weights=distance


{'classifier': 'knn',
 'n_clusters': 512,
 'n_neighbors': 6,
 'metric': 'manhattan',
 'weight': 'distance'}

### Gridsearch for SVC parameters ###

In [13]:
def objective(trial):

    n_clusters = trial.suggest_categorical('n_clusters', codebook_sizes)

    svm_kernel = trial.suggest_categorical('svm_kernel', ['rbf', 'poly', 'sigmoid'])
    svm_C = trial.suggest_categorical('svm_C', [0.01, 0.1, 1, 10, 100])
    svm_gamma = trial.suggest_categorical('svm_gamma', [0.05, 0.1, 0.15, 0.2, 0.25])

    metrics = []
    # Run k-fold cross validation per hyperparameter combination
    for i, fold in enumerate(folds, 0):

        train_data, train_labels, test_data, test_labels = fold
        
        codebook = codebooks[f'fold_{i}'][n_clusters]
        # create visual words for training data
        train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
        # create visual words for test data
        test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)
        
        
        # fit SVM classifier
        print(f'Fold {i}: fitting svm with C={svm_C}, kernel={svm_kernel}, gamma={svm_gamma}, n_clusters={n_clusters}')
        svm = SVC(C=svm_C, kernel=svm_kernel, gamma=svm_gamma)
        svm.fit(train_visual_words, train_labels)
        # predict labels for test data
        pred_labels = svm.predict(test_visual_words)
    
        # get accuracy metric for current run
        metrics.append(get_metrics(test_labels, pred_labels)[0])

    return sum(metrics)/len(metrics)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

study.best_params

[I 2024-01-05 18:58:13,226] A new study created in memory with name: no-name-1496ce56-3621-4308-939a-1b7fd28d6d1b


Fold 0: fitting svm with C=0.1, kernel=sigmoid, gamma=0.05, n_clusters=1024
Fold 1: fitting svm with C=0.1, kernel=sigmoid, gamma=0.05, n_clusters=1024
Fold 2: fitting svm with C=0.1, kernel=sigmoid, gamma=0.05, n_clusters=1024
Fold 3: fitting svm with C=0.1, kernel=sigmoid, gamma=0.05, n_clusters=1024
Fold 4: fitting svm with C=0.1, kernel=sigmoid, gamma=0.05, n_clusters=1024


[I 2024-01-05 18:59:01,688] Trial 0 finished with value: 0.15847507493786908 and parameters: {'n_clusters': 1024, 'svm_kernel': 'sigmoid', 'svm_C': 0.1, 'svm_gamma': 0.05}. Best is trial 0 with value: 0.15847507493786908.


Fold 0: fitting svm with C=1, kernel=sigmoid, gamma=0.05, n_clusters=4096
Fold 1: fitting svm with C=1, kernel=sigmoid, gamma=0.05, n_clusters=4096
Fold 2: fitting svm with C=1, kernel=sigmoid, gamma=0.05, n_clusters=4096
Fold 3: fitting svm with C=1, kernel=sigmoid, gamma=0.05, n_clusters=4096
Fold 4: fitting svm with C=1, kernel=sigmoid, gamma=0.05, n_clusters=4096


[I 2024-01-05 19:02:16,225] Trial 1 finished with value: 0.3467356164288731 and parameters: {'n_clusters': 4096, 'svm_kernel': 'sigmoid', 'svm_C': 1, 'svm_gamma': 0.05}. Best is trial 1 with value: 0.3467356164288731.


Fold 0: fitting svm with C=0.1, kernel=sigmoid, gamma=0.1, n_clusters=512
Fold 1: fitting svm with C=0.1, kernel=sigmoid, gamma=0.1, n_clusters=512
Fold 2: fitting svm with C=0.1, kernel=sigmoid, gamma=0.1, n_clusters=512
Fold 3: fitting svm with C=0.1, kernel=sigmoid, gamma=0.1, n_clusters=512
Fold 4: fitting svm with C=0.1, kernel=sigmoid, gamma=0.1, n_clusters=512


[I 2024-01-05 19:02:42,246] Trial 2 finished with value: 0.1528981744927416 and parameters: {'n_clusters': 512, 'svm_kernel': 'sigmoid', 'svm_C': 0.1, 'svm_gamma': 0.1}. Best is trial 1 with value: 0.3467356164288731.


Fold 0: fitting svm with C=10, kernel=sigmoid, gamma=0.05, n_clusters=64
Fold 1: fitting svm with C=10, kernel=sigmoid, gamma=0.05, n_clusters=64
Fold 2: fitting svm with C=10, kernel=sigmoid, gamma=0.05, n_clusters=64
Fold 3: fitting svm with C=10, kernel=sigmoid, gamma=0.05, n_clusters=64
Fold 4: fitting svm with C=10, kernel=sigmoid, gamma=0.05, n_clusters=64


[I 2024-01-05 19:02:50,067] Trial 3 finished with value: 0.1532699217046375 and parameters: {'n_clusters': 64, 'svm_kernel': 'sigmoid', 'svm_C': 10, 'svm_gamma': 0.05}. Best is trial 1 with value: 0.3467356164288731.


Fold 0: fitting svm with C=1, kernel=rbf, gamma=0.1, n_clusters=256
Fold 1: fitting svm with C=1, kernel=rbf, gamma=0.1, n_clusters=256
Fold 2: fitting svm with C=1, kernel=rbf, gamma=0.1, n_clusters=256
Fold 3: fitting svm with C=1, kernel=rbf, gamma=0.1, n_clusters=256
Fold 4: fitting svm with C=1, kernel=rbf, gamma=0.1, n_clusters=256


[I 2024-01-05 19:03:07,592] Trial 4 finished with value: 0.1532706139713263 and parameters: {'n_clusters': 256, 'svm_kernel': 'rbf', 'svm_C': 1, 'svm_gamma': 0.1}. Best is trial 1 with value: 0.3467356164288731.


Fold 0: fitting svm with C=1, kernel=poly, gamma=0.25, n_clusters=1024
Fold 1: fitting svm with C=1, kernel=poly, gamma=0.25, n_clusters=1024
Fold 2: fitting svm with C=1, kernel=poly, gamma=0.25, n_clusters=1024
Fold 3: fitting svm with C=1, kernel=poly, gamma=0.25, n_clusters=1024
Fold 4: fitting svm with C=1, kernel=poly, gamma=0.25, n_clusters=1024


[I 2024-01-05 19:03:55,650] Trial 5 finished with value: 0.832958817054682 and parameters: {'n_clusters': 1024, 'svm_kernel': 'poly', 'svm_C': 1, 'svm_gamma': 0.25}. Best is trial 5 with value: 0.832958817054682.


Fold 0: fitting svm with C=100, kernel=sigmoid, gamma=0.05, n_clusters=16384
Fold 1: fitting svm with C=100, kernel=sigmoid, gamma=0.05, n_clusters=16384
Fold 2: fitting svm with C=100, kernel=sigmoid, gamma=0.05, n_clusters=16384
Fold 3: fitting svm with C=100, kernel=sigmoid, gamma=0.05, n_clusters=16384
Fold 4: fitting svm with C=100, kernel=sigmoid, gamma=0.05, n_clusters=16384


[I 2024-01-05 19:17:43,810] Trial 6 finished with value: 0.5204620187881179 and parameters: {'n_clusters': 16384, 'svm_kernel': 'sigmoid', 'svm_C': 100, 'svm_gamma': 0.05}. Best is trial 5 with value: 0.832958817054682.


Fold 0: fitting svm with C=0.1, kernel=poly, gamma=0.25, n_clusters=128
Fold 1: fitting svm with C=0.1, kernel=poly, gamma=0.25, n_clusters=128
Fold 2: fitting svm with C=0.1, kernel=poly, gamma=0.25, n_clusters=128
Fold 3: fitting svm with C=0.1, kernel=poly, gamma=0.25, n_clusters=128
Fold 4: fitting svm with C=0.1, kernel=poly, gamma=0.25, n_clusters=128


[I 2024-01-05 19:17:53,242] Trial 7 finished with value: 0.8307234879164849 and parameters: {'n_clusters': 128, 'svm_kernel': 'poly', 'svm_C': 0.1, 'svm_gamma': 0.25}. Best is trial 5 with value: 0.832958817054682.


Fold 0: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=256
Fold 1: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=256
Fold 2: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=256
Fold 3: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=256
Fold 4: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=256


[I 2024-01-05 19:18:08,336] Trial 8 finished with value: 0.8385371020331872 and parameters: {'n_clusters': 256, 'svm_kernel': 'poly', 'svm_C': 0.1, 'svm_gamma': 0.1}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=1, kernel=poly, gamma=0.05, n_clusters=16384
Fold 1: fitting svm with C=1, kernel=poly, gamma=0.05, n_clusters=16384
Fold 2: fitting svm with C=1, kernel=poly, gamma=0.05, n_clusters=16384
Fold 3: fitting svm with C=1, kernel=poly, gamma=0.05, n_clusters=16384
Fold 4: fitting svm with C=1, kernel=poly, gamma=0.05, n_clusters=16384


[I 2024-01-05 19:39:51,042] Trial 9 finished with value: 0.3526849563525853 and parameters: {'n_clusters': 16384, 'svm_kernel': 'poly', 'svm_C': 1, 'svm_gamma': 0.05}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=0.01, kernel=rbf, gamma=0.15, n_clusters=32
Fold 1: fitting svm with C=0.01, kernel=rbf, gamma=0.15, n_clusters=32
Fold 2: fitting svm with C=0.01, kernel=rbf, gamma=0.15, n_clusters=32
Fold 3: fitting svm with C=0.01, kernel=rbf, gamma=0.15, n_clusters=32
Fold 4: fitting svm with C=0.01, kernel=rbf, gamma=0.15, n_clusters=32


[I 2024-01-05 19:39:58,611] Trial 10 finished with value: 0.1525264272808457 and parameters: {'n_clusters': 32, 'svm_kernel': 'rbf', 'svm_C': 0.01, 'svm_gamma': 0.15}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=0.01, kernel=poly, gamma=0.2, n_clusters=2048
Fold 1: fitting svm with C=0.01, kernel=poly, gamma=0.2, n_clusters=2048
Fold 2: fitting svm with C=0.01, kernel=poly, gamma=0.2, n_clusters=2048
Fold 3: fitting svm with C=0.01, kernel=poly, gamma=0.2, n_clusters=2048
Fold 4: fitting svm with C=0.01, kernel=poly, gamma=0.2, n_clusters=2048


[I 2024-01-05 19:41:40,475] Trial 11 finished with value: 0.7775241774141071 and parameters: {'n_clusters': 2048, 'svm_kernel': 'poly', 'svm_C': 0.01, 'svm_gamma': 0.2}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 1: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 2: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 3: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 4: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256


[I 2024-01-05 19:41:56,721] Trial 12 finished with value: 0.8385371020331872 and parameters: {'n_clusters': 256, 'svm_kernel': 'poly', 'svm_C': 100, 'svm_gamma': 0.25}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 1: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 2: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 3: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 4: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256


[I 2024-01-05 19:42:12,931] Trial 13 finished with value: 0.8385371020331872 and parameters: {'n_clusters': 256, 'svm_kernel': 'poly', 'svm_C': 100, 'svm_gamma': 0.25}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=256
Fold 1: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=256
Fold 2: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=256
Fold 3: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=256
Fold 4: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=256


[I 2024-01-05 19:42:27,422] Trial 14 finished with value: 0.8385371020331872 and parameters: {'n_clusters': 256, 'svm_kernel': 'poly', 'svm_C': 100, 'svm_gamma': 0.1}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=10, kernel=poly, gamma=0.15, n_clusters=8192
Fold 1: fitting svm with C=10, kernel=poly, gamma=0.15, n_clusters=8192
Fold 2: fitting svm with C=10, kernel=poly, gamma=0.15, n_clusters=8192
Fold 3: fitting svm with C=10, kernel=poly, gamma=0.15, n_clusters=8192
Fold 4: fitting svm with C=10, kernel=poly, gamma=0.15, n_clusters=8192


[I 2024-01-05 19:48:55,017] Trial 15 finished with value: 0.48029047510262857 and parameters: {'n_clusters': 8192, 'svm_kernel': 'poly', 'svm_C': 10, 'svm_gamma': 0.15}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=0.1, kernel=rbf, gamma=0.2, n_clusters=256
Fold 1: fitting svm with C=0.1, kernel=rbf, gamma=0.2, n_clusters=256
Fold 2: fitting svm with C=0.1, kernel=rbf, gamma=0.2, n_clusters=256
Fold 3: fitting svm with C=0.1, kernel=rbf, gamma=0.2, n_clusters=256
Fold 4: fitting svm with C=0.1, kernel=rbf, gamma=0.2, n_clusters=256


[I 2024-01-05 19:49:10,218] Trial 16 finished with value: 0.1525264272808457 and parameters: {'n_clusters': 256, 'svm_kernel': 'rbf', 'svm_C': 0.1, 'svm_gamma': 0.2}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 1: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 2: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 3: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256
Fold 4: fitting svm with C=100, kernel=poly, gamma=0.25, n_clusters=256


[I 2024-01-05 19:49:24,650] Trial 17 finished with value: 0.8385371020331872 and parameters: {'n_clusters': 256, 'svm_kernel': 'poly', 'svm_C': 100, 'svm_gamma': 0.25}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=64
Fold 1: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=64
Fold 2: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=64
Fold 3: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=64


[I 2024-01-05 19:49:32,013] Trial 18 finished with value: 0.7901781202190332 and parameters: {'n_clusters': 64, 'svm_kernel': 'poly', 'svm_C': 100, 'svm_gamma': 0.1}. Best is trial 8 with value: 0.8385371020331872.


Fold 4: fitting svm with C=100, kernel=poly, gamma=0.1, n_clusters=64
Fold 0: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=4096
Fold 1: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=4096
Fold 2: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=4096
Fold 3: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=4096
Fold 4: fitting svm with C=0.1, kernel=poly, gamma=0.1, n_clusters=4096


[I 2024-01-05 19:52:54,632] Trial 19 finished with value: 0.643603801928655 and parameters: {'n_clusters': 4096, 'svm_kernel': 'poly', 'svm_C': 0.1, 'svm_gamma': 0.1}. Best is trial 8 with value: 0.8385371020331872.


Fold 0: fitting svm with C=0.01, kernel=rbf, gamma=0.25, n_clusters=8192


[W 2024-01-05 19:54:55,538] Trial 20 failed with parameters: {'n_clusters': 8192, 'svm_kernel': 'rbf', 'svm_C': 0.01, 'svm_gamma': 0.25} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/georg/projects/university/C2_Optimization_and_Inference/venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_49809/553763206.py", line 17, in objective
    train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
  File "/tmp/ipykernel_49809/4024483382.py", line 6, in create_visual_words
    labels = codebook.predict(descriptor)
  File "/home/georg/projects/university/C2_Optimization_and_Inference/venv/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py", line 1111, in predict
    labels = _labels_inertia_threadpool_limit(
  File "/home/georg/projects/university/C2_Optimization_and_Inference/venv/lib/python3.10/site-packages/skle

KeyboardInterrupt: 

In [26]:
# initialize a dictionary to store the codebooks for each fold
codebooks = {'fold_0': {}, 'fold_1': {}, 'fold_2': {}, 'fold_3': {}, 'fold_4': {}}

# define the codebook sizes we want to test
codebook_sizes = [32, 64, 128, 256, 512, 1024]

# loop over folds
for i, fold in enumerate(folds):
    train_data, train_labels, test_data, test_labels = fold
    train_descriptors = []

    # fetch the descriptors for the training images in the current fold
    for file in train_data:
        train_descriptors.append(descriptors_dictionary[file])

    # stack the descriptors vertically to create a single numpy array for clustering
    train_D = np.vstack(list(train_descriptors)) 

    # loop over codebook sizes
    for k in tqdm(codebook_sizes, desc=f'Creating Codebooks for fold {i}'):
        # initialize the codebook
        codebook = MiniBatchKMeans(n_clusters=k, 
                           verbose=False, 
                           batch_size=2150,
                           compute_labels=False,
                           reassignment_ratio=0.01,
                           random_state=42)
        # fit the codebook
        codebook.fit(train_D)
        # store the codebook in the dictionary
        codebooks[f'fold_{i}'][k] = codebook

Creating Codebooks for fold 0: 100%|██████████| 6/6 [01:06<00:00, 11.02s/it]
Creating Codebooks for fold 1: 100%|██████████| 6/6 [01:08<00:00, 11.34s/it]
Creating Codebooks for fold 2: 100%|██████████| 6/6 [01:14<00:00, 12.42s/it]
Creating Codebooks for fold 3: 100%|██████████| 6/6 [01:06<00:00, 11.01s/it]
Creating Codebooks for fold 4: 100%|██████████| 6/6 [01:08<00:00, 11.38s/it]


The previous kernel has been interrupted because it was run on a different device and took too long to re-run. The outputs can be found in SVC_gridsearch.ipynb

### Gridsearch PCA parameters for KNN ###

In [27]:
pca_param = [16, 32, 64, 128, 256]
n_clusters = 512
n_neighbors = 6
distance = 'manhattan'
weights = 'distance'

for val in pca_param:
    pca_accuracies = []
    print(f"\nPCA Components = {val}")
    for i, fold in enumerate(folds, 0):

        train_data, train_labels, test_data, test_labels = fold
        
        codebook = codebooks[f'fold_{i}'][n_clusters]
        # create visual words for training data
        train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
        # create visual words for test data
        test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)

        pca = PCA(n_components=int(val))
        VWpca = pca.fit_transform(train_visual_words)
        knnpca = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=distance)
        knnpca.fit(VWpca, train_labels) 
        vwtestpca = pca.transform(test_visual_words)
        accuracy = 100*knnpca.score(vwtestpca, test_labels)

        print(f'Fold {i} accuracy: {accuracy}')
        pca_accuracies.append(accuracy)

    print('Average Accuracy: ', np.mean(pca_accuracies))


PCA Components = 16
Fold 0 accuracy: 76.95167286245353
Fold 1 accuracy: 79.5539033457249
Fold 2 accuracy: 73.97769516728626
Fold 3 accuracy: 77.09497206703911
Fold 4 accuracy: 75.23277467411546
Average Accuracy:  76.56220362332385

PCA Components = 32
Fold 0 accuracy: 79.182156133829
Fold 1 accuracy: 79.73977695167285
Fold 2 accuracy: 77.32342007434944
Fold 3 accuracy: 76.16387337057728
Fold 4 accuracy: 78.02607076350093
Average Accuracy:  78.08705945878589

PCA Components = 64
Fold 0 accuracy: 79.36802973977694
Fold 1 accuracy: 77.88104089219331
Fold 2 accuracy: 74.90706319702602
Fold 3 accuracy: 74.86033519553072
Fold 4 accuracy: 76.35009310986965
Average Accuracy:  76.67331242687933

PCA Components = 128
Fold 0 accuracy: 75.46468401486989
Fold 1 accuracy: 77.32342007434944
Fold 2 accuracy: 73.23420074349443
Fold 3 accuracy: 75.41899441340783
Fold 4 accuracy: 76.35009310986965
Average Accuracy:  75.55827847119825

PCA Components = 256
Fold 0 accuracy: 75.09293680297398
Fold 1 accura

### Gridsearch LDA parameters for KNN ###

In [28]:

lda_param = [1, 3, 5, 7]
      
for val in lda_param:
        lda_accuracies = []
        print(f"\nLDA Components = {val}")
        for i, fold in enumerate(folds, 0):

                train_data, train_labels, test_data, test_labels = fold
                
                codebook = codebooks[f'fold_{i}'][n_clusters]
                # create visual words for training data
                train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
                # create visual words for test data
                test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)

                
                lda = LinearDiscriminantAnalysis(n_components=val)
                VWlda = lda.fit_transform(train_visual_words, train_labels)
                knnlda = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=distance)
                knnlda.fit(VWlda, train_labels) 
                vwtestlda = lda.transform(test_visual_words)
                accuracy = 100*knnlda.score(vwtestlda, test_labels)

                
                print(f'Fold {i} accuracy: {accuracy}')
                lda_accuracies.append(accuracy)

        print('Average Accuracy: ', np.mean(lda_accuracies))


LDA Components = 1
Fold 0 accuracy: 35.87360594795539
Fold 1 accuracy: 29.739776951672862
Fold 2 accuracy: 36.43122676579926
Fold 3 accuracy: 32.774674115456236
Fold 4 accuracy: 30.353817504655495
Average Accuracy:  33.034620257107846

LDA Components = 3
Fold 0 accuracy: 72.11895910780669
Fold 1 accuracy: 70.4460966542751
Fold 2 accuracy: 70.817843866171
Fold 3 accuracy: 70.94972067039106
Fold 4 accuracy: 68.15642458100558
Average Accuracy:  70.49780897592989

LDA Components = 5
Fold 0 accuracy: 81.78438661710037
Fold 1 accuracy: 81.22676579925651
Fold 2 accuracy: 80.29739776951673
Fold 3 accuracy: 78.2122905027933
Fold 4 accuracy: 80.07448789571696
Average Accuracy:  80.31906571687678

LDA Components = 7
Fold 0 accuracy: 85.50185873605948
Fold 1 accuracy: 83.45724907063197
Fold 2 accuracy: 81.59851301115242
Fold 3 accuracy: 81.75046554934823
Fold 4 accuracy: 82.68156424581005
Average Accuracy:  82.99793012260042


### Gridsearch PCA parameters for SVM ###

In [29]:
#best combination parameters
n_clusters = 1024
pca_param = [16, 32, 64, 128, 256]
svc_C = 0.01
svc_gamma = 0.2
svc_kernel = 'linear'           

for val in pca_param:
        pca_accuracies = []
        print(f"\nPCA Components = {val}")
        for i, fold in enumerate(folds, 0):

                train_data, train_labels, test_data, test_labels = fold
                
                codebook = codebooks[f'fold_{i}'][n_clusters]
                # create visual words for training data
                train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
                # create visual words for test data
                test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)


                pca = PCA(n_components=val)
                VWpca = pca.fit_transform(train_visual_words)
                svcpca = SVC(kernel=svc_kernel, C=svc_C, gamma=svc_gamma)
                svcpca.fit(VWpca, train_labels) 
                vwtestpca = pca.transform(test_visual_words)
                accuracy = 100*svcpca.score(vwtestpca, test_labels)

                print(f'Fold {i} accuracy: {accuracy}')
                pca_accuracies.append(accuracy)

        print('Average Accuracy: ', np.mean(pca_accuracies))


PCA Components = 16
Fold 0 accuracy: 83.08550185873605
Fold 1 accuracy: 85.31598513011153
Fold 2 accuracy: 78.81040892193309
Fold 3 accuracy: 79.14338919925513
Fold 4 accuracy: 79.70204841713222
Average Accuracy:  81.2114667054336

PCA Components = 32
Fold 0 accuracy: 83.27137546468401
Fold 1 accuracy: 85.87360594795538
Fold 2 accuracy: 80.29739776951673
Fold 3 accuracy: 81.00558659217877
Fold 4 accuracy: 81.00558659217877
Average Accuracy:  82.29071047330272

PCA Components = 64
Fold 0 accuracy: 85.31598513011153
Fold 1 accuracy: 83.27137546468401
Fold 2 accuracy: 80.66914498141264
Fold 3 accuracy: 83.9851024208566
Fold 4 accuracy: 82.12290502793296
Average Accuracy:  83.07290260499954

PCA Components = 128
Fold 0 accuracy: 86.05947955390334
Fold 1 accuracy: 84.20074349442379
Fold 2 accuracy: 81.59851301115242
Fold 3 accuracy: 82.49534450651768
Fold 4 accuracy: 83.42644320297951
Average Accuracy:  83.55610475379535

PCA Components = 256
Fold 0 accuracy: 86.80297397769516
Fold 1 accur

### Gridsearch LDA parameters for SVM ###

In [30]:
#best combination parameters
n_clusters = 1024
lda_param = [1, 3, 5, 7]
svc_C = 0.01
svc_gamma = 0.2
svc_kernel = 'linear'           

for val in lda_param:
        lda_accuracies = []
        print(f"\nLDA Components = {val}")
        for i, fold in enumerate(folds, 0):

                train_data, train_labels, test_data, test_labels = fold
                
                codebook = codebooks[f'fold_{i}'][n_clusters]
                # create visual words for training data
                train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
                # create visual words for test data
                test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)

                
                lda = LinearDiscriminantAnalysis(n_components=val)
                VWlda = lda.fit_transform(train_visual_words, train_labels)
                svclda = SVC(kernel=svc_kernel, C=svc_C, gamma=svc_gamma)
                svclda.fit(VWlda, train_labels) 
                vwtestlda = lda.transform(test_visual_words)
                accuracy = 100*svclda.score(vwtestlda, test_labels)

                
                print(f'Fold {i} accuracy: {accuracy}')
                lda_accuracies.append(accuracy)

        print('Average Accuracy: ', np.mean(lda_accuracies))


LDA Components = 1
Fold 0 accuracy: 36.2453531598513
Fold 1 accuracy: 29.182156133828997
Fold 2 accuracy: 36.43122676579926
Fold 3 accuracy: 33.33333333333333
Fold 4 accuracy: 32.02979515828678
Average Accuracy:  33.44437291021993

LDA Components = 3
Fold 0 accuracy: 70.4460966542751
Fold 1 accuracy: 67.84386617100373
Fold 2 accuracy: 65.05576208178438
Fold 3 accuracy: 63.128491620111724
Fold 4 accuracy: 67.59776536312849
Average Accuracy:  66.81439637806068

LDA Components = 5
Fold 0 accuracy: 79.73977695167285
Fold 1 accuracy: 77.5092936802974
Fold 2 accuracy: 75.8364312267658
Fold 3 accuracy: 73.9292364990689
Fold 4 accuracy: 74.11545623836126
Average Accuracy:  76.22603891923325

LDA Components = 7
Fold 0 accuracy: 82.15613382899627
Fold 1 accuracy: 76.95167286245353
Fold 2 accuracy: 79.5539033457249
Fold 3 accuracy: 76.72253258845437
Fold 4 accuracy: 81.00558659217877
Average Accuracy:  79.27796584356157


### Test Spatial Pyramids for KNN ###

In [31]:
codebooks = {'fold_0': {}, 'fold_1': {}, 'fold_2': {}, 'fold_3': {}, 'fold_4': {}}
codebook_size = 512
levels = 3

Training_descriptors = {}

print(f'Spatial Pyramid with {levels} levels.')

for filename in tqdm(all_data):

    image = cv2.imread(filename)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    descriptors = []

    for level in range(levels):
        # Calculate the size of each cell in the pyramid
        cell_size = (image.shape[0] // (2 ** level), image.shape[1] // (2 ** level))

        # Divide the image into cells
        for i in range(0, image.shape[0], cell_size[0]):
            for j in range(0, image.shape[1], cell_size[1]):
                cell = image[i:i+cell_size[0], j:j+cell_size[1]]  # Extract cell
                # Perform feature extraction (You can use any desired method here, e.g., SIFT, SURF, HOG)
                # Example: using ORB feature detector
                descriptor = dense_sift(cell, step_size=8, kp_scale=12)
                if descriptor is not None:
                    descriptors.append(descriptor)  # Store descriptors for each cell

    # Combine features (Example: using concatenation)
    combined_features = np.concatenate(descriptors, axis=0)
    
    Training_descriptors[filename] = combined_features

metrics = []

for i, fold in enumerate(folds):
    train_data, train_labels, test_data, test_labels = fold
    train_descriptors = []

    for file in train_data:
        train_descriptors.append(Training_descriptors[file])
    train_D = np.vstack(list(train_descriptors)) 


    codebook = MiniBatchKMeans(n_clusters=codebook_size, 
                        verbose=False, 
                        batch_size=2150,
                        compute_labels=False,
                        reassignment_ratio=0.01,
                        random_state=42)
    
    codebook.fit(train_D)

    train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
    test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)
    
    knn = KNeighborsClassifier(n_neighbors=6, n_jobs=-1, metric='manhattan', weights='distance')
    knn.fit(train_visual_words, train_labels)
    pred_labels = knn.predict(test_visual_words)

    accuracy = get_metrics(test_labels, pred_labels)[0]
    print(f'Fold {i} accuracy: {accuracy}')
    metrics.append(accuracy)

print('Average accuracy', np.mean(metrics))


Spatial Pyramid with 3 levels.


  0%|          | 0/2688 [00:00<?, ?it/s]

100%|██████████| 2688/2688 [04:40<00:00,  9.58it/s]


Fold 0 accuracy: 0.8327137546468402
Fold 1 accuracy: 0.8234200743494424
Fold 2 accuracy: 0.7973977695167286
Fold 3 accuracy: 0.7970204841713222
Fold 4 accuracy: 0.8286778398510242
Average accuracy 0.8158459845070715


### Test Spatial Pyramids for SVC ###

In [32]:
codebooks = {'fold_0': {}, 'fold_1': {}, 'fold_2': {}, 'fold_3': {}, 'fold_4': {}}
codebook_size = 1024
levels = 3

Training_descriptors = {}

print(f'Spatial Pyramid with {levels} levels.')

for filename in tqdm(all_data):

    image = cv2.imread(filename)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    descriptors = []

    for level in range(levels):
        # Calculate the size of each cell in the pyramid
        cell_size = (image.shape[0] // (2 ** level), image.shape[1] // (2 ** level))

        # Divide the image into cells
        for i in range(0, image.shape[0], cell_size[0]):
            for j in range(0, image.shape[1], cell_size[1]):
                cell = image[i:i+cell_size[0], j:j+cell_size[1]]  # Extract cell
                # Perform feature extraction (You can use any desired method here, e.g., SIFT, SURF, HOG)
                # Example: using ORB feature detector
                descriptor = dense_sift(cell, step_size=8, kp_scale=12)
                if descriptor is not None:
                    descriptors.append(descriptor)  # Store descriptors for each cell

    # Combine features (Example: using concatenation)
    combined_features = np.concatenate(descriptors, axis=0)
    
    Training_descriptors[filename] = combined_features
    
metrics = []
for i, fold in enumerate(folds):
    train_data, train_labels, test_data, test_labels = fold
    train_descriptors = []

    for file in train_data:
        train_descriptors.append(Training_descriptors[file])
    train_D = np.vstack(list(train_descriptors)) 


    codebook = MiniBatchKMeans(n_clusters=codebook_size, 
                        verbose=False, 
                        batch_size=2150,
                        compute_labels=False,
                        reassignment_ratio=0.01,
                        random_state=42)
    
    codebook.fit(train_D)

    train_visual_words = create_visual_words(train_data, descriptors_dictionary, codebook)
    test_visual_words = create_visual_words(test_data, descriptors_dictionary, codebook)
    
    svm = SVC(kernel=svc_kernel, C=svc_C, gamma=svc_gamma)
    svm.fit(train_visual_words, train_labels)

    pred_labels = svm.predict(test_visual_words)

    accuracy = get_metrics(test_labels, pred_labels)[0]
    print(f'Fold {i} accuracy: {accuracy}')
    metrics.append(accuracy)

print('Average accuracy', np.mean(metrics))

Spatial Pyramid with 3 levels.


100%|██████████| 2688/2688 [04:41<00:00,  9.53it/s]


Fold 0 accuracy: 0.8568773234200744
Fold 1 accuracy: 0.8513011152416357
Fold 2 accuracy: 0.828996282527881
Fold 3 accuracy: 0.8286778398510242
Fold 4 accuracy: 0.8342644320297952
Average accuracy 0.840023398614082
