In [1]:
from MetaLearning.ApplicationPhase import ApplicationPhase
from MetaLearning import MetaFeatureExtractor
from pathlib import Path
from pandas.core.common import SettingWithCopyWarning
import warnings
warnings.filterwarnings(category=RuntimeWarning, action="ignore")
warnings.filterwarnings(category=SettingWithCopyWarning, action="ignore")
import numpy as np
np.random.seed(0)
# Specify where to find our MKR
# TODO: How to fix the path issue?
# mkr_path = Path("/home/tschecds/automlclustering/src/MetaKnowledgeRepository/")
mkr_path = Path("/home/camilo/dev/ml2dac/src/MetaKnowledgeRepository")

# Specify meta-feature set to use. This is the set General+Stats+Info 
mf_set = MetaFeatureExtractor.meta_feature_sets[4]
# Create simple synthetic dataset
from sklearn.datasets import make_blobs
# We expect the data as numpy arrays
X,y = make_blobs(n_samples=1000, n_features=10, random_state=0)

# We also use a name to describe/identify this dataset
dataset_name = "simple_blobs_n1000_f10"
# Parameters of our approach. This can be customized
n_warmstarts = 5 # Number of warmstart configurations (has to be smaller than n_loops)
n_loops = 10 # Number of optimizer loops. This is n_loops = n_warmstarts + x
limit_cs = True # Reduces the search space to suitable algorithms, dependening on warmstart configurations
time_limit = 120 * 60 # Time limit of overall optimization --> Aborts earlier if n_loops not finished but time_limit reached
cvi = "predict" # We want to predict a cvi based on our meta-knowledge
ML2DAC = ApplicationPhase(mkr_path=mkr_path, mf_set=mf_set)
optimizer_result, additional_info = ML2DAC.optimize_with_meta_learning(X, n_warmstarts=n_warmstarts,
                                                                       n_optimizer_loops=n_loops, 
                                                                       limit_cs=limit_cs,
                                                                       cvi=cvi, time_limit=time_limit,
                                                                       dataset_name=dataset_name)
best_config_stats = optimizer_result.get_incumbent_stats()
from sklearn.metrics import adjusted_rand_score
predicted_labels = best_config_stats["labels"]
adjusted_rand_score(predicted_labels, y)

In [10]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
from sklearn.preprocessing import LabelEncoder

def plot_pca_comparison(X, y, labels, ari_score, save_path):
    """Generate and save PCA plots comparing original labels and cluster labels."""
    def encode_labels(data):
        """Encode labels only if they are non-numeric."""
        if np.issubdtype(data.dtype, np.number):  # Check if numeric
            return data  # No changes for numeric data
        encoder = LabelEncoder()
        return encoder.fit_transform(data)  # Encode non-numeric labels

    # Encode y and labels dynamically based on their type
    y_encoded = encode_labels(np.array(y))
    labels_encoded = encode_labels(np.array(labels))

    # Perform PCA
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    # Plot
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))

    scatter1 = axes[0].scatter(X_pca[:, 0], X_pca[:, 1], c=y_encoded, cmap='viridis', edgecolor='k')
    axes[0].set_title('PCA with Original Labels')
    axes[0].legend(*scatter1.legend_elements(), title="Classes")

    scatter2 = axes[1].scatter(X_pca[:, 0], X_pca[:, 1], c=labels_encoded, cmap='plasma', edgecolor='k')
    axes[1].set_title(f'PCA with Cluster Labels\nARI: {ari_score:.2f}')
    axes[1].legend(*scatter2.legend_elements(), title="Clusters")

    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    plt.close(fig)

In [11]:
from MetaLearning.ApplicationPhase import ApplicationPhase
from MetaLearning import MetaFeatureExtractor
from pathlib import Path
from pandas.core.common import SettingWithCopyWarning
import warnings
import pandas as pd
import numpy as np
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score, adjusted_rand_score
import os
import time

# Suppress warnings
warnings.filterwarnings(category=RuntimeWarning, action="ignore")
warnings.filterwarnings(category=SettingWithCopyWarning, action="ignore")
np.random.seed(0)
# Configuration
scoring_metric = "ml2dac"
validation_folder = "data/validation_csv"
results_file = f"results/{scoring_metric}/experiment_summary.csv"
output_folder = f"results/{scoring_metric}"
labels_folder = f"results/{scoring_metric}/labels"
os.makedirs(output_folder, exist_ok=True)
os.makedirs(labels_folder, exist_ok=True)
# Specify where to find the MetaKnowledgeRepository
mkr_path = Path("/home/camilo/dev/ml2dac/src/MetaKnowledgeRepository")

# Specify the meta-feature set to use (General+Stats+Info)
mf_set = MetaFeatureExtractor.meta_feature_sets[4]

# Initialize the ApplicationPhase instance
ML2DAC = ApplicationPhase(mkr_path=mkr_path, mf_set=mf_set)

# Parameters for the optimization process
n_warmstarts = 5
n_loops = 10
limit_cs = True
time_limit = 120 * 60  # 120 minutes time limit
cvi = "predict"  # Predict the CVI based on meta-knowledge

def get_processed_datasets(results_file):
    """Load processed datasets from results file."""
    if os.path.exists(results_file):
        df = pd.read_csv(results_file)
        return set(df['Dataset'])
    return set()

# Initialize the results file if it doesn't exist
if not os.path.exists(results_file):
    pd.DataFrame(columns=["Dataset", "Best_Pipeline", "silhouette_score", 
                          "davies_bouldin_score", "calinski_harabasz_score", 
                          "adjusted_rand_score", "Running_Time(s)"]
                ).to_csv(results_file, index=False)

processed_datasets = get_processed_datasets(results_file)

# Process each dataset in the directory
for dataset_name in os.listdir(validation_folder):
    try:
        start_time = time.time()

        # Load the dataset
        df = pd.read_csv(f"{validation_folder}/{dataset_name}")
        
        # Separate features and target (assuming the target is the last column)
        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values
        print(f"\n{dataset_name}\n")
        # Perform the clustering optimization using ML2DAC
        optimizer_result, additional_info = ML2DAC.optimize_with_meta_learning(
            X, n_warmstarts=n_warmstarts, n_optimizer_loops=n_loops, 
            limit_cs=limit_cs, cvi=cvi, time_limit=time_limit,
            dataset_name=dataset_name
        )
        
        # Calculate the ARI using the predicted labels
        best_config_stats = optimizer_result.get_incumbent_stats()
        predicted_labels = best_config_stats["labels"]
        pipeline = best_config_stats['config']
        labels = np.array(predicted_labels)
        if(len(set(labels))>1):
            # Save labels to CSV for comparison
            labels_df = pd.DataFrame(labels, columns=["Cluster_Label"])
            labels_df.to_csv(f"{labels_folder}/{dataset_name.replace('.csv', '_labels.csv')}", index=False)
            
            sil = silhouette_score(X, labels)
            dbs = davies_bouldin_score(X, labels)
            chs = calinski_harabasz_score(X, labels)
        else:
            sil = None
            dbs = None
            chs = None
        ari = adjusted_rand_score(y, labels)
        running_time = round(time.time() - start_time, 2)
        # Save results
        results = pd.DataFrame({
            "Dataset": [dataset_name],
            "Best_Pipeline": [pipeline],
            "silhouette_score": [sil],
            "davies_bouldin_score": [dbs],
            "calinski_harabasz_score": [chs],
            "adjusted_rand_score": [ari],
            "Running_Time(s)": [running_time]
        })

        results.to_csv(results_file, mode="a", header=False, index=False)

        # Save PCA plot
        plot_pca_comparison(X, y, labels, ari, f"{output_folder}/{dataset_name.replace('.csv', '_pca.png')}")

    except Exception as e:
        print(f"Error processing {dataset_name}: {e}")


cluto-t7-10k.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=10000-d=2-noise=0.1']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
19    {'algorithm': 'dbscan', 'eps': 0.2948792654336...
24    {'algorithm': 'dbscan', 'eps': 0.2937667056363...
16    {'algorithm': 'dbscan', 'eps': 0.3144172336672...
21    {'algorithm': 'dbscan', 'eps': 0.2990119756196...
47    {'algorithm': 'dbscan', 'eps': 0.2983573713374...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbsca



Obtained CVI score for SIL: -0.28289667337102564
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.29376670563637275,
  'min_samples': 189,
})

Obtained CVI score for SIL: -0.2821028198281443
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.31441723366726726,
  'min_samples': 190,
})

Obtained CVI score for SIL: -0.2984478853241852
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.55,
  'min_samples': 101,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.2990119756196803,
  'min_samples': 189,
})

Obtained CVI score for SIL: -0.2824922968766904
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.29835737133741924,
  'min_samples': 196,
})

Obtained CVI score for SIL: -0.28329567401299643
----
Executing Configuration: Configuration(values={
  'algorithm




segment.csv

----------------------------------




most similar dataset is: ['type=varied-k=10-n=1000-d=50-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
9     {'algorithm': 'MBKMeans', 'n_clusters': 12}
13        {'algorithm': 'ward', 'n_clusters': 12}
11        {'algorithm': 'ward', 'n_clusters': 13}
6     {'algorithm': 'MBKMeans', 'n_clusters': 14}
7     {'algorithm': 'MBKMeans', 'n_clusters': 15}
Name: config, dtype: object
--
selected algorithms: ['MBKMeans', 'ward']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, ward}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'MBKMeans', 'ward'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, ward}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | a



Obtained CVI score for SIL: -0.3029821198349075
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 13,
})

Obtained CVI score for SIL: -0.3048083840033452
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 14,
})

Obtained CVI score for SIL: -0.2890009851969409
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 15,
})

Obtained CVI score for SIL: -0.2662614212741658
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 6,
})

Obtained CVI score for SIL: -0.3239477607904614
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 2,
})

Obtained CVI score for SIL: -0.36000053046617453
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 3,
})

Obtained CVI score for SIL: -0.28913100115143964
----
Executing Configuration: Configuration(v




twodiamonds.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=1000-d=2-noise=0.1']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
35    {'algorithm': 'dbscan', 'eps': 0.3580144202486...
44    {'algorithm': 'dbscan', 'eps': 0.3602593667692...
37    {'algorithm': 'dbscan', 'eps': 0.3598762720999...
30    {'algorithm': 'dbscan', 'eps': 0.3567646981858...
32    {'algorithm': 'dbscan', 'eps': 0.3677481510090...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
   



Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.3602593667692515,
  'min_samples': 23,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.35987627209999906,
  'min_samples': 17,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.356764698185857,
  'min_samples': 25,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.36774815100907543,
  'min_samples': 27,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.66,
  'min_samples': 188,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.93,
  'min_samples': 18




fourty.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=1000-d=2-noise=0.1']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
35    {'algorithm': 'dbscan', 'eps': 0.3580144202486...
44    {'algorithm': 'dbscan', 'eps': 0.3602593667692...
37    {'algorithm': 'dbscan', 'eps': 0.3598762720999...
30    {'algorithm': 'dbscan', 'eps': 0.3567646981858...
32    {'algorithm': 'dbscan', 'eps': 0.3677481510090...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
   



Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.66,
  'min_samples': 188,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.93,
  'min_samples': 18,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.42000000000000004,
  'min_samples': 62,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.31,
  'min_samples': 134,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.77,
  'min_samples': 47,
})

Obtained CVI score for SIL: 2147483647
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.3580144202486343,
  'min_samples': 22,
})






disk-6000n.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=5000-d=2-noise=0.1']
--
selected cvi: Coggins Jain Index (CJI)
--
Selected Warmstart Configs:
7       {'algorithm': 'ward', 'n_clusters': 2}
30     {'algorithm': 'birch', 'n_clusters': 2}
61     {'algorithm': 'birch', 'n_clusters': 3}
46      {'algorithm': 'ward', 'n_clusters': 3}
0     {'algorithm': 'KMeans', 'n_clusters': 2}
Name: config, dtype: object
--
selected algorithms: ['ward', 'birch', 'KMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward, birch, KMeans}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward', 'birch', 'KMeans'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward, birch, KMeans}, Default: ward
    n_clusters, Type: UniformInteger, 



Obtained CVI score for CJI: -1.3881359177741064
----
Executing Configuration: Configuration(values={
  'algorithm': 'birch',
  'n_clusters': 2,
})

Obtained CVI score for CJI: -1.450164982789552
----
Executing Configuration: Configuration(values={
  'algorithm': 'birch',
  'n_clusters': 3,
})

Obtained CVI score for CJI: -1.719139099351649
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})

Obtained CVI score for CJI: -1.9611873610113066
----
Executing Configuration: Configuration(values={
  'algorithm': 'KMeans',
  'n_clusters': 2,
})

Obtained CVI score for CJI: -1.6263014456219194
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 124,
})

Obtained CVI score for CJI: -1.436430810151504
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 22,
})

Obtained CVI score for CJI: -1.7318700796769109
----
Executing Configuration: Configuration(values={
  'algorithm'



Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 8,
})

Obtained CVI score for CJI: -1.8626036424358392
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 95,
})

Obtained CVI score for CJI: -1.640167370327257
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})






wine.csv

----------------------------------
most similar dataset is: ['type=varied-k=10-n=1000-d=50-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
9     {'algorithm': 'MBKMeans', 'n_clusters': 12}
13        {'algorithm': 'ward', 'n_clusters': 12}
11        {'algorithm': 'ward', 'n_clusters': 13}
6     {'algorithm': 'MBKMeans', 'n_clusters': 14}
7     {'algorithm': 'MBKMeans', 'n_clusters': 15}
Name: config, dtype: object
--
selected algorithms: ['MBKMeans', 'ward']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, ward}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'MBKMeans', 'ward'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, ward}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200



Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})

Obtained CVI score for SIL: -0.2774439826952265
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 185,
})

n_samples=178 should be >= n_clusters=185.
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 48,
})

Obtained CVI score for SIL: -0.16422123548840173
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 64,
})

Obtained CVI score for SIL: -0.15543563653387182
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})






balance-scale.csv

----------------------------------




most similar dataset is: ['type=gaussian-k=10-n=1000-d=10-noise=0']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
74              {'algorithm': 'ward', 'n_clusters': 10}
64              {'algorithm': 'ward', 'n_clusters': 12}
25    {'algorithm': 'dbscan', 'eps': 0.7585251766955...
79               {'algorithm': 'ward', 'n_clusters': 9}
9           {'algorithm': 'MBKMeans', 'n_clusters': 16}
Name: config, dtype: object
--
selected algorithms: ['ward', 'dbscan', 'MBKMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward, dbscan, MBKMeans}, Default: ward
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
    n_clusters | al




aggregation.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=1000-d=2-noise=0.1']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
35    {'algorithm': 'dbscan', 'eps': 0.3580144202486...
44    {'algorithm': 'dbscan', 'eps': 0.3602593667692...
37    {'algorithm': 'dbscan', 'eps': 0.3598762720999...
30    {'algorithm': 'dbscan', 'eps': 0.3567646981858...
32    {'algorithm': 'dbscan', 'eps': 0.3677481510090...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'



Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.39,
  'min_samples': 52,
})

Obtained CVI score for SIL: -0.46286541769593215
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.37,
  'min_samples': 29,
})

Obtained CVI score for SIL: -0.5205640771665001
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.37,
  'min_samples': 33,
})

Obtained CVI score for SIL: -0.5205640771665001
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.37,
  'min_samples': 36,
})

Obtained CVI score for SIL: -0.483594380462706
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.36774815100907543,
  'min_samples': 27,
})






tae.csv

----------------------------------




most similar dataset is: ['type=varied-k=10-n=1000-d=30-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
20    {'algorithm': 'MBKMeans', 'n_clusters': 21}
51       {'algorithm': 'birch', 'n_clusters': 67}
58        {'algorithm': 'ward', 'n_clusters': 81}
72    {'algorithm': 'MBKMeans', 'n_clusters': 67}
13         {'algorithm': 'GMM', 'n_clusters': 59}
Name: config, dtype: object
--
selected algorithms: ['MBKMeans', 'birch', 'ward', 'GMM']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, birch, ward, GMM}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'MBKMeans', 'birch', 'ward', 'GMM'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, birch, ward, GMM}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Rang




cassini.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=1000-d=2-noise=0.1']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
35    {'algorithm': 'dbscan', 'eps': 0.3580144202486...
44    {'algorithm': 'dbscan', 'eps': 0.3602593667692...
37    {'algorithm': 'dbscan', 'eps': 0.3598762720999...
30    {'algorithm': 'dbscan', 'eps': 0.3567646981858...
32    {'algorithm': 'dbscan', 'eps': 0.3677481510090...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
    n_c



Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.66,
  'min_samples': 188,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.93,
  'min_samples': 18,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.42000000000000004,
  'min_samples': 62,
})

Obtained CVI score for DBCV: 0.1688537879790451
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.55,
  'min_samples': 101,
})

Obtained CVI score for DBCV: 0.09442718832245672
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.39,
  'min_samples': 103,
})

Obtained CVI score for DBCV: 2147483647
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.55,
  'min_samples': 101,
}




pathbased.csv

----------------------------------
most similar dataset is: ['type=varied-k=50-n=1000-d=10-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
24    {'algorithm': 'ward', 'n_clusters': 53}
22    {'algorithm': 'ward', 'n_clusters': 54}
16    {'algorithm': 'ward', 'n_clusters': 55}
21    {'algorithm': 'ward', 'n_clusters': 58}
26    {'algorithm': 'ward', 'n_clusters': 48}
Name: config, dtype: object
--
selected algorithms: ['ward']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward'}




Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 6,
})

Obtained CVI score for SIL: -0.41269814287492324
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})

Obtained CVI score for SIL: -0.5007977633074632
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 80,
})

Obtained CVI score for SIL: -0.4226486169798902
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 19,
})

Obtained CVI score for SIL: -0.3914208697726524
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})






iono.csv

----------------------------------




most similar dataset is: ['type=gaussian-k=50-n=1000-d=30-noise=0']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
30         {'algorithm': 'GMM', 'n_clusters': 51}
11         {'algorithm': 'GMM', 'n_clusters': 52}
10         {'algorithm': 'GMM', 'n_clusters': 53}
13         {'algorithm': 'GMM', 'n_clusters': 49}
34    {'algorithm': 'MBKMeans', 'n_clusters': 49}
Name: config, dtype: object
--
selected algorithms: ['GMM', 'MBKMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM', 'MBKMeans'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM



Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 52,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 53,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 49,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 49,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing




flame.csv

----------------------------------
most similar dataset is: ['type=gaussian-k=10-n=1000-d=10-noise=0']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
74              {'algorithm': 'ward', 'n_clusters': 10}
64              {'algorithm': 'ward', 'n_clusters': 12}
25    {'algorithm': 'dbscan', 'eps': 0.7585251766955...
79               {'algorithm': 'ward', 'n_clusters': 9}
9           {'algorithm': 'MBKMeans', 'n_clusters': 16}
Name: config, dtype: object
--
selected algorithms: ['ward', 'dbscan', 'MBKMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward, dbscan, MBKMeans}, Default: ward
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_sample



Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.45000000000000007,
  'min_samples': 60,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 15,
})

Error occured: zero-size array to reduction operation minimum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 128,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 12,
})

Obtained CVI score for DBCV: 0.13743420527998196
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 2,
})

Obtained CVI score for DBCV: 0.3612613222314636
----
----------------------------------
finished optimization
best obtained configuration is:
Con




glass.csv

----------------------------------




most similar dataset is: ['type=gaussian-k=50-n=1000-d=30-noise=0']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
30         {'algorithm': 'GMM', 'n_clusters': 51}
11         {'algorithm': 'GMM', 'n_clusters': 52}
10         {'algorithm': 'GMM', 'n_clusters': 53}
13         {'algorithm': 'GMM', 'n_clusters': 49}
34    {'algorithm': 'MBKMeans', 'n_clusters': 49}
Name: config, dtype: object
--
selected algorithms: ['GMM', 'MBKMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM', 'MBKMeans'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM



Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 49,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 49,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 124,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 158,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Ex




sizes2.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=1000-d=2-noise=0.1']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
35    {'algorithm': 'dbscan', 'eps': 0.3580144202486...
44    {'algorithm': 'dbscan', 'eps': 0.3602593667692...
37    {'algorithm': 'dbscan', 'eps': 0.3598762720999...
30    {'algorithm': 'dbscan', 'eps': 0.3567646981858...
32    {'algorithm': 'dbscan', 'eps': 0.3677481510090...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
   



Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.36,
  'min_samples': 23,
})

Obtained CVI score for SIL: -0.5282104500826279
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.11,
  'min_samples': 22,
})

Obtained CVI score for SIL: 0.2676438489720733
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.29000000000000004,
  'min_samples': 22,
})

Obtained CVI score for SIL: -0.4608254592737417
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.36,
  'min_samples': 21,
})

Obtained CVI score for SIL: -0.5391583686958262
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.55,
  'min_samples': 101,
})

Obtained CVI score for SIL: -0.47036148747060336
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.36,
  




iris.csv

----------------------------------
most similar dataset is: ['type=gaussian-k=50-n=1000-d=30-noise=0']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
30         {'algorithm': 'GMM', 'n_clusters': 51}
11         {'algorithm': 'GMM', 'n_clusters': 52}
10         {'algorithm': 'GMM', 'n_clusters': 53}
13         {'algorithm': 'GMM', 'n_clusters': 49}
34    {'algorithm': 'MBKMeans', 'n_clusters': 49}
Name: config, dtype: object
--
selected algorithms: ['GMM', 'MBKMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM', 'MBKMeans'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  C



Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 124,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 158,
})

n_samples=150 should be >= n_clusters=158.
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 135,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 143,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 121,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV




engytime.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=5000-d=2-noise=0.1']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
7       {'algorithm': 'ward', 'n_clusters': 2}
30     {'algorithm': 'birch', 'n_clusters': 2}
61     {'algorithm': 'birch', 'n_clusters': 3}
46      {'algorithm': 'ward', 'n_clusters': 3}
0     {'algorithm': 'KMeans', 'n_clusters': 2}
Name: config, dtype: object
--
selected algorithms: ['ward', 'birch', 'KMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward, birch, KMeans}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward', 'birch', 'KMeans'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward, birch, KMeans}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200]



Obtained CVI score for DBCV: 0.7042805349549017
----
Executing Configuration: Configuration(values={
  'algorithm': 'birch',
  'n_clusters': 2,
})

Obtained CVI score for DBCV: 0.8822042235240426
----
Executing Configuration: Configuration(values={
  'algorithm': 'birch',
  'n_clusters': 3,
})

Obtained CVI score for DBCV: 0.8474292235551242
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})

Obtained CVI score for DBCV: 0.7590480418137573
----
Executing Configuration: Configuration(values={
  'algorithm': 'KMeans',
  'n_clusters': 2,
})

Obtained CVI score for DBCV: 0.7962809978928592
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 185,
})

Error occured: zero-size array to reduction operation minimum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 19,
})

Obtained CVI score for DBCV: 0.63633422551




elliptical_10_2.csv

----------------------------------
most similar dataset is: ['type=varied-k=10-n=1000-d=10-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
41    {'algorithm': 'dbscan', 'eps': 0.9536790514390...
88    {'algorithm': 'dbscan', 'eps': 0.9059743386946...
99    {'algorithm': 'dbscan', 'eps': 0.8878634391450...
55    {'algorithm': 'dbscan', 'eps': 0.9703480015377...
62    {'algorithm': 'dbscan', 'eps': 0.8893681869594...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'db



Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.66,
  'min_samples': 188,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.93,
  'min_samples': 18,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.42000000000000004,
  'min_samples': 62,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.31,
  'min_samples': 134,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.77,
  'min_samples': 47,
})

Obtained CVI score for SIL: 2147483647
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.9536790514390626,
  'min_samples': 3,
})






sonar.csv

----------------------------------
most similar dataset is: ['type=gaussian-k=50-n=1000-d=50-noise=0']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
80       {'algorithm': 'GMM', 'n_clusters': 59}
65     {'algorithm': 'birch', 'n_clusters': 60}
26      {'algorithm': 'ward', 'n_clusters': 43}
50     {'algorithm': 'birch', 'n_clusters': 42}
36    {'algorithm': 'KMeans', 'n_clusters': 94}
Name: config, dtype: object
--
selected algorithms: ['GMM', 'birch', 'ward', 'KMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, birch, ward, KMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM', 'birch', 'ward', 'KMeans'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, birch, ward, KMeans}, Default: GMM
    n_clusters, Type: Unif



Executing Configuration: Configuration(values={
  'algorithm': 'birch',
  'n_clusters': 42,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'KMeans',
  'n_clusters': 94,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 124,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 158,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'KMeans',
  'n_clusters': 183,
})

Error occured: zero-size array




jain.csv

----------------------------------
most similar dataset is: ['type=varied-k=50-n=1000-d=10-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
24    {'algorithm': 'ward', 'n_clusters': 53}
22    {'algorithm': 'ward', 'n_clusters': 54}
16    {'algorithm': 'ward', 'n_clusters': 55}
21    {'algorithm': 'ward', 'n_clusters': 58}
26    {'algorithm': 'ward', 'n_clusters': 48}
Name: config, dtype: object
--
selected algorithms: ['ward']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward'}

Exec



Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 17,
})

Obtained CVI score for SIL: -0.3942726902655882
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 185,
})

Obtained CVI score for SIL: -0.36315455768382265
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 123,
})

Obtained CVI score for SIL: -0.39870451651376565
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 46,
})

Obtained CVI score for SIL: -0.3825096476179292
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 2,
})






sizes4.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=1000-d=2-noise=0.1']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
35    {'algorithm': 'dbscan', 'eps': 0.3580144202486...
44    {'algorithm': 'dbscan', 'eps': 0.3602593667692...
37    {'algorithm': 'dbscan', 'eps': 0.3598762720999...
30    {'algorithm': 'dbscan', 'eps': 0.3567646981858...
32    {'algorithm': 'dbscan', 'eps': 0.3677481510090...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
   



Obtained CVI score for SIL: -0.38053764844642207
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.66,
  'min_samples': 188,
})

Obtained CVI score for SIL: -0.5337504248083521
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.88,
  'min_samples': 187,
})

Obtained CVI score for SIL: -0.5490721335755361
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.85,
  'min_samples': 124,
})

Obtained CVI score for SIL: -0.5497977424706622
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.99,
  'min_samples': 146,
})

Obtained CVI score for SIL: -0.5474484918582702
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.85,
  'min_samples': 124,
})






3-spiral.csv

----------------------------------
most similar dataset is: ['type=varied-k=50-n=1000-d=10-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
24    {'algorithm': 'ward', 'n_clusters': 53}
22    {'algorithm': 'ward', 'n_clusters': 54}
16    {'algorithm': 'ward', 'n_clusters': 55}
21    {'algorithm': 'ward', 'n_clusters': 58}
26    {'algorithm': 'ward', 'n_clusters': 48}
Name: config, dtype: object
--
selected algorithms: ['ward']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {ward}, Default: ward
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'ward'}





Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 185,
})

Obtained CVI score for SIL: -0.24429751519574425
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 123,
})

Obtained CVI score for SIL: -0.36868160568804814
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 49,
})

Obtained CVI score for SIL: -0.45271094120543925
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 43,
})

Obtained CVI score for SIL: -0.44950533673198023
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 49,
})






cluto-t8-8k.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=10000-d=2-noise=0.1']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
19    {'algorithm': 'dbscan', 'eps': 0.2948792654336...
24    {'algorithm': 'dbscan', 'eps': 0.2937667056363...
16    {'algorithm': 'dbscan', 'eps': 0.3144172336672...
21    {'algorithm': 'dbscan', 'eps': 0.2990119756196...
47    {'algorithm': 'dbscan', 'eps': 0.2983573713374...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan



Obtained CVI score for SIL: -0.37616854852822185
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.29376670563637275,
  'min_samples': 189,
})

Obtained CVI score for SIL: -0.37627045707256296
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.55,
  'min_samples': 101,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.31441723366726726,
  'min_samples': 190,
})

Obtained CVI score for SIL: -0.2737212851519864
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.2990119756196803,
  'min_samples': 189,
})

Obtained CVI score for SIL: -0.3871646843508146
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.29835737133741924,
  'min_samples': 196,
})

Obtained CVI score for SIL: -0.37774187029778467
----
Executing Configuration: Configuration(values={
  'algorith




compound.csv

----------------------------------
most similar dataset is: ['type=varied-k=10-n=1000-d=10-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
41    {'algorithm': 'dbscan', 'eps': 0.9536790514390...
88    {'algorithm': 'dbscan', 'eps': 0.9059743386946...
99    {'algorithm': 'dbscan', 'eps': 0.8878634391450...
55    {'algorithm': 'dbscan', 'eps': 0.9703480015377...
62    {'algorithm': 'dbscan', 'eps': 0.8893681869594...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}




Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.66,
  'min_samples': 188,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.93,
  'min_samples': 18,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.42000000000000004,
  'min_samples': 62,
})

Obtained CVI score for SIL: -0.2983830329457819
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.55,
  'min_samples': 101,
})

Obtained CVI score for SIL: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.31,
  'min_samples': 134,
})

Obtained CVI score for SIL: 2147483647
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.42000000000000004,
  'min_samples': 62,





thy.csv

----------------------------------
most similar dataset is: ['type=varied-k=10-n=1000-d=30-noise=0']
--
selected cvi: Calinski-Harabasz (CH)
--
Selected Warmstart Configs:
20    {'algorithm': 'MBKMeans', 'n_clusters': 21}
51       {'algorithm': 'birch', 'n_clusters': 67}
58        {'algorithm': 'ward', 'n_clusters': 81}
72    {'algorithm': 'MBKMeans', 'n_clusters': 67}
13         {'algorithm': 'GMM', 'n_clusters': 59}
Name: config, dtype: object
--
selected algorithms: ['MBKMeans', 'birch', 'ward', 'GMM']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, birch, ward, GMM}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'MBKMeans', 'birch', 'ward', 'GMM'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, birch, ward, GMM}, Default: 



Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 152,
})

Obtained CVI score for CH: -204.7460779635293
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 83,
})

Obtained CVI score for CH: -137.78585473071104
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 175,
})

Obtained CVI score for CH: -237.26713363756625
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 155,
})

Obtained CVI score for CH: -177.08605705852452
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 175,
})






haberman.csv

----------------------------------




most similar dataset is: ['type=varied-k=10-n=1000-d=10-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
41    {'algorithm': 'dbscan', 'eps': 0.9536790514390...
88    {'algorithm': 'dbscan', 'eps': 0.9059743386946...
99    {'algorithm': 'dbscan', 'eps': 0.8878634391450...
55    {'algorithm': 'dbscan', 'eps': 0.9703480015377...
62    {'algorithm': 'dbscan', 'eps': 0.8893681869594...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
    n_clusters | algorithm in {}

Configuration sp




R15.csv

----------------------------------
most similar dataset is: ['type=moons-k=2-n=1000-d=2-noise=0.1']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
35    {'algorithm': 'dbscan', 'eps': 0.3580144202486...
44    {'algorithm': 'dbscan', 'eps': 0.3602593667692...
37    {'algorithm': 'dbscan', 'eps': 0.3598762720999...
30    {'algorithm': 'dbscan', 'eps': 0.3567646981858...
32    {'algorithm': 'dbscan', 'eps': 0.3677481510090...
Name: config, dtype: object
--
selected algorithms: ['dbscan']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {dbscan}, Default: dbscan
    eps, Type: UniformFloat, Range: [0.1, 1.0], Default: 0.55, Q: 0.01
    min_samples, Type: UniformInteger, Range: [2, 200], Default: 101
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    eps | algorithm in {'dbscan'}
    min_samples | algorithm in {'dbscan'}
    n_clust



Obtained CVI score for DBCV: -0.7570394515921052
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.356764698185857,
  'min_samples': 25,
})

Obtained CVI score for DBCV: -0.7570394515921052
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.36774815100907543,
  'min_samples': 27,
})

Obtained CVI score for DBCV: -0.7570394515921052
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.66,
  'min_samples': 188,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.42000000000000004,
  'min_samples': 103,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.27,
  'min_samples': 48,
})

Obtained CVI score for DBCV: 2147483647
----
Executing Configuration: Configuration(values={
  'algorithm': 'dbscan',
  'eps': 0.23,
  'mi




arrhythmia.csv

----------------------------------




most similar dataset is: ['type=varied-k=10-n=1000-d=50-noise=0']
--
selected cvi: Silhouette (SIL)
--
Selected Warmstart Configs:
9     {'algorithm': 'MBKMeans', 'n_clusters': 12}
13        {'algorithm': 'ward', 'n_clusters': 12}
11        {'algorithm': 'ward', 'n_clusters': 13}
6     {'algorithm': 'MBKMeans', 'n_clusters': 14}
7     {'algorithm': 'MBKMeans', 'n_clusters': 15}
Name: config, dtype: object
--
selected algorithms: ['MBKMeans', 'ward']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, ward}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'MBKMeans', 'ward'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {MBKMeans, ward}, Default: MBKMeans
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | a



Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 3,
})

Obtained CVI score for SIL: -0.057403861350777435
----
Executing Configuration: Configuration(values={
  'algorithm': 'ward',
  'n_clusters': 2,
})

Obtained CVI score for SIL: -0.48340331097488487
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 80,
})

Obtained CVI score for SIL: 0.05511774383690254
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 19,
})

Obtained CVI score for SIL: 0.047583116762501
----
----------------------------------
finished optimization
best obtained configuration is:
Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 2,
})






ecoli.csv

----------------------------------




most similar dataset is: ['type=gaussian-k=50-n=1000-d=30-noise=0']
--
selected cvi: DBCV (DBCV)
--
Selected Warmstart Configs:
30         {'algorithm': 'GMM', 'n_clusters': 51}
11         {'algorithm': 'GMM', 'n_clusters': 52}
10         {'algorithm': 'GMM', 'n_clusters': 53}
13         {'algorithm': 'GMM', 'n_clusters': 49}
34    {'algorithm': 'MBKMeans', 'n_clusters': 49}
Name: config, dtype: object
--
selected algorithms: ['GMM', 'MBKMeans']
--
----------------------------------
starting the optimization
Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM', 'MBKMeans'}

Configuration space object:
  Hyperparameters:
    algorithm, Type: Categorical, Choices: {GMM, MBKMeans}, Default: GMM
    n_clusters, Type: UniformInteger, Range: [2, 200], Default: 2
  Conditions:
    n_clusters | algorithm in {'GMM



Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 53,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 49,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'MBKMeans',
  'n_clusters': 49,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executing Configuration: Configuration(values={
  'algorithm': 'GMM',
  'n_clusters': 124,
})

Error occured: zero-size array to reduction operation maximum which has no identity
Obtained CVI score for DBCV: 1.0
----
Executin

