# KMeans

In [6]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils_pca import ClusterData
import cluster_plotter
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt
import my_dbcv_module
import scores 

standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)
running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data_pca.bin_data_for_clustering(running_ranges, print_res=False)

plot_dir = "Images\\k_means_running_bins_pca"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:
    print(f"\nRunning K-Means for Year Range: {year_range}")

    data_array = np.array([cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj,
                            cluster_data.diameter, cluster_data.inc, cluster_data.raan]).T
    feature_names = ["Eccentricity e", "Object magnitude [mag]", "Semi major axis [km]",
                     "Diameter [m]", "Inclination [°]", "RAAN [°]"]

    normalized_data, data_min, data_max = normalize_data(data_array)

    k_values = [3, 4, 5, 6, 7]
    results = []

    for k in k_values:
        result_kmeans, time_kmeans, n_clusters_kmeans, points_per_cluster_kmeans, metrics_kmeans = \
            run_clustering(
                k_means, f"K-means (k={k})", normalized_data, data_min, data_max, k, init='kmeans++'
            )

        labels = result_kmeans.labels
        # Compute DBCV score using custom Rust module
        dbcv_score = scores.DBCV_score_rust(result_kmeans)

        results.append({
            "Year Range": year_range,
            "k": k,
            "Runtime (s)": f"{time_kmeans:.3f}",
            "Clusters": n_clusters_kmeans,
            "Points per Cluster": points_per_cluster_kmeans,
            "DBCV Score": f"{dbcv_score:.3f}"
        })

        # Plot diagnostics
        high_dim_analysis.plot_correlation_heatmap(
            cluster_data,
            output_folder=plot_dir,
            filename=f"corr_{year_range}_k{k}.png"
        )
        high_dim_analysis.plot_mutual_information(
            cluster_data,
            labels,
            output_folder=plot_dir,
            filename=f"mi_{year_range}_k{k}.png"
        )

        from sklearn.feature_selection import f_classif
        X = np.array([cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj,
                      cluster_data.diameter, cluster_data.inc, cluster_data.raan]).T
        valid = labels != -1
        if np.any(valid) and len(np.unique(labels[valid])) > 1:
            f_vals, _ = f_classif(X[valid], labels[valid])
            mean_f = np.mean(f_vals)
        else:
            mean_f = 0.0

        results[-1]["Mean ANOVA F"] = f"{mean_f:.3f}"

    df = pd.DataFrame(results)
    df["Mean ANOVA F"] = pd.to_numeric(df["Mean ANOVA F"], errors="coerce")
    df_sorted = df.sort_values(by="Mean ANOVA F", ascending=False)
    best_params = df_sorted.iloc[0]
    best_k = best_params["k"]

    best_result, _, _, _, _ = run_clustering(
        k_means, f"K-means (k={best_k})", normalized_data, data_min, data_max, int(best_k), init='kmeans++'
    )
    best_labels = best_result.labels

    df_plot = pd.DataFrame(data_array, columns=feature_names)
    df_plot['cluster'] = best_labels.astype(str)

    pairplot = sns.pairplot(df_plot, hue='cluster', diag_kind='kde', plot_kws={'alpha': 0.6, 's': 8})
    pairplot.fig.suptitle(f"K-Means Clusters for Year Range {year_range}", y=1.02)
    pairplot.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_k{best_k}.png"))
    plt.close(pairplot.fig)

    high_dim_analysis.plot_anova_f_values(
        cluster_data,
        best_labels,
        output_folder=plot_dir,
        filename=f"anova_best_{year_range}_k{best_k}.png",
        title=f"ANOVA F-values (Best KMeans Parameters), k = {int(best_k)}"
    )
    print("Best KMeans parameters based on ANOVA F-value:")
    print(best_params)

    display(df)



Running K-Means for Year Range: 2002-2005
Runtime for k_means: 0.018001 seconds
Runtime for k_means: 0.029042 seconds
Runtime for k_means: 0.128654 seconds
Runtime for k_means: 0.151001 seconds
Runtime for k_means: 0.221861 seconds
Runtime for k_means: 0.029860 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2002-2005
k                                               3
Runtime (s)                                 0.018
Clusters                                        3
Points per Cluster    {0: 3672, 1: 3149, 2: 1216}
DBCV Score                                 -0.832
Mean ANOVA F                            27762.699
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2002-2005,3,0.018,3,"{0: 3672, 1: 3149, 2: 1216}",-0.832,27762.699
1,2002-2005,4,0.029,4,"{0: 3155, 1: 1359, 2: 2315, 3: 1208}",-0.959,19750.659
2,2002-2005,5,0.129,5,"{0: 1669, 1: 1208, 2: 3154, 3: 772, 4: 1234}",-0.966,15255.199
3,2002-2005,6,0.151,6,"{0: 1236, 1: 1495, 2: 780, 3: 1658, 4: 2354, 5...",-0.967,12572.349
4,2002-2005,7,0.222,7,"{0: 786, 1: 4360, 2: 307, 3: 294, 4: 1102, 5: ...",-0.75,12223.131



Running K-Means for Year Range: 2003-2006
Runtime for k_means: 0.075371 seconds
Runtime for k_means: 0.065014 seconds
Runtime for k_means: 0.054575 seconds
Runtime for k_means: 0.090877 seconds
Runtime for k_means: 0.058519 seconds
Runtime for k_means: 0.011589 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2003-2006
k                                               3
Runtime (s)                                 0.075
Clusters                                        3
Points per Cluster    {0: 3874, 1: 3670, 2: 1335}
DBCV Score                                 -0.829
Mean ANOVA F                            28907.517
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2003-2006,3,0.075,3,"{0: 3874, 1: 3670, 2: 1335}",-0.829,28907.517
1,2003-2006,4,0.065,4,"{0: 2218, 1: 3871, 2: 1335, 3: 1455}",-0.967,20544.593
2,2003-2006,5,0.055,5,"{0: 1624, 1: 751, 2: 3870, 3: 1300, 4: 1334}",-0.963,16086.752
3,2003-2006,6,0.091,6,"{0: 1335, 1: 871, 2: 705, 3: 1524, 4: 3867, 5:...",-0.961,13683.29
4,2003-2006,7,0.059,7,"{0: 1236, 1: 3870, 2: 485, 3: 848, 4: 551, 5: ...",-0.947,14596.129



Running K-Means for Year Range: 2004-2007
Runtime for k_means: 0.032000 seconds
Runtime for k_means: 0.035104 seconds
Runtime for k_means: 0.024515 seconds
Runtime for k_means: 0.056045 seconds
Runtime for k_means: 0.092434 seconds
Runtime for k_means: 0.025704 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2004-2007
k                                               3
Runtime (s)                                 0.032
Clusters                                        3
Points per Cluster    {0: 2989, 1: 1366, 2: 4194}
DBCV Score                                 -0.828
Mean ANOVA F                             30434.94
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2004-2007,3,0.032,3,"{0: 2989, 1: 1366, 2: 4194}",-0.828,30434.94
1,2004-2007,4,0.035,4,"{0: 5555, 1: 1019, 2: 1315, 3: 660}",-0.693,20914.097
2,2004-2007,5,0.025,5,"{0: 5486, 1: 1440, 2: 918, 3: 630, 4: 75}",-0.873,15868.266
3,2004-2007,6,0.056,6,"{0: 1885, 1: 920, 2: 628, 3: 710, 4: 1443, 5: ...",-0.949,13666.651
4,2004-2007,7,0.092,7,"{0: 2970, 1: 717, 2: 1879, 3: 585, 4: 1274, 5:...",-0.951,11986.494



Running K-Means for Year Range: 2005-2008
Runtime for k_means: 0.023514 seconds
Runtime for k_means: 0.023001 seconds
Runtime for k_means: 0.067751 seconds
Runtime for k_means: 0.076110 seconds
Runtime for k_means: 0.089969 seconds
Runtime for k_means: 0.011999 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2005-2008
k                                              3
Runtime (s)                                0.024
Clusters                                       3
Points per Cluster    {0: 5097, 1: 1585, 2: 939}
DBCV Score                                -0.629
Mean ANOVA F                           27000.725
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2005-2008,3,0.024,3,"{0: 5097, 1: 1585, 2: 939}",-0.629,27000.725
1,2005-2008,4,0.023,4,"{0: 1245, 1: 3853, 2: 1583, 3: 940}",-0.965,18758.131
2,2005-2008,5,0.068,5,"{0: 748, 1: 5094, 2: 1141, 3: 111, 4: 527}",-0.604,13894.253
3,2005-2008,6,0.076,6,"{0: 720, 1: 5094, 2: 297, 3: 400, 4: 376, 5: 734}",-0.613,11780.883
4,2005-2008,7,0.09,7,"{0: 1501, 1: 1602, 2: 1225, 3: 1379, 4: 530, 5...",-0.955,10316.799



Running K-Means for Year Range: 2006-2009
Runtime for k_means: 0.020972 seconds
Runtime for k_means: 0.033664 seconds
Runtime for k_means: 0.110733 seconds
Runtime for k_means: 0.139174 seconds
Runtime for k_means: 0.080952 seconds
Runtime for k_means: 0.014998 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                              2006-2009
k                                               3
Runtime (s)                                 0.021
Clusters                                        3
Points per Cluster    {0: 2345, 1: 1206, 2: 3928}
DBCV Score                                 -0.827
Mean ANOVA F                            26452.612
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2006-2009,3,0.021,3,"{0: 2345, 1: 1206, 2: 3928}",-0.827,26452.612
1,2006-2009,4,0.034,4,"{0: 3924, 1: 1473, 2: 873, 3: 1209}",-0.973,18385.466
2,2006-2009,5,0.111,5,"{0: 3925, 1: 1145, 2: 1208, 3: 499, 4: 702}",-0.964,14494.448
3,2006-2009,6,0.139,6,"{0: 3927, 1: 466, 2: 677, 3: 1090, 4: 113, 5: ...",-0.957,11818.187
4,2006-2009,7,0.081,7,"{0: 1562, 1: 1144, 2: 498, 3: 1489, 4: 702, 5:...",-0.943,10227.49



Running K-Means for Year Range: 2007-2010
Runtime for k_means: 0.008024 seconds
Runtime for k_means: 0.023539 seconds
Runtime for k_means: 0.034697 seconds
Runtime for k_means: 0.039666 seconds
Runtime for k_means: 0.064383 seconds
Runtime for k_means: 0.007997 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2007-2010
k                                              3
Runtime (s)                                0.008
Clusters                                       3
Points per Cluster    {0: 4049, 1: 622, 2: 1015}
DBCV Score                                -0.510
Mean ANOVA F                           20061.874
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2007-2010,3,0.008,3,"{0: 4049, 1: 622, 2: 1015}",-0.51,20061.874
1,2007-2010,4,0.024,4,"{0: 622, 1: 3058, 2: 993, 3: 1013}",-0.965,14077.315
2,2007-2010,5,0.035,5,"{0: 829, 1: 3059, 2: 470, 3: 335, 4: 993}",-0.949,11002.77
3,2007-2010,6,0.04,6,"{0: 3059, 1: 392, 2: 731, 3: 224, 4: 992, 5: 288}",-0.948,8976.629
4,2007-2010,7,0.064,7,"{0: 526, 1: 470, 2: 765, 3: 309, 4: 2163, 5: 9...",-0.934,7579.549



Running K-Means for Year Range: 2008-2011
Runtime for k_means: 0.011004 seconds
Runtime for k_means: 0.008540 seconds
Runtime for k_means: 0.009999 seconds
Runtime for k_means: 0.059696 seconds
Runtime for k_means: 0.022311 seconds
Runtime for k_means: 0.005001 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                            2008-2011
k                                             3
Runtime (s)                               0.011
Clusters                                      3
Points per Cluster    {0: 2857, 1: 692, 2: 515}
DBCV Score                               -0.447
Mean ANOVA F                          13281.066
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2008-2011,3,0.011,3,"{0: 2857, 1: 692, 2: 515}",-0.447,13281.066
1,2008-2011,4,0.009,4,"{0: 2114, 1: 691, 2: 515, 3: 744}",-0.943,9228.152
2,2008-2011,5,0.01,5,"{0: 2852, 1: 526, 2: 361, 3: 90, 4: 235}",-0.434,11161.558
3,2008-2011,6,0.06,6,"{0: 913, 1: 515, 2: 793, 3: 691, 4: 829, 5: 323}",-0.925,5766.923
4,2008-2011,7,0.022,7,"{0: 740, 1: 501, 2: 363, 3: 48, 4: 2112, 5: 90...",-0.932,7116.854



Running K-Means for Year Range: 2009-2012
Runtime for k_means: 0.008085 seconds
Runtime for k_means: 0.010042 seconds
Runtime for k_means: 0.007997 seconds
Runtime for k_means: 0.023951 seconds
Runtime for k_means: 0.036630 seconds
Runtime for k_means: 0.004006 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                             2009-2012
k                                              3
Runtime (s)                                0.008
Clusters                                       3
Points per Cluster    {0: 1837, 1: 1080, 2: 643}
DBCV Score                                -0.812
Mean ANOVA F                           11684.736
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2009-2012,3,0.008,3,"{0: 1837, 1: 1080, 2: 643}",-0.812,11684.736
1,2009-2012,4,0.01,4,"{0: 399, 1: 1837, 2: 681, 3: 643}",-0.928,8585.809
2,2009-2012,5,0.008,5,"{0: 873, 1: 995, 2: 612, 3: 619, 4: 461}",-0.93,6214.502
3,2009-2012,6,0.024,6,"{0: 1283, 1: 336, 2: 375, 3: 861, 4: 219, 5: 486}",-0.94,5263.214
4,2009-2012,7,0.037,7,"{0: 2395, 1: 165, 2: 81, 3: 181, 4: 34, 5: 327...",-0.887,4318.808



Running K-Means for Year Range: 2010-2013
Runtime for k_means: 0.002972 seconds
Runtime for k_means: 0.003055 seconds
Runtime for k_means: 0.010049 seconds
Runtime for k_means: 0.006002 seconds
Runtime for k_means: 0.017975 seconds
Runtime for k_means: 0.003987 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                           2010-2013
k                                            3
Runtime (s)                              0.003
Clusters                                     3
Points per Cluster    {0: 830, 1: 454, 2: 295}
DBCV Score                              -0.802
Mean ANOVA F                           4990.92
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2010-2013,3,0.003,3,"{0: 830, 1: 454, 2: 295}",-0.802,4990.92
1,2010-2013,4,0.003,4,"{0: 295, 1: 254, 2: 200, 3: 830}",-0.918,3440.948
2,2010-2013,5,0.01,5,"{0: 588, 1: 395, 2: 230, 3: 224, 4: 142}",-0.94,2599.567
3,2010-2013,6,0.006,6,"{0: 828, 1: 135, 2: 296, 3: 56, 4: 175, 5: 89}",-0.902,2422.534
4,2010-2013,7,0.018,7,"{0: 588, 1: 79, 2: 54, 3: 141, 4: 130, 5: 395,...",-0.92,1924.668



Running K-Means for Year Range: 2011-2014
Runtime for k_means: 0.010378 seconds
Runtime for k_means: 0.005001 seconds
Runtime for k_means: 0.002996 seconds
Runtime for k_means: 0.004397 seconds
Runtime for k_means: 0.003999 seconds
Runtime for k_means: 0.002971 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                          2011-2014
k                                           3
Runtime (s)                             0.010
Clusters                                    3
Points per Cluster    {0: 72, 1: 172, 2: 622}
DBCV Score                             -0.559
Mean ANOVA F                         2702.294
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2011-2014,3,0.01,3,"{0: 72, 1: 172, 2: 622}",-0.559,2702.294
1,2011-2014,4,0.005,4,"{0: 213, 1: 244, 2: 284, 3: 125}",-0.759,1789.724
2,2011-2014,5,0.003,5,"{0: 172, 1: 278, 2: 131, 3: 213, 4: 72}",-0.871,1496.507
3,2011-2014,6,0.004,6,"{0: 160, 1: 44, 2: 462, 3: 106, 4: 80, 5: 14}",-0.886,1126.825
4,2011-2014,7,0.004,7,"{0: 462, 1: 160, 2: 30, 3: 42, 4: 70, 5: 94, 6...",-0.859,993.049



Running K-Means for Year Range: 2012-2015
Runtime for k_means: 0.002002 seconds
Runtime for k_means: 0.004015 seconds
Runtime for k_means: 0.002506 seconds
Runtime for k_means: 0.005984 seconds
Runtime for k_means: 0.004983 seconds
Runtime for k_means: 0.003004 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                           2012-2015
k                                            3
Runtime (s)                              0.002
Clusters                                     3
Points per Cluster    {0: 408, 1: 298, 2: 164}
DBCV Score                              -0.767
Mean ANOVA F                          2631.497
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2012-2015,3,0.002,3,"{0: 408, 1: 298, 2: 164}",-0.767,2631.497
1,2012-2015,4,0.004,4,"{0: 78, 1: 150, 2: 70, 3: 572}",-0.611,1819.274
2,2012-2015,5,0.003,5,"{0: 147, 1: 194, 2: 221, 3: 104, 4: 204}",-0.865,1424.13
3,2012-2015,6,0.006,6,"{0: 80, 1: 147, 2: 266, 3: 152, 4: 66, 5: 159}",-0.854,1186.544
4,2012-2015,7,0.005,7,"{0: 358, 1: 107, 2: 64, 3: 89, 4: 44, 5: 150, ...",-0.848,1042.301



Running K-Means for Year Range: 2013-2016
Runtime for k_means: 0.003001 seconds
Runtime for k_means: 0.011996 seconds
Runtime for k_means: 0.008515 seconds
Runtime for k_means: 0.005001 seconds
Runtime for k_means: 0.007511 seconds
Runtime for k_means: 0.005486 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                           2013-2016
k                                            3
Runtime (s)                              0.003
Clusters                                     3
Points per Cluster    {0: 538, 1: 131, 2: 658}
DBCV Score                              -0.757
Mean ANOVA F                          1953.602
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2013-2016,3,0.003,3,"{0: 538, 1: 131, 2: 658}",-0.757,1953.602
1,2013-2016,4,0.012,4,"{0: 597, 1: 189, 2: 354, 3: 187}",-0.858,1606.43
2,2013-2016,5,0.009,5,"{0: 240, 1: 354, 2: 486, 3: 186, 4: 61}",-0.87,1244.407
3,2013-2016,6,0.005,6,"{0: 93, 1: 186, 2: 148, 3: 592, 4: 123, 5: 185}",-0.834,1819.52
4,2013-2016,7,0.008,7,"{0: 123, 1: 244, 2: 186, 3: 472, 4: 60, 5: 93,...",-0.845,1583.564



Running K-Means for Year Range: 2014-2017
Runtime for k_means: 0.003001 seconds
Runtime for k_means: 0.004998 seconds
Runtime for k_means: 0.006000 seconds
Runtime for k_means: 0.003998 seconds
Runtime for k_means: 0.050634 seconds
Runtime for k_means: 0.009999 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                   2014-2017
k                                                    4
Runtime (s)                                      0.005
Clusters                                             4
Points per Cluster    {0: 284, 1: 810, 2: 547, 3: 256}
DBCV Score                                      -0.761
Mean ANOVA F                                   4860.46
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2014-2017,3,0.003,3,"{0: 266, 1: 817, 2: 814}",-0.851,2694.893
1,2014-2017,4,0.005,4,"{0: 284, 1: 810, 2: 547, 3: 256}",-0.761,4860.46
2,2014-2017,5,0.006,5,"{0: 1064, 1: 127, 2: 250, 3: 283, 4: 173}",-0.721,2974.687
3,2014-2017,6,0.004,6,"{0: 543, 1: 280, 2: 285, 3: 198, 4: 380, 5: 211}",-0.744,2853.703
4,2014-2017,7,0.051,7,"{0: 247, 1: 127, 2: 100, 3: 625, 4: 173, 5: 28...",-0.862,2169.615



Running K-Means for Year Range: 2015-2018
Runtime for k_means: 0.003053 seconds
Runtime for k_means: 0.006991 seconds
Runtime for k_means: 0.016998 seconds
Runtime for k_means: 0.026004 seconds
Runtime for k_means: 0.014998 seconds
Runtime for k_means: 0.010980 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                            2015-2018
k                                                             5
Runtime (s)                                               0.017
Clusters                                                      5
Points per Cluster    {0: 360, 1: 503, 2: 438, 3: 1095, 4: 246}
DBCV Score                                               -0.866
Mean ANOVA F                                           4272.997
Name: 2, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2015-2018,3,0.003,3,"{0: 1101, 1: 1174, 2: 367}",-0.863,3486.624
1,2015-2018,4,0.007,4,"{0: 1101, 1: 359, 2: 815, 3: 367}",-0.926,2682.248
2,2015-2018,5,0.017,5,"{0: 360, 1: 503, 2: 438, 3: 1095, 4: 246}",-0.866,4272.997
3,2015-2018,6,0.026,6,"{0: 192, 1: 360, 2: 424, 3: 1095, 4: 224, 5: 347}",-0.865,3436.418
4,2015-2018,7,0.015,7,"{0: 490, 1: 415, 2: 166, 3: 280, 4: 680, 5: 37...",-0.827,3860.805



Running K-Means for Year Range: 2016-2019
Runtime for k_means: 0.007004 seconds
Runtime for k_means: 0.013031 seconds
Runtime for k_means: 0.014013 seconds
Runtime for k_means: 0.012999 seconds
Runtime for k_means: 0.012997 seconds
Runtime for k_means: 0.012001 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                            2016-2019
k                                                             5
Runtime (s)                                               0.014
Clusters                                                      5
Points per Cluster    {0: 373, 1: 371, 2: 1137, 3: 586, 4: 243}
DBCV Score                                               -0.896
Mean ANOVA F                                           4889.755
Name: 2, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2016-2019,3,0.007,3,"{0: 1520, 1: 336, 2: 854}",-0.875,3294.831
1,2016-2019,4,0.013,4,"{0: 922, 1: 1188, 2: 317, 3: 283}",-0.836,2350.88
2,2016-2019,5,0.014,5,"{0: 373, 1: 371, 2: 1137, 3: 586, 4: 243}",-0.896,4889.755
3,2016-2019,6,0.013,6,"{0: 316, 1: 1152, 2: 358, 3: 477, 4: 169, 5: 238}",-0.888,3467.311
4,2016-2019,7,0.013,7,"{0: 532, 1: 273, 2: 656, 3: 321, 4: 550, 5: 58...",-0.866,3867.244



Running K-Means for Year Range: 2017-2020
Runtime for k_means: 0.004088 seconds
Runtime for k_means: 0.010505 seconds
Runtime for k_means: 0.017005 seconds
Runtime for k_means: 0.008960 seconds
Runtime for k_means: 0.030223 seconds
Runtime for k_means: 0.008081 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                            2017-2020
k                                             3
Runtime (s)                               0.004
Clusters                                      3
Points per Cluster    {0: 689, 1: 1891, 2: 711}
DBCV Score                               -0.677
Mean ANOVA F                          12733.854
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2017-2020,3,0.004,3,"{0: 689, 1: 1891, 2: 711}",-0.677,12733.854
1,2017-2020,4,0.011,4,"{0: 1439, 1: 1011, 2: 375, 3: 466}",-0.952,3346.78
2,2017-2020,5,0.017,5,"{0: 717, 1: 325, 2: 372, 3: 1384, 4: 493}",-0.843,2320.854
3,2017-2020,6,0.009,6,"{0: 1889, 1: 568, 2: 328, 3: 192, 4: 52, 5: 262}",-0.701,4144.223
4,2017-2020,7,0.03,7,"{0: 298, 1: 360, 2: 491, 3: 677, 4: 718, 5: 42...",-0.86,4733.969



Running K-Means for Year Range: 2018-2021
Runtime for k_means: 0.007000 seconds
Runtime for k_means: 0.016562 seconds
Runtime for k_means: 0.015045 seconds
Runtime for k_means: 0.024648 seconds
Runtime for k_means: 0.029998 seconds
Runtime for k_means: 0.010927 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                    2018-2021
k                                                     4
Runtime (s)                                       0.017
Clusters                                              4
Points per Cluster    {0: 632, 1: 1569, 2: 498, 3: 743}
DBCV Score                                       -0.852
Mean ANOVA F                                  10046.129
Name: 1, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2018-2021,3,0.007,3,"{0: 517, 1: 1352, 2: 1573}",-0.907,4585.85
1,2018-2021,4,0.017,4,"{0: 632, 1: 1569, 2: 498, 3: 743}",-0.852,10046.129
2,2018-2021,5,0.015,5,"{0: 436, 1: 991, 2: 363, 3: 1283, 4: 369}",-0.941,2730.452
3,2018-2021,6,0.025,6,"{0: 369, 1: 277, 2: 722, 3: 434, 4: 357, 5: 1283}",-0.923,2769.737
4,2018-2021,7,0.03,7,"{0: 306, 1: 380, 2: 354, 3: 622, 4: 441, 5: 78...",-0.868,5430.424



Running K-Means for Year Range: 2019-2022
Runtime for k_means: 0.031009 seconds
Runtime for k_means: 0.015441 seconds
Runtime for k_means: 0.008140 seconds
Runtime for k_means: 0.023139 seconds
Runtime for k_means: 0.046536 seconds
Runtime for k_means: 0.009901 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                            2019-2022
k                                                             5
Runtime (s)                                               0.008
Clusters                                                      5
Points per Cluster    {0: 1673, 1: 330, 2: 501, 3: 598, 4: 507}
DBCV Score                                               -0.829
Mean ANOVA F                                           7332.618
Name: 2, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2019-2022,3,0.031,3,"{0: 2286, 1: 352, 2: 971}",-0.788,5694.683
1,2019-2022,4,0.015,4,"{0: 1316, 1: 481, 2: 1303, 3: 509}",-0.883,3617.165
2,2019-2022,5,0.008,5,"{0: 1673, 1: 330, 2: 501, 3: 598, 4: 507}",-0.829,7332.618
3,2019-2022,6,0.023,6,"{0: 563, 1: 429, 2: 188, 3: 435, 4: 1707, 5: 287}",-0.934,4845.388
4,2019-2022,7,0.047,7,"{0: 304, 1: 965, 2: 257, 3: 498, 4: 759, 5: 25...",-0.927,2434.801



Running K-Means for Year Range: 2020-2023
Runtime for k_means: 0.010997 seconds
Runtime for k_means: 0.021108 seconds
Runtime for k_means: 0.025953 seconds
Runtime for k_means: 0.032569 seconds
Runtime for k_means: 0.018064 seconds
Runtime for k_means: 0.007052 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                    2020-2023
k                                                     4
Runtime (s)                                       0.021
Clusters                                              4
Points per Cluster    {0: 2775, 1: 411, 2: 601, 3: 585}
DBCV Score                                       -0.717
Mean ANOVA F                                  11691.106
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,DBCV Score,Mean ANOVA F
0,2020-2023,3,0.011,3,"{0: 1079, 1: 505, 2: 2788}",-0.852,7239.932
1,2020-2023,4,0.021,4,"{0: 2775, 1: 411, 2: 601, 3: 585}",-0.717,11691.106
2,2020-2023,5,0.026,5,"{0: 900, 1: 1572, 2: 531, 3: 747, 4: 622}",-0.9,3498.851
3,2020-2023,6,0.033,6,"{0: 528, 1: 497, 2: 1077, 3: 622, 4: 900, 5: 748}",-0.931,3492.068
4,2020-2023,7,0.018,7,"{0: 410, 1: 539, 2: 183, 3: 560, 4: 576, 5: 15...",-0.873,6299.979


# KMeans
The features used by the clustering are determined by applying pca on the raw dataset.   
After the clustering, we reapply the PCA and plot the clusters in the PC1 / PC2 plane and make the pairplot with the clustered data. 

In [8]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from clustering_utils_pca import ClusterData
import cluster_plotter
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scores

standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)
running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)

data_set = {"2005-2008": np.arange(2005, 2009)}
binned_data = cluster_data_pca.bin_data_for_clustering(data_set, print_res=False)
plot_dir = r"Images\k_means_running_bins_pcabased_pcaevaluated"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:
    print(f"\nRunning K-Means for Year Range: {year_range}")

    X = np.vstack([cluster_data.ecc, cluster_data.mag_obj,
                   cluster_data.sem_maj, cluster_data.diameter,
                   cluster_data.inc, cluster_data.raan]).T
    feature_names = ["Eccentricity e", "Object magnitude [mag]",
                     "Semi major axis [km]", "Diameter [m]",
                     "Inclination [°]", "RAAN [°]"]

    normalized_data, data_min, data_max = normalize_data(X)
    k_values = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    results = []

    for k in k_values:
        result, t, n_clusters, pts_per_cluster, _ = run_clustering(
            k_means, f"K-means (k={k})",
            normalized_data, data_min, data_max, k, init='kmeans++'
        )
        labels = result.labels
        #dbcv = scores.DBCV_score_rust(result)

        results.append({
            "Year Range": year_range,
            "k": k,
            "Runtime (s)": f"{t:.3f}",
            "Clusters": n_clusters,
            "Points per Cluster": pts_per_cluster
        })
    
        df = pd.DataFrame(results)
        # Select best by DBCV
        #best = df.loc[df['DBCV Score'].astype(float).idxmax()]
        #best_k = int(best['k'])

        # Pairplot
        dfp = pd.DataFrame(X, columns=feature_names)
        dfp['cluster'] = labels.astype(str)
        pp = sns.pairplot(dfp, hue='cluster', diag_kind='kde', plot_kws={'alpha':0.6,'s':8})
        pp.fig.suptitle(f"K-Means PCA Clusters {year_range}", y=1.02)
        pp.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_k{k}.png"))
        plt.close(pp.fig)

    """# PCA scatter
    scaler = StandardScaler()
    comps = PCA(2).fit_transform(scaler.fit_transform(X))
    plt.figure(figsize=(8,6))
    plt.scatter(comps[:,0], comps[:,1], c=labels, s=8, alpha=0.7)
    plt.xlabel('PC1'); plt.ylabel('PC2')
    plt.title(f'PCA PC1 vs PC2 {year_range}')
    plt.tight_layout()
    plt.savefig(os.path.join(plot_dir, f"pca_pc1_pc2_{year_range}_k{best_k}.png"))
    plt.close()"""


Running K-Means for Year Range: 2005-2008
Runtime for k_means: 0.025048 seconds
Runtime for k_means: 0.035145 seconds
Runtime for k_means: 0.040534 seconds
Runtime for k_means: 0.036052 seconds
Runtime for k_means: 0.105766 seconds
Runtime for k_means: 0.140035 seconds
Runtime for k_means: 0.122832 seconds
Runtime for k_means: 0.178582 seconds
Runtime for k_means: 0.145988 seconds
Runtime for k_means: 0.311936 seconds


# DBSCAN

In [8]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from DBSCAN import dbscan_clustering
import cluster_plotter
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt
import scores

running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)
binned = cluster_data_pca.bin_data_for_clustering(running_ranges, print_res=False)
plot_dir = r"Images\dbscan_tests_running_bins_pca"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for data_obj, yr in binned:
    print(f"\nRunning DBSCAN for Year Range: {yr}")
    X = np.vstack([data_obj.ecc, data_obj.mag_obj,
                   data_obj.sem_maj, data_obj.diameter,
                   data_obj.inc, data_obj.raan]).T
    normalized, mn, mx = normalize_data(X)

    eps_vals = [0.02, 0.01, 0.015]
    min_samps = [10,15,25,30]
    res_list = []

    for eps in eps_vals:
        for ms in min_samps:
            result, t, n_cl, pts_cl, _ = run_clustering(
                dbscan_clustering, "DBSCAN", normalized, mn, mx,
                eps=eps, min_samples=ms
            )
            labels = result.labels
            dbcv = scores.DBCV_score_rust(result)
            noise = np.sum(labels == -1)
            res_list.append({
                "Year Range": yr,
                "eps": eps,
                "min_samples": ms,
                "Runtime (s)": f"{t:.3f}",
                "Clusters": n_cl,
                "Points per Cluster": pts_cl,
                "Noise Points": noise,
                "DBCV Score": f"{dbcv:.3f}"
            })
            high_dim_analysis.plot_correlation_heatmap(
                data_obj, plot_dir, f"corr_{yr}_eps{eps}_ms{ms}.png"
            )
    
    df = pd.DataFrame(res_list)
    best = df.loc[df['DBCV Score'].astype(float).idxmax()]
    beps, bms = best['eps'], best['min_samples']

    final, _, _, _, _ = run_clustering(
        dbscan_clustering, "DBSCAN", normalized, mn, mx,
        eps=beps, min_samples=bms
    )
    labels = final.labels

    # Pairplot
    dfp = pd.DataFrame(X, columns=["e","mag","a","d","i","r"])
    dfp['cluster'] = labels.astype(str)
    pp = sns.pairplot(dfp, hue='cluster', diag_kind='kde', plot_kws={'alpha':0.6,'s':8})
    pp.fig.suptitle(f"DBSCAN Clusters {yr}", y=1.02)
    pp.savefig(os.path.join(plot_dir, f"pairplot_{yr}_eps{beps}_ms{bms}.png"))
    plt.close(pp.fig)


Running DBSCAN for Year Range: 2002-2005
Runtime for dbscan_clustering: 0.187283 seconds
Runtime for dbscan_clustering: 0.223996 seconds
Runtime for dbscan_clustering: 0.205609 seconds
Runtime for dbscan_clustering: 0.165021 seconds
Runtime for dbscan_clustering: 0.121889 seconds
Runtime for dbscan_clustering: 0.180750 seconds
Runtime for dbscan_clustering: 0.128001 seconds
Runtime for dbscan_clustering: 0.054032 seconds
Runtime for dbscan_clustering: 0.165889 seconds
Runtime for dbscan_clustering: 0.142087 seconds
Runtime for dbscan_clustering: 0.145278 seconds
Runtime for dbscan_clustering: 0.144871 seconds
Runtime for dbscan_clustering: 0.120252 seconds

Running DBSCAN for Year Range: 2003-2006
Runtime for dbscan_clustering: 0.217514 seconds
Runtime for dbscan_clustering: 0.213635 seconds
Runtime for dbscan_clustering: 0.214136 seconds
Runtime for dbscan_clustering: 0.244580 seconds
Runtime for dbscan_clustering: 0.063986 seconds
Runtime for dbscan_clustering: 0.162415 seconds
Runt

# DBSCAN
The features used by the clustering are determined by applying pca on the raw dataset.   
After the clustering, we reapply the PCA and plot the clusters in the PC1 / PC2 plane and make the pairplot with the clustered data. 

In [1]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from DBSCAN import dbscan_clustering
import cluster_plotter
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import scores

running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)
bins = cluster_data_pca.bin_data_for_clustering(running_ranges, print_res=False)
plot_dir = r"Images\\dbscan_tests_running_bins_pcabased_pcaevaluated"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for data_obj, yr in bins:
    print(f"\nRunning DBSCAN for Year Range: {yr}")
    X = np.vstack([data_obj.ecc, data_obj.mag_obj,
                   data_obj.sem_maj, data_obj.diameter,
                   data_obj.inc, data_obj.raan]).T
    norm, mn, mx = normalize_data(X)

    eps_vals = [0.02, 0.01, 0.015]
    ms_vals = [10,15,25,30]
    res = []

    for eps in eps_vals:
        for ms in ms_vals:
            result, t, ncl, pts, _ = run_clustering(
                dbscan_clustering, "DBSCAN", norm, mn, mx,
                eps=eps, min_samples=ms
            )
            lbl = result.labels
            dbcv = scores.DBCV_score_rust(result)
            res.append({
                "Year Range": yr,
                "eps": eps,
                "min_samples": ms,
                "Runtime (s)": f"{t:.3f}",
                "Clusters": ncl,
                "Points per Cluster": pts,
                "DBCV Score": f"{dbcv:.3f}"
            })
    
    df = pd.DataFrame(res)
    best = df.loc[df['DBCV Score'].astype(float).idxmax()]
    beps, bms = best['eps'], best['min_samples']

    final, _, _, _, _ = run_clustering(
        dbscan_clustering, "DBSCAN", norm, mn, mx,
        eps=beps, min_samples=bms
    )
    labels = final.labels

    # Pairplot & PCA scatter
    dfp = pd.DataFrame(X, columns=["e","mag","a","d","i","r"])
    dfp['cluster'] = labels.astype(str)
    pp = sns.pairplot(dfp, hue='cluster', diag_kind='kde', plot_kws={'alpha':0.6,'s':8})
    pp.fig.suptitle(f"DBSCAN PCA Clusters {yr}", y=1.02)
    pp.savefig(os.path.join(plot_dir, f"pairplot_{yr}_eps{beps}_ms{bms}.png"))
    plt.close(pp.fig)

    comps = PCA(2).fit_transform(StandardScaler().fit_transform(X))
    plt.figure(figsize=(8,6))
    plt.scatter(comps[:,0], comps[:,1], c=labels, s=10, alpha=0.7)
    plt.xlabel('PC1'); plt.ylabel('PC2')
    plt.title(f'PCA DBSCAN Clusters {yr}')
    plt.tight_layout()
    plt.savefig(os.path.join(plot_dir, f"pca_clusters_{yr}_eps{beps}_ms{bms}.png"))
    plt.close()



Running DBSCAN for Year Range: 2002-2005
Runtime for dbscan_clustering: 0.075176 seconds
Runtime for dbscan_clustering: 0.083376 seconds
Runtime for dbscan_clustering: 0.083105 seconds
Runtime for dbscan_clustering: 0.066313 seconds
Runtime for dbscan_clustering: 0.050082 seconds
Runtime for dbscan_clustering: 0.066332 seconds
Runtime for dbscan_clustering: 0.050448 seconds
Runtime for dbscan_clustering: 0.049724 seconds
Runtime for dbscan_clustering: 0.066506 seconds
Runtime for dbscan_clustering: 0.046329 seconds
Runtime for dbscan_clustering: 0.050279 seconds
Runtime for dbscan_clustering: 0.065580 seconds
Runtime for dbscan_clustering: 0.066485 seconds

Running DBSCAN for Year Range: 2003-2006
Runtime for dbscan_clustering: 0.103164 seconds
Runtime for dbscan_clustering: 0.080681 seconds
Runtime for dbscan_clustering: 0.084422 seconds
Runtime for dbscan_clustering: 0.099679 seconds
Runtime for dbscan_clustering: 0.051666 seconds
Runtime for dbscan_clustering: 0.071409 seconds
Runt