# KMeans

In [2]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils_pca import ClusterData
import cluster_plotter
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt

standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)  # Include 2023
running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data_pca.bin_data_for_clustering(running_ranges, print_res=False)

k_values = [5, 6, 7]

results_per_year_range = {}

# Output directory for plots
plot_dir = "Images\\k_means_running_bins_pca"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:  
    print(f"\nRunning K-Means for Year Range: {year_range}")

    data_array = np.array([cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj, cluster_data.diameter, cluster_data.inc, cluster_data.raan]).T
    feature_names = ["Eccentricity e", "Object magnitude [mag]", "Semi major axis [km]", "Diameter [m]", "Inclination [°]", "RAAN [°]"]

    normalized_data, data_min, data_max = normalize_data(data_array)

    k_values = [3, 4, 5, 6, 7]

    results = []

    for k in k_values:
        result_kmeans, time_kmeans, n_clusters_kmeans, points_per_cluster_kmeans, metrics_kmeans = run_clustering(
            k_means, f"K-means (k={k})", normalized_data, data_min, data_max, k, init='kmeans++'
        )

        unnormalized_data, cluster_centers = unnormalize(
            result_kmeans.data, result_kmeans.cluster_centers, data_min, data_max
        )

        results.append({
            "Year Range": year_range,
            "k": k,
            "Runtime (s)": f"{time_kmeans:.3f}",
            "Clusters": n_clusters_kmeans,
            "Points per Cluster": points_per_cluster_kmeans,
            "Davies-Bouldin": f"{metrics_kmeans[0]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[0], (int, float)) else None,
            "Calinski-Harabasz": f"{metrics_kmeans[1]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[1], (int, float)) else None,
            "Dunn Index": f"{metrics_kmeans[2]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[2], (int, float)) else None,
            "Silhouette Score": f"{metrics_kmeans[3]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[3], (int, float)) else None
        })

        labels = result_kmeans.labels
        """high_dim_analysis.plot_co_membership(
            labels,
            output_folder=plot_dir,
            filename=f"co_membership_{year_range}_k{k}.png"
        )"""
        high_dim_analysis.plot_correlation_heatmap(
            cluster_data,
            output_folder=plot_dir,
            filename=f"corr_{year_range}_k{k}.png"
        )
        high_dim_analysis.plot_mutual_information(
            cluster_data,
            labels,
            output_folder=plot_dir,
            filename=f"mi_{year_range}_k{k}.png"
        )

        from sklearn.feature_selection import f_classif
        X = np.array([cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj,
              cluster_data.diameter, cluster_data.inc, cluster_data.raan]).T
        valid = labels != -1  # Exclude noise points (usually not present for kmeans)
        if np.any(valid) and len(np.unique(labels[valid])) > 1:
            f_vals, _ = f_classif(X[valid], labels[valid])
            mean_f = np.mean(f_vals)
        else:
            mean_f = 0.0  # Avoid invalid or single-cluster cases

        results[-1]["Mean ANOVA F"] = f"{mean_f:.3f}"

    df = pd.DataFrame(results)
    df["Mean ANOVA F"] = pd.to_numeric(df["Mean ANOVA F"], errors="coerce")
    df_sorted = df.sort_values(by="Mean ANOVA F", ascending=False)
    best_params = df_sorted.iloc[0]
    best_k = best_params["k"]

    # Re-run clustering with best parameters to get labels
    best_result, _, _, _, _ = run_clustering(
        k_means, f"K-means (k={best_k})", normalized_data, data_min, data_max, int(best_k), init='kmeans++'
    )
    best_labels = best_result.labels

    # Plot pairplot colored by cluster labels and save to plot_dir
    df_plot = pd.DataFrame(data_array, columns=feature_names)
    df_plot['cluster'] = best_labels.astype(str)  # cluster labels as strings for hue

    pairplot = sns.pairplot(df_plot, hue='cluster', diag_kind='kde', plot_kws={'alpha': 0.6, 's': 8})
    pairplot.fig.suptitle(f"K-Means Clusters for Year Range {year_range}", y=1.02)
    pairplot.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_k{best_k}.png"))
    plt.close(pairplot.fig)

    # Plot ANOVA F-values only for best result
    high_dim_analysis.plot_anova_f_values(
        cluster_data,
        best_labels,
        output_folder=plot_dir,
        filename=f"anova_best_{year_range}_k{best_k}.png",
        title=f"ANOVA F-values (Best KMeans Parameters), k = {int(best_k)}"
    )
    print("Best KMeans parameters based on ANOVA F-value:")
    print(best_params)

    display(df)


Running K-Means for Year Range: 2002-2005
Runtime for k_means: 0.007735 seconds
Runtime for k_means: 0.018622 seconds
Runtime for k_means: 0.047746 seconds
Runtime for k_means: 0.091677 seconds
Runtime for k_means: 0.141947 seconds
Runtime for k_means: 0.047482 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2002-2005
k                                               3
Runtime (s)                                 0.008
Clusters                                        3
Points per Cluster    {0: 4362, 1: 1359, 2: 2316}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            28742.022
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2002-2005,3,0.008,3,"{0: 4362, 1: 1359, 2: 2316}",,,,,28742.022
1,2002-2005,4,0.019,4,"{0: 1208, 1: 3155, 2: 1359, 3: 2315}",,,,,19750.659
2,2002-2005,5,0.048,5,"{0: 1495, 1: 514, 2: 1359, 3: 2354, 4: 2315}",,,,,15038.282
3,2002-2005,6,0.092,6,"{0: 735, 1: 3154, 2: 499, 3: 859, 4: 1208, 5: ...",,,,,12884.282
4,2002-2005,7,0.142,7,"{0: 1578, 1: 180, 2: 3041, 3: 739, 4: 495, 5: ...",,,,,10986.996



Running K-Means for Year Range: 2003-2006
Runtime for k_means: 0.051761 seconds
Runtime for k_means: 0.038968 seconds
Runtime for k_means: 0.070328 seconds
Runtime for k_means: 0.058903 seconds
Runtime for k_means: 0.137317 seconds
Runtime for k_means: 0.024183 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                              2003-2006
k                                               3
Runtime (s)                                 0.052
Clusters                                        3
Points per Cluster    {0: 1335, 1: 3874, 2: 3670}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            28907.517
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2003-2006,3,0.052,3,"{0: 1335, 1: 3874, 2: 3670}",,,,,28907.517
1,2003-2006,4,0.039,4,"{0: 2228, 1: 1335, 2: 3871, 3: 1445}",,,,,20539.983
2,2003-2006,5,0.07,5,"{0: 1333, 1: 1625, 2: 751, 3: 1299, 4: 3871}",,,,,16087.032
3,2003-2006,6,0.059,6,"{0: 1296, 1: 3703, 2: 757, 3: 1619, 4: 1273, 5...",,,,,13180.536
4,2003-2006,7,0.137,7,"{0: 611, 1: 1063, 2: 614, 3: 2800, 4: 931, 5: ...",,,,,11531.544



Running K-Means for Year Range: 2004-2007
Runtime for k_means: 0.039803 seconds
Runtime for k_means: 0.021689 seconds
Runtime for k_means: 0.054654 seconds
Runtime for k_means: 0.134437 seconds
Runtime for k_means: 0.099828 seconds
Runtime for k_means: 0.032328 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2004-2007
k                                               3
Runtime (s)                                 0.040
Clusters                                        3
Points per Cluster    {0: 1906, 1: 1088, 2: 5555}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            30331.287
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2004-2007,3,0.04,3,"{0: 1906, 1: 1088, 2: 5555}",,,,,30331.287
1,2004-2007,4,0.022,4,"{0: 1366, 1: 1904, 2: 4192, 3: 1087}",,,,,21087.568
2,2004-2007,5,0.055,5,"{0: 1446, 1: 1365, 2: 4192, 3: 628, 4: 918}",,,,,16515.739
3,2004-2007,6,0.134,6,"{0: 2963, 1: 1453, 2: 710, 3: 1885, 4: 613, 5:...",,,,,13646.28
4,2004-2007,7,0.1,7,"{0: 1868, 1: 954, 2: 1456, 3: 577, 4: 2921, 5:...",,,,,11480.965



Running K-Means for Year Range: 2005-2008
Runtime for k_means: 0.158018 seconds
Runtime for k_means: 0.047283 seconds
Runtime for k_means: 0.063148 seconds
Runtime for k_means: 0.079402 seconds
Runtime for k_means: 0.333159 seconds
Runtime for k_means: 0.046677 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2005-2008
k                                              3
Runtime (s)                                0.158
Clusters                                       3
Points per Cluster    {0: 941, 1: 5096, 2: 1584}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           26936.995
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2005-2008,3,0.158,3,"{0: 941, 1: 5096, 2: 1584}",,,,,26936.995
1,2005-2008,4,0.047,4,"{0: 3859, 1: 937, 2: 1239, 3: 1586}",,,,,18755.924
2,2005-2008,5,0.063,5,"{0: 3730, 1: 1204, 2: 940, 3: 1583, 4: 164}",,,,,14305.884
3,2005-2008,6,0.079,6,"{0: 5094, 1: 469, 2: 518, 3: 1094, 4: 334, 5: ...",,,,,11559.887
4,2005-2008,7,0.333,7,"{0: 1702, 1: 778, 2: 409, 3: 664, 4: 2730, 5: ...",,,,,10296.642



Running K-Means for Year Range: 2006-2009
Runtime for k_means: 0.086057 seconds
Runtime for k_means: 0.079655 seconds
Runtime for k_means: 0.143041 seconds
Runtime for k_means: 0.094749 seconds
Runtime for k_means: 0.221040 seconds
Runtime for k_means: 0.047336 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                             2006-2009
k                                              3
Runtime (s)                                0.086
Clusters                                       3
Points per Cluster    {0: 5131, 1: 1479, 2: 869}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                            26424.32
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2006-2009,3,0.086,3,"{0: 5131, 1: 1479, 2: 869}",,,,,26424.32
1,2006-2009,4,0.08,4,"{0: 3924, 1: 1209, 2: 1480, 3: 866}",,,,,18389.167
2,2006-2009,5,0.143,5,"{0: 1206, 1: 1153, 2: 705, 3: 488, 4: 3927}",,,,,14500.63
3,2006-2009,6,0.095,6,"{0: 1944, 1: 1154, 2: 1141, 3: 701, 4: 2048, 5...",,,,,11933.265
4,2006-2009,7,0.221,7,"{0: 601, 1: 1208, 2: 99, 3: 910, 4: 3923, 5: 4...",,,,,10106.586



Running K-Means for Year Range: 2007-2010
Runtime for k_means: 0.008806 seconds
Runtime for k_means: 0.031872 seconds
Runtime for k_means: 0.032060 seconds
Runtime for k_means: 0.038829 seconds
Runtime for k_means: 0.149361 seconds
Runtime for k_means: 0.011548 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2007-2010
k                                              3
Runtime (s)                                0.009
Clusters                                       3
Points per Cluster    {0: 993, 1: 1634, 2: 3059}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           20355.234
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2007-2010,3,0.009,3,"{0: 993, 1: 1634, 2: 3059}",,,,,20355.234
1,2007-2010,4,0.032,4,"{0: 1634, 1: 939, 2: 1601, 3: 1512}",,,,,13858.303
2,2007-2010,5,0.032,5,"{0: 1353, 1: 1013, 2: 2172, 3: 622, 4: 526}",,,,,10757.314
3,2007-2010,6,0.039,6,"{0: 824, 1: 2163, 2: 527, 3: 1362, 4: 331, 5: ...",,,,,9106.068
4,2007-2010,7,0.149,7,"{0: 69, 1: 470, 2: 2128, 3: 519, 4: 1336, 5: 3...",,,,,7682.181



Running K-Means for Year Range: 2008-2011
Runtime for k_means: 0.016881 seconds
Runtime for k_means: 0.006036 seconds
Runtime for k_means: 0.013081 seconds
Runtime for k_means: 0.100153 seconds
Runtime for k_means: 0.036608 seconds
Runtime for k_means: 0.008579 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2008-2011
k                                              3
Runtime (s)                                0.017
Clusters                                       3
Points per Cluster    {0: 744, 1: 1206, 2: 2114}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           13357.123
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2008-2011,3,0.017,3,"{0: 744, 1: 1206, 2: 2114}",,,,,13357.123
1,2008-2011,4,0.006,4,"{0: 515, 1: 2114, 2: 744, 3: 691}",,,,,9228.152
2,2008-2011,5,0.013,5,"{0: 699, 1: 1146, 2: 515, 3: 1013, 4: 691}",,,,,7112.643
3,2008-2011,6,0.1,6,"{0: 369, 1: 2114, 2: 439, 3: 199, 4: 200, 5: 743}",,,,,5929.311
4,2008-2011,7,0.037,7,"{0: 948, 1: 391, 2: 1555, 3: 355, 4: 214, 5: 2...",,,,,5182.57



Running K-Means for Year Range: 2009-2012
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.015770 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.031789 seconds
Runtime for k_means: 0.061952 seconds
Runtime for k_means: 0.000000 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2009-2012
k                                              3
Runtime (s)                                0.000
Clusters                                       3
Points per Cluster    {0: 1837, 1: 1080, 2: 643}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           11684.736
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2009-2012,3,0.0,3,"{0: 1837, 1: 1080, 2: 643}",,,,,11684.736
1,2009-2012,4,0.016,4,"{0: 2477, 1: 213, 2: 387, 3: 483}",,,,,8008.599
2,2009-2012,5,0.0,5,"{0: 1788, 1: 681, 2: 622, 3: 70, 4: 399}",,,,,6458.495
3,2009-2012,6,0.032,6,"{0: 861, 1: 486, 2: 375, 3: 336, 4: 1283, 5: 219}",,,,,5263.214
4,2009-2012,7,0.062,7,"{0: 295, 1: 613, 2: 836, 3: 160, 4: 1029, 5: 1...",,,,,4516.362



Running K-Means for Year Range: 2010-2013
Runtime for k_means: 0.014661 seconds
Runtime for k_means: 0.014274 seconds
Runtime for k_means: 0.016446 seconds
Runtime for k_means: 0.015836 seconds
Runtime for k_means: 0.016240 seconds
Runtime for k_means: 0.015011 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                            2010-2013
k                                             3
Runtime (s)                               0.015
Clusters                                      3
Points per Cluster    {0: 1124, 1: 255, 2: 200}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                           4929.166
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2010-2013,3,0.015,3,"{0: 1124, 1: 255, 2: 200}",,,,,4929.166
1,2010-2013,4,0.014,4,"{0: 830, 1: 224, 2: 230, 3: 295}",,,,,3418.334
2,2010-2013,5,0.016,5,"{0: 830, 1: 161, 2: 295, 3: 196, 4: 97}",,,,,2697.048
3,2010-2013,6,0.016,6,"{0: 830, 1: 105, 2: 86, 3: 160, 4: 103, 5: 295}",,,,,2232.557
4,2010-2013,7,0.016,7,"{0: 83, 1: 828, 2: 32, 3: 295, 4: 181, 5: 57, ...",,,,,2617.119



Running K-Means for Year Range: 2011-2014
Runtime for k_means: 0.004019 seconds
Runtime for k_means: 0.003014 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.018945 seconds


[WinError 2] Das System kann die angegebene Datei nicht finden
  File "c:\Users\fionu\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Runtime for k_means: 0.002391 seconds
Runtime for k_means: 0.000000 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                          2011-2014
k                                           3
Runtime (s)                             0.004
Clusters                                    3
Points per Cluster    {0: 76, 1: 168, 2: 622}
Davies-Bouldin                           None
Calinski-Harabasz                        None
Dunn Index                               None
Silhouette Score                         None
Mean ANOVA F                         2696.381
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2011-2014,3,0.004,3,"{0: 76, 1: 168, 2: 622}",,,,,2696.381
1,2011-2014,4,0.003,4,"{0: 51, 1: 622, 2: 66, 3: 127}",,,,,1795.836
2,2011-2014,5,0.0,5,"{0: 170, 1: 18, 2: 452, 3: 162, 4: 64}",,,,,1427.163
3,2011-2014,6,0.019,6,"{0: 196, 1: 244, 2: 4, 3: 131, 4: 262, 5: 29}",,,,,1152.666
4,2011-2014,7,0.002,7,"{0: 162, 1: 429, 2: 76, 3: 45, 4: 31, 5: 108, ...",,,,,957.179



Running K-Means for Year Range: 2012-2015
Runtime for k_means: 0.008214 seconds
Runtime for k_means: 0.001984 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.010606 seconds
Runtime for k_means: 0.000000 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                           2012-2015
k                                            3
Runtime (s)                              0.008
Clusters                                     3
Points per Cluster    {0: 104, 1: 572, 2: 194}
Davies-Bouldin                            None
Calinski-Harabasz                         None
Dunn Index                                None
Silhouette Score                          None
Mean ANOVA F                          2729.914
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2012-2015,3,0.008,3,"{0: 104, 1: 572, 2: 194}",,,,,2729.914
1,2012-2015,4,0.002,4,"{0: 408, 1: 103, 2: 195, 3: 164}",,,,,1823.156
2,2012-2015,5,0.0,5,"{0: 164, 1: 149, 2: 408, 3: 81, 4: 68}",,,,,1433.381
3,2012-2015,6,0.0,6,"{0: 164, 1: 408, 2: 59, 3: 84, 4: 115, 5: 40}",,,,,1188.963
4,2012-2015,7,0.011,7,"{0: 218, 1: 82, 2: 136, 3: 63, 4: 68, 5: 148, ...",,,,,1019.174



Running K-Means for Year Range: 2013-2016
Runtime for k_means: 0.012627 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.010154 seconds
Runtime for k_means: 0.020447 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.002054 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                   2013-2016
k                                                    4
Runtime (s)                                      0.000
Clusters                                             4
Points per Cluster    {0: 592, 1: 382, 2: 184, 3: 169}
Davies-Bouldin                                    None
Calinski-Harabasz                                 None
Dunn Index                                        None
Silhouette Score                                  None
Mean ANOVA F                                  3214.639
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2013-2016,3,0.013,3,"{0: 538, 1: 189, 2: 600}",,,,,2052.557
1,2013-2016,4,0.0,4,"{0: 592, 1: 382, 2: 184, 3: 169}",,,,,3214.639
2,2013-2016,5,0.01,5,"{0: 151, 1: 184, 2: 592, 3: 243, 4: 157}",,,,,2410.279
3,2013-2016,6,0.02,6,"{0: 291, 1: 164, 2: 166, 3: 153, 4: 320, 5: 233}",,,,,2108.234
4,2013-2016,7,0.0,7,"{0: 186, 1: 245, 2: 472, 3: 146, 4: 60, 5: 89,...",,,,,1558.346



Running K-Means for Year Range: 2014-2017
Runtime for k_means: 0.020007 seconds
Runtime for k_means: 0.010375 seconds
Runtime for k_means: 0.014458 seconds
Runtime for k_means: 0.040418 seconds
Runtime for k_means: 0.075685 seconds
Runtime for k_means: 0.012994 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                   2014-2017
k                                                    4
Runtime (s)                                      0.010
Clusters                                             4
Points per Cluster    {0: 547, 1: 284, 2: 256, 3: 810}
Davies-Bouldin                                    None
Calinski-Harabasz                                 None
Dunn Index                                        None
Silhouette Score                                  None
Mean ANOVA F                                   4860.46
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2014-2017,3,0.02,3,"{0: 522, 1: 1076, 2: 299}",,,,,3054.936
1,2014-2017,4,0.01,4,"{0: 547, 1: 284, 2: 256, 3: 810}",,,,,4860.46
2,2014-2017,5,0.014,5,"{0: 248, 1: 812, 2: 399, 3: 255, 4: 183}",,,,,3074.238
3,2014-2017,6,0.04,6,"{0: 234, 1: 401, 2: 250, 3: 421, 4: 422, 5: 169}",,,,,1451.73
4,2014-2017,7,0.076,7,"{0: 314, 1: 251, 2: 272, 3: 207, 4: 91, 5: 569...",,,,,1142.13



Running K-Means for Year Range: 2015-2018
Runtime for k_means: 0.005797 seconds
Runtime for k_means: 0.029065 seconds
Runtime for k_means: 0.024815 seconds
Runtime for k_means: 0.025358 seconds
Runtime for k_means: 0.070654 seconds
Runtime for k_means: 0.019494 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                            2015-2018
k                                             3
Runtime (s)                               0.006
Clusters                                      3
Points per Cluster    {0: 1454, 1: 691, 2: 497}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                          10658.635
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2015-2018,3,0.006,3,"{0: 1454, 1: 691, 2: 497}",,,,,10658.635
1,2015-2018,4,0.029,4,"{0: 440, 1: 1453, 2: 503, 3: 246}",,,,,5385.552
2,2015-2018,5,0.025,5,"{0: 1095, 1: 503, 2: 360, 3: 246, 4: 438}",,,,,4304.804
3,2015-2018,6,0.025,6,"{0: 415, 1: 190, 2: 493, 3: 466, 4: 798, 5: 280}",,,,,4509.967
4,2015-2018,7,0.071,7,"{0: 243, 1: 347, 2: 311, 3: 424, 4: 221, 5: 90...",,,,,2958.312



Running K-Means for Year Range: 2016-2019
Runtime for k_means: 0.061162 seconds
Runtime for k_means: 0.016290 seconds
Runtime for k_means: 0.048924 seconds
Runtime for k_means: 0.080628 seconds
Runtime for k_means: 0.049404 seconds
Runtime for k_means: 0.024758 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                           2016-2019
k                                                            5
Runtime (s)                                              0.049
Clusters                                                     5
Points per Cluster    {0: 220, 1: 672, 2: 466, 3: 516, 4: 836}
Davies-Bouldin                                            None
Calinski-Harabasz                                         None
Dunn Index                                                None
Silhouette Score                                          None
Mean ANOVA F                                           3903.52
Name: 2, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2016-2019,3,0.061,3,"{0: 854, 1: 1520, 2: 336}",,,,,3294.831
1,2016-2019,4,0.016,4,"{0: 371, 1: 1150, 2: 373, 3: 816}",,,,,2650.839
2,2016-2019,5,0.049,5,"{0: 220, 1: 672, 2: 466, 3: 516, 4: 836}",,,,,3903.52
3,2016-2019,6,0.081,6,"{0: 922, 1: 515, 2: 274, 3: 466, 4: 220, 5: 313}",,,,,3269.093
4,2016-2019,7,0.049,7,"{0: 533, 1: 317, 2: 452, 3: 199, 4: 49, 5: 500...",,,,,3068.579



Running K-Means for Year Range: 2017-2020
Runtime for k_means: 0.018873 seconds
Runtime for k_means: 0.015824 seconds
Runtime for k_means: 0.041817 seconds
Runtime for k_means: 0.040318 seconds
Runtime for k_means: 0.044750 seconds
Runtime for k_means: 0.021078 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                    2017-2020
k                                                     4
Runtime (s)                                       0.016
Clusters                                              4
Points per Cluster    {0: 460, 1: 694, 2: 705, 3: 1432}
Davies-Bouldin                                     None
Calinski-Harabasz                                  None
Dunn Index                                         None
Silhouette Score                                   None
Mean ANOVA F                                   8314.747
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2017-2020,3,0.019,3,"{0: 466, 1: 1440, 2: 1385}",,,,,4306.678
1,2017-2020,4,0.016,4,"{0: 460, 1: 694, 2: 705, 3: 1432}",,,,,8314.747
2,2017-2020,5,0.042,5,"{0: 242, 1: 1090, 2: 560, 3: 688, 4: 711}",,,,,6542.436
3,2017-2020,6,0.04,6,"{0: 1432, 1: 575, 2: 544, 3: 461, 4: 228, 5: 51}",,,,,4304.679
4,2017-2020,7,0.045,7,"{0: 576, 1: 972, 2: 538, 3: 417, 4: 54, 5: 501...",,,,,3769.557



Running K-Means for Year Range: 2018-2021
Runtime for k_means: 0.025156 seconds
Runtime for k_means: 0.021615 seconds
Runtime for k_means: 0.032130 seconds
Runtime for k_means: 0.045914 seconds
Runtime for k_means: 0.043960 seconds
Runtime for k_means: 0.032028 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                            2018-2021
k                                                             5
Runtime (s)                                               0.032
Clusters                                                      5
Points per Cluster    {0: 629, 1: 305, 2: 497, 3: 1569, 4: 442}
Davies-Bouldin                                             None
Calinski-Harabasz                                          None
Dunn Index                                                 None
Silhouette Score                                           None
Mean ANOVA F                                           7487.473
Name: 2, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2018-2021,3,0.025,3,"{0: 517, 1: 1352, 2: 1573}",,,,,4585.85
1,2018-2021,4,0.022,4,"{0: 517, 1: 363, 2: 992, 3: 1570}",,,,,3543.581
2,2018-2021,5,0.032,5,"{0: 629, 1: 305, 2: 497, 3: 1569, 4: 442}",,,,,7487.473
3,2018-2021,6,0.046,6,"{0: 499, 1: 520, 2: 390, 3: 266, 4: 1567, 5: 200}",,,,,4582.357
4,2018-2021,7,0.044,7,"{0: 1170, 1: 539, 2: 251, 3: 647, 4: 328, 5: 2...",,,,,4041.404



Running K-Means for Year Range: 2019-2022
Runtime for k_means: 0.032916 seconds
Runtime for k_means: 0.018408 seconds
Runtime for k_means: 0.015862 seconds
Runtime for k_means: 0.063754 seconds
Runtime for k_means: 0.066640 seconds
Runtime for k_means: 0.020156 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2019-2022
k                                              3
Runtime (s)                                0.033
Clusters                                       3
Points per Cluster    {0: 1717, 1: 1317, 2: 575}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                            5229.101
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2019-2022,3,0.033,3,"{0: 1717, 1: 1317, 2: 575}",,,,,5229.101
1,2019-2022,4,0.018,4,"{0: 1720, 1: 571, 2: 515, 3: 803}",,,,,4539.42
2,2019-2022,5,0.016,5,"{0: 1302, 1: 481, 2: 966, 3: 352, 4: 508}",,,,,3305.395
3,2019-2022,6,0.064,6,"{0: 170, 1: 414, 2: 2262, 3: 300, 4: 350, 5: 113}",,,,,4634.594
4,2019-2022,7,0.067,7,"{0: 572, 1: 500, 2: 759, 3: 261, 4: 954, 5: 30...",,,,,2432.329



Running K-Means for Year Range: 2020-2023
Runtime for k_means: 0.019859 seconds
Runtime for k_means: 0.030695 seconds
Runtime for k_means: 0.047903 seconds
Runtime for k_means: 0.158988 seconds
Runtime for k_means: 0.131779 seconds
Runtime for k_means: 0.067818 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                             2020-2023
k                                                              5
Runtime (s)                                                0.048
Clusters                                                       5
Points per Cluster    {0: 1545, 1: 586, 2: 613, 3: 1001, 4: 627}
Davies-Bouldin                                              None
Calinski-Harabasz                                           None
Dunn Index                                                  None
Silhouette Score                                            None
Mean ANOVA F                                            9320.676
Name: 2, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2020-2023,3,0.02,3,"{0: 746, 1: 2054, 2: 1572}",,,,,6472.985
1,2020-2023,4,0.031,4,"{0: 621, 1: 1547, 2: 632, 3: 1572}",,,,,4481.522
2,2020-2023,5,0.048,5,"{0: 1545, 1: 586, 2: 613, 3: 1001, 4: 627}",,,,,9320.676
3,2020-2023,6,0.159,6,"{0: 862, 1: 615, 2: 1547, 3: 402, 4: 315, 5: 631}",,,,,3456.96
4,2020-2023,7,0.132,7,"{0: 1547, 1: 486, 2: 609, 3: 369, 4: 520, 5: 6...",,,,,5173.775


# KMeans
The features used by the clustering are determined by applying pca on the raw dataset.   
After the clustering, we reapply the PCA and plot the clusters in the PC1 / PC2 plane and make the pairplot with the clustered data. 

In [4]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils_pca import ClusterData
import cluster_plotter
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)
running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data_pca.bin_data_for_clustering(running_ranges, print_res=False)

k_values = [5, 6, 7]

results_per_year_range = {}

plot_dir = "Images\\k_means_running_bins_pcabased_pcaevaluated"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:
    print(f"\nRunning K-Means for Year Range: {year_range}")

    data_array = np.array([cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj, cluster_data.diameter, cluster_data.inc, cluster_data.raan]).T
    feature_names = ["Eccentricity e", "Object magnitude [mag]", "Semi major axis [km]", "Diameter [m]", "Inclination [°]", "RAAN [°]"]

    normalized_data, data_min, data_max = normalize_data(data_array)

    k_values = [3, 4, 5, 6, 7]
    results = []

    for k in k_values:
        result_kmeans, time_kmeans, n_clusters_kmeans, points_per_cluster_kmeans, metrics_kmeans = run_clustering(
            k_means, f"K-means (k={k})", normalized_data, data_min, data_max, k, init='kmeans++'
        )

        unnormalized_data, cluster_centers = unnormalize(
            result_kmeans.data, result_kmeans.cluster_centers, data_min, data_max
        )

        results.append({
            "Year Range": year_range,
            "k": k,
            "Runtime (s)": f"{time_kmeans:.3f}",
            "Clusters": n_clusters_kmeans,
            "Points per Cluster": points_per_cluster_kmeans,
            "Davies-Bouldin": f"{metrics_kmeans[0]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[0], (int, float)) else None,
            "Calinski-Harabasz": f"{metrics_kmeans[1]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[1], (int, float)) else None,
            "Dunn Index": f"{metrics_kmeans[2]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[2], (int, float)) else None,
            "Silhouette Score": f"{metrics_kmeans[3]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[3], (int, float)) else None
        })

        labels = result_kmeans.labels

        high_dim_analysis.plot_correlation_heatmap(
            cluster_data,
            output_folder=plot_dir,
            filename=f"corr_{year_range}_k{k}.png"
        )
        high_dim_analysis.plot_mutual_information(
            cluster_data,
            labels,
            output_folder=plot_dir,
            filename=f"mi_{year_range}_k{k}.png"
        )

        from sklearn.feature_selection import f_classif
        X = data_array
        valid = labels != -1
        if np.any(valid) and len(np.unique(labels[valid])) > 1:
            f_vals, _ = f_classif(X[valid], labels[valid])
            mean_f = np.mean(f_vals)
        else:
            mean_f = 0.0

        results[-1]["Mean ANOVA F"] = f"{mean_f:.3f}"

    df = pd.DataFrame(results)
    df["Mean ANOVA F"] = pd.to_numeric(df["Mean ANOVA F"], errors="coerce")
    df_sorted = df.sort_values(by="Mean ANOVA F", ascending=False)
    best_params = df_sorted.iloc[0]
    best_k = best_params["k"]

    best_result, _, _, _, _ = run_clustering(
        k_means, f"K-means (k={best_k})", normalized_data, data_min, data_max, int(best_k), init='kmeans++'
    )
    best_labels = best_result.labels

    df_plot = pd.DataFrame(data_array, columns=feature_names)
    df_plot['cluster'] = best_labels.astype(str)

    pairplot = sns.pairplot(df_plot, hue='cluster', diag_kind='kde', plot_kws={'alpha': 0.6, 's': 8})
    pairplot.fig.suptitle(f"K-Means Clusters for Year Range {year_range}", y=1.02)
    pairplot.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_k{best_k}.png"))
    plt.close(pairplot.fig)

    # PCA Plot colored by clusters
    scaler = StandardScaler()
    scaled = scaler.fit_transform(data_array)
    pca = PCA(n_components=2)
    components = pca.fit_transform(scaled)
    plt.figure(figsize=(8, 6))
    scatter = plt.scatter(components[:, 0], components[:, 1], c=best_labels, cmap='tab10', s=8, alpha=0.7)
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.title(f'PCA: PC1 vs PC2 ({year_range})')
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(plot_dir, f"pca_pc1_pc2_{year_range}_k{best_k}.png"))
    plt.close()

    high_dim_analysis.plot_anova_f_values(
        cluster_data,
        best_labels,
        output_folder=plot_dir,
        filename=f"anova_best_{year_range}_k{best_k}.png",
        title=f"ANOVA F-values (Best KMeans Parameters), k = {int(best_k)}"
    )
    print("Best KMeans parameters based on ANOVA F-value:")
    print(best_params)

    display(df)


Running K-Means for Year Range: 2002-2005
Runtime for k_means: 0.023695 seconds
Runtime for k_means: 0.033191 seconds
Runtime for k_means: 0.242683 seconds
Runtime for k_means: 0.200656 seconds
Runtime for k_means: 0.411942 seconds
Runtime for k_means: 0.041855 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                              2002-2005
k                                               3
Runtime (s)                                 0.024
Clusters                                        3
Points per Cluster    {0: 3149, 1: 1216, 2: 3672}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            27762.699
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2002-2005,3,0.024,3,"{0: 3149, 1: 1216, 2: 3672}",,,,,27762.699
1,2002-2005,4,0.033,4,"{0: 2316, 1: 3155, 2: 1208, 3: 1358}",,,,,19752.603
2,2002-2005,5,0.243,5,"{0: 1234, 1: 3151, 2: 1211, 3: 772, 4: 1669}",,,,,15254.666
3,2002-2005,6,0.201,6,"{0: 934, 1: 1225, 2: 3146, 3: 1216, 4: 821, 5:...",,,,,12848.121
4,2002-2005,7,0.412,7,"{0: 4360, 1: 967, 2: 455, 3: 465, 4: 450, 5: 6...",,,,,12679.857



Running K-Means for Year Range: 2003-2006
Runtime for k_means: 0.028220 seconds
Runtime for k_means: 0.047288 seconds
Runtime for k_means: 0.066481 seconds
Runtime for k_means: 0.116948 seconds
Runtime for k_means: 0.150879 seconds
Runtime for k_means: 0.015888 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2003-2006
k                                               3
Runtime (s)                                 0.028
Clusters                                        3
Points per Cluster    {0: 5203, 1: 2223, 2: 1453}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            29683.648
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2003-2006,3,0.028,3,"{0: 5203, 1: 2223, 2: 1453}",,,,,29683.648
1,2003-2006,4,0.047,4,"{0: 5201, 1: 1300, 2: 1626, 3: 752}",,,,,20320.299
2,2003-2006,5,0.066,5,"{0: 3870, 1: 1334, 2: 1299, 3: 751, 4: 1625}",,,,,16086.83
3,2003-2006,6,0.117,6,"{0: 886, 1: 3867, 2: 561, 3: 1335, 4: 1501, 5:...",,,,,13645.09
4,2003-2006,7,0.151,7,"{0: 3867, 1: 880, 2: 546, 3: 1427, 4: 701, 5: ...",,,,,11438.42



Running K-Means for Year Range: 2004-2007
Runtime for k_means: 0.063708 seconds
Runtime for k_means: 0.047589 seconds
Runtime for k_means: 0.110463 seconds
Runtime for k_means: 0.206940 seconds
Runtime for k_means: 0.300730 seconds
Runtime for k_means: 0.018130 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2004-2007
k                                               3
Runtime (s)                                 0.064
Clusters                                        3
Points per Cluster    {0: 1903, 1: 5555, 2: 1091}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            30329.468
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2004-2007,3,0.064,3,"{0: 1903, 1: 5555, 2: 1091}",,,,,30329.468
1,2004-2007,4,0.048,4,"{0: 4202, 1: 1356, 2: 1904, 3: 1087}",,,,,21089.604
2,2004-2007,5,0.11,5,"{0: 1446, 1: 4192, 2: 1365, 3: 628, 4: 918}",,,,,16515.739
3,2004-2007,6,0.207,6,"{0: 1364, 1: 4192, 2: 595, 3: 1373, 4: 892, 5:...",,,,,13326.893
4,2004-2007,7,0.301,7,"{0: 827, 1: 343, 2: 1364, 3: 468, 4: 467, 5: 8...",,,,,11758.895



Running K-Means for Year Range: 2005-2008
Runtime for k_means: 0.033839 seconds
Runtime for k_means: 0.025164 seconds
Runtime for k_means: 0.031689 seconds
Runtime for k_means: 0.079833 seconds
Runtime for k_means: 0.104110 seconds
Runtime for k_means: 0.013546 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                             2005-2008
k                                              3
Runtime (s)                                0.034
Clusters                                       3
Points per Cluster    {0: 5097, 1: 1585, 2: 939}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           27000.725
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2005-2008,3,0.034,3,"{0: 5097, 1: 1585, 2: 939}",,,,,27000.725
1,2005-2008,4,0.025,4,"{0: 2733, 1: 664, 2: 1704, 3: 2520}",,,,,18501.721
2,2005-2008,5,0.032,5,"{0: 940, 1: 2736, 2: 1706, 3: 1582, 4: 657}",,,,,14384.049
3,2005-2008,6,0.08,6,"{0: 164, 1: 1254, 2: 1204, 3: 767, 4: 503, 5: ...",,,,,11817.691
4,2005-2008,7,0.104,7,"{0: 2668, 1: 767, 2: 530, 3: 1631, 4: 1225, 5:...",,,,,10250.939



Running K-Means for Year Range: 2006-2009
Runtime for k_means: 0.018518 seconds
Runtime for k_means: 0.038312 seconds
Runtime for k_means: 0.031763 seconds
Runtime for k_means: 0.102149 seconds
Runtime for k_means: 0.063437 seconds
Runtime for k_means: 0.018145 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                             2006-2009
k                                              3
Runtime (s)                                0.019
Clusters                                       3
Points per Cluster    {0: 5131, 1: 1439, 2: 909}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           26352.185
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2006-2009,3,0.019,3,"{0: 5131, 1: 1439, 2: 909}",,,,,26352.185
1,2006-2009,4,0.038,4,"{0: 1206, 1: 908, 2: 1438, 3: 3927}",,,,,18339.128
2,2006-2009,5,0.032,5,"{0: 3924, 1: 488, 2: 705, 3: 1153, 4: 1209}",,,,,14500.695
3,2006-2009,6,0.102,6,"{0: 1195, 1: 1154, 2: 3863, 3: 705, 4: 485, 5:...",,,,,11761.595
4,2006-2009,7,0.063,7,"{0: 1144, 1: 1486, 2: 702, 3: 619, 4: 498, 5: ...",,,,,10227.382



Running K-Means for Year Range: 2007-2010
Runtime for k_means: 0.022404 seconds
Runtime for k_means: 0.030365 seconds
Runtime for k_means: 0.030666 seconds
Runtime for k_means: 0.015849 seconds
Runtime for k_means: 0.088446 seconds
Runtime for k_means: 0.013309 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                             2007-2010
k                                              3
Runtime (s)                                0.022
Clusters                                       3
Points per Cluster    {0: 624, 1: 4049, 2: 1013}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                            20068.34
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2007-2010,3,0.022,3,"{0: 624, 1: 4049, 2: 1013}",,,,,20068.34
1,2007-2010,4,0.03,4,"{0: 4046, 1: 345, 2: 466, 3: 829}",,,,,13295.933
2,2007-2010,5,0.031,5,"{0: 3059, 1: 479, 2: 993, 3: 326, 4: 829}",,,,,10989.218
3,2007-2010,6,0.016,6,"{0: 3057, 1: 578, 2: 266, 3: 258, 4: 992, 5: 535}",,,,,8824.489
4,2007-2010,7,0.088,7,"{0: 1355, 1: 731, 2: 2171, 3: 223, 4: 288, 5: ...",,,,,7869.101



Running K-Means for Year Range: 2008-2011
Runtime for k_means: 0.018436 seconds
Runtime for k_means: 0.022851 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.035016 seconds
Runtime for k_means: 0.034304 seconds
Runtime for k_means: 0.018308 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2008-2011
k                                              3
Runtime (s)                                0.018
Clusters                                       3
Points per Cluster    {0: 2114, 1: 1206, 2: 744}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           13357.123
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2008-2011,3,0.018,3,"{0: 2114, 1: 1206, 2: 744}",,,,,13357.123
1,2008-2011,4,0.023,4,"{0: 2114, 1: 694, 2: 744, 3: 512}",,,,,9226.607
2,2008-2011,5,0.0,5,"{0: 2114, 1: 545, 2: 249, 3: 744, 4: 412}",,,,,7276.49
3,2008-2011,6,0.035,6,"{0: 377, 1: 1517, 2: 419, 3: 540, 4: 964, 5: 247}",,,,,5969.702
4,2008-2011,7,0.034,7,"{0: 947, 1: 254, 2: 1555, 3: 459, 4: 482, 5: 3...",,,,,5387.726



Running K-Means for Year Range: 2009-2012
Runtime for k_means: 0.012860 seconds
Runtime for k_means: 0.014577 seconds
Runtime for k_means: 0.015429 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.067399 seconds
Runtime for k_means: 0.000000 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                             2009-2012
k                                              3
Runtime (s)                                0.013
Clusters                                       3
Points per Cluster    {0: 643, 1: 1080, 2: 1837}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           11684.736
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2009-2012,3,0.013,3,"{0: 643, 1: 1080, 2: 1837}",,,,,11684.736
1,2009-2012,4,0.015,4,"{0: 1837, 1: 461, 2: 619, 3: 643}",,,,,8043.03
2,2009-2012,5,0.015,5,"{0: 643, 1: 375, 2: 486, 3: 219, 4: 1837}",,,,,6413.687
3,2009-2012,6,0.0,6,"{0: 648, 1: 283, 2: 185, 3: 444, 4: 169, 5: 1831}",,,,,5252.971
4,2009-2012,7,0.067,7,"{0: 1827, 1: 243, 2: 368, 3: 161, 4: 649, 5: 2...",,,,,6131.066



Running K-Means for Year Range: 2010-2013
Runtime for k_means: 0.016757 seconds
Runtime for k_means: 0.000930 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.016299 seconds
Runtime for k_means: 0.016315 seconds
Runtime for k_means: 0.018750 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                            2010-2013
k                                             3
Runtime (s)                               0.017
Clusters                                      3
Points per Cluster    {0: 200, 1: 1124, 2: 255}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                           4929.166
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2010-2013,3,0.017,3,"{0: 200, 1: 1124, 2: 255}",,,,,4929.166
1,2010-2013,4,0.001,4,"{0: 830, 1: 310, 2: 295, 3: 144}",,,,,3612.778
2,2010-2013,5,0.0,5,"{0: 830, 1: 209, 2: 295, 3: 96, 4: 149}",,,,,2719.656
3,2010-2013,6,0.016,6,"{0: 274, 1: 92, 2: 198, 3: 366, 4: 484, 5: 165}",,,,,2151.966
4,2010-2013,7,0.016,7,"{0: 93, 1: 46, 2: 457, 3: 165, 4: 197, 5: 254,...",,,,,1832.725



Running K-Means for Year Range: 2011-2014
Runtime for k_means: 0.006040 seconds
Runtime for k_means: 0.004003 seconds
Runtime for k_means: 0.006685 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.015184 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                           2011-2014
k                                            3
Runtime (s)                              0.006
Clusters                                     3
Points per Cluster    {0: 462, 1: 244, 2: 160}
Davies-Bouldin                            None
Calinski-Harabasz                         None
Dunn Index                                None
Silhouette Score                          None
Mean ANOVA F                          2561.188
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2011-2014,3,0.006,3,"{0: 462, 1: 244, 2: 160}",,,,,2561.188
1,2011-2014,4,0.004,4,"{0: 462, 1: 76, 2: 160, 3: 168}",,,,,1908.874
2,2011-2014,5,0.007,5,"{0: 126, 1: 75, 2: 169, 3: 283, 4: 213}",,,,,1511.276
3,2011-2014,6,0.0,6,"{0: 58, 1: 622, 2: 7, 3: 116, 4: 42, 5: 21}",,,,,1129.327
4,2011-2014,7,0.0,7,"{0: 73, 1: 213, 2: 125, 3: 284, 4: 99, 5: 24, ...",,,,,1037.587



Running K-Means for Year Range: 2012-2015
Runtime for k_means: 0.009467 seconds
Runtime for k_means: 0.008403 seconds
Runtime for k_means: 0.016996 seconds
Runtime for k_means: 0.004445 seconds
Runtime for k_means: 0.012353 seconds
Runtime for k_means: 0.000000 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                           2012-2015
k                                            3
Runtime (s)                              0.009
Clusters                                     3
Points per Cluster    {0: 408, 1: 298, 2: 164}
Davies-Bouldin                            None
Calinski-Harabasz                         None
Dunn Index                                None
Silhouette Score                          None
Mean ANOVA F                          2631.497
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2012-2015,3,0.009,3,"{0: 408, 1: 298, 2: 164}",,,,,2631.497
1,2012-2015,4,0.008,4,"{0: 572, 1: 70, 2: 150, 3: 78}",,,,,1819.274
2,2012-2015,5,0.017,5,"{0: 150, 1: 491, 2: 78, 3: 70, 4: 81}",,,,,1421.877
3,2012-2015,6,0.004,6,"{0: 107, 1: 164, 2: 55, 3: 47, 4: 89, 5: 408}",,,,,1217.285
4,2012-2015,7,0.012,7,"{0: 64, 1: 150, 2: 209, 3: 78, 4: 70, 5: 135, ...",,,,,1025.188



Running K-Means for Year Range: 2013-2016
Runtime for k_means: 0.007422 seconds
Runtime for k_means: 0.020767 seconds
Runtime for k_means: 0.017203 seconds
Runtime for k_means: 0.015879 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                                  2013-2016
k                                                                   6
Runtime (s)                                                     0.016
Clusters                                                            6
Points per Cluster    {0: 164, 1: 184, 2: 592, 3: 128, 4: 164, 5: 95}
Davies-Bouldin                                                   None
Calinski-Harabasz                                                None
Dunn Index                                                       None
Silhouette Score                                                 None
Mean ANOVA F                                        


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2013-2016,3,0.007,3,"{0: 600, 1: 538, 2: 189}",,,,,2052.557
1,2013-2016,4,0.021,4,"{0: 186, 1: 597, 2: 355, 3: 189}",,,,,1606.501
2,2013-2016,5,0.017,5,"{0: 597, 1: 147, 2: 110, 3: 284, 4: 189}",,,,,1260.851
3,2013-2016,6,0.016,6,"{0: 164, 1: 184, 2: 592, 3: 128, 4: 164, 5: 95}",,,,,2060.186
4,2013-2016,7,0.0,7,"{0: 265, 1: 150, 2: 194, 3: 67, 4: 117, 5: 90,...",,,,,1604.815



Running K-Means for Year Range: 2014-2017
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.002011 seconds
Runtime for k_means: 0.009224 seconds
Runtime for k_means: 0.012594 seconds
Runtime for k_means: 0.009047 seconds
Runtime for k_means: 0.003008 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                            2014-2017
k                                                             5
Runtime (s)                                               0.009
Clusters                                                      5
Points per Cluster    {0: 275, 1: 1064, 2: 180, 3: 248, 4: 130}
Davies-Bouldin                                             None
Calinski-Harabasz                                          None
Dunn Index                                                 None
Silhouette Score                                           None
Mean ANOVA F                                           3034.501
Name: 2, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2014-2017,3,0.0,3,"{0: 570, 1: 1077, 2: 250}",,,,,2626.228
1,2014-2017,4,0.002,4,"{0: 259, 1: 334, 2: 487, 3: 817}",,,,,2253.56
2,2014-2017,5,0.009,5,"{0: 275, 1: 1064, 2: 180, 3: 248, 4: 130}",,,,,3034.501
3,2014-2017,6,0.013,6,"{0: 329, 1: 198, 2: 492, 3: 216, 4: 380, 5: 282}",,,,,1497.337
4,2014-2017,7,0.009,7,"{0: 640, 1: 246, 2: 180, 3: 130, 4: 226, 5: 27...",,,,,2195.704



Running K-Means for Year Range: 2015-2018
Runtime for k_means: 0.003998 seconds
Runtime for k_means: 0.015816 seconds
Runtime for k_means: 0.007085 seconds
Runtime for k_means: 0.021980 seconds
Runtime for k_means: 0.008455 seconds
Runtime for k_means: 0.000000 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                            2015-2018
k                                             3
Runtime (s)                               0.004
Clusters                                      3
Points per Cluster    {0: 1454, 1: 690, 2: 498}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                          10833.521
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2015-2018,3,0.004,3,"{0: 1454, 1: 690, 2: 498}",,,,,10833.521
1,2015-2018,4,0.016,4,"{0: 1095, 1: 498, 2: 690, 3: 359}",,,,,7380.371
2,2015-2018,5,0.007,5,"{0: 1096, 1: 423, 2: 272, 3: 493, 4: 358}",,,,,5529.666
3,2015-2018,6,0.022,6,"{0: 690, 1: 337, 2: 494, 3: 608, 4: 236, 5: 277}",,,,,4664.892
4,2015-2018,7,0.008,7,"{0: 311, 1: 347, 2: 424, 3: 192, 4: 243, 5: 90...",,,,,2967.807



Running K-Means for Year Range: 2016-2019
Runtime for k_means: 0.014287 seconds
Runtime for k_means: 0.015768 seconds
Runtime for k_means: 0.015837 seconds
Runtime for k_means: 0.015755 seconds
Runtime for k_means: 0.015888 seconds
Runtime for k_means: 0.000000 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                    2016-2019
k                                                     4
Runtime (s)                                       0.016
Clusters                                              4
Points per Cluster    {0: 1506, 1: 517, 2: 467, 3: 220}
Davies-Bouldin                                     None
Calinski-Harabasz                                  None
Dunn Index                                         None
Silhouette Score                                   None
Mean ANOVA F                                   5019.313
Name: 1, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2016-2019,3,0.014,3,"{0: 1187, 1: 373, 2: 1150}",,,,,3427.118
1,2016-2019,4,0.016,4,"{0: 1506, 1: 517, 2: 467, 3: 220}",,,,,5019.313
2,2016-2019,5,0.016,5,"{0: 273, 1: 605, 2: 924, 3: 595, 4: 313}",,,,,4640.553
3,2016-2019,6,0.016,6,"{0: 270, 1: 605, 2: 595, 3: 328, 4: 641, 5: 271}",,,,,3885.918
4,2016-2019,7,0.016,7,"{0: 241, 1: 172, 2: 843, 3: 494, 4: 340, 5: 29...",,,,,2906.153



Running K-Means for Year Range: 2017-2020
Runtime for k_means: 0.011878 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.012217 seconds
Runtime for k_means: 0.005909 seconds
Runtime for k_means: 0.033396 seconds
Runtime for k_means: 0.000000 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                            2017-2020
k                                                             5
Runtime (s)                                               0.012
Clusters                                                      5
Points per Cluster    {0: 1089, 1: 695, 2: 242, 3: 705, 4: 560}
Davies-Bouldin                                             None
Calinski-Harabasz                                          None
Dunn Index                                                 None
Silhouette Score                                           None
Mean ANOVA F                                           6339.677
Name: 2, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2017-2020,3,0.012,3,"{0: 1902, 1: 1010, 2: 379}",,,,,4168.908
1,2017-2020,4,0.0,4,"{0: 1439, 1: 466, 2: 374, 3: 1012}",,,,,3345.543
2,2017-2020,5,0.012,5,"{0: 1089, 1: 695, 2: 242, 3: 705, 4: 560}",,,,,6339.677
3,2017-2020,6,0.006,6,"{0: 1497, 1: 215, 2: 295, 3: 393, 4: 579, 5: 312}",,,,,4109.749
4,2017-2020,7,0.033,7,"{0: 376, 1: 1089, 2: 204, 3: 242, 4: 560, 5: 5...",,,,,3713.071



Running K-Means for Year Range: 2018-2021
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.008412 seconds
Runtime for k_means: 0.023643 seconds
Runtime for k_means: 0.027280 seconds
Runtime for k_means: 0.016788 seconds
Runtime for k_means: 0.000747 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                            2018-2021
k                                             3
Runtime (s)                               0.000
Clusters                                      3
Points per Cluster    {0: 633, 1: 743, 2: 2066}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                          14777.081
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2018-2021,3,0.0,3,"{0: 633, 1: 743, 2: 2066}",,,,,14777.081
1,2018-2021,4,0.008,4,"{0: 1351, 1: 1283, 2: 438, 3: 370}",,,,,3174.747
2,2018-2021,5,0.024,5,"{0: 255, 1: 529, 2: 2061, 3: 197, 4: 400}",,,,,5509.657
3,2018-2021,6,0.027,6,"{0: 277, 1: 1283, 2: 722, 3: 432, 4: 357, 5: 371}",,,,,2769.495
4,2018-2021,7,0.017,7,"{0: 359, 1: 1285, 2: 426, 3: 386, 4: 525, 5: 2...",,,,,4073.793



Running K-Means for Year Range: 2019-2022
Runtime for k_means: 0.033334 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.002816 seconds
Runtime for k_means: 0.016274 seconds
Runtime for k_means: 0.033202 seconds
Runtime for k_means: 0.040421 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                            2019-2022
k                                                             5
Runtime (s)                                               0.003
Clusters                                                      5
Points per Cluster    {0: 635, 1: 1707, 2: 446, 3: 563, 4: 258}
Davies-Bouldin                                             None
Calinski-Harabasz                                          None
Dunn Index                                                 None
Silhouette Score                                           None
Mean ANOVA F                                           6002.438
Name: 2, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2019-2022,3,0.033,3,"{0: 2286, 1: 352, 2: 971}",,,,,5694.683
1,2019-2022,4,0.0,4,"{0: 482, 1: 1302, 2: 509, 3: 1316}",,,,,3616.997
2,2019-2022,5,0.003,5,"{0: 635, 1: 1707, 2: 446, 3: 563, 4: 258}",,,,,6002.438
3,2019-2022,6,0.016,6,"{0: 1307, 1: 495, 2: 330, 3: 501, 4: 469, 5: 507}",,,,,5938.169
4,2019-2022,7,0.033,7,"{0: 1306, 1: 425, 2: 188, 3: 497, 4: 468, 5: 2...",,,,,4182.057



Running K-Means for Year Range: 2020-2023
Runtime for k_means: 0.016548 seconds
Runtime for k_means: 0.017481 seconds
Runtime for k_means: 0.033439 seconds
Runtime for k_means: 0.018434 seconds
Runtime for k_means: 0.040402 seconds
Runtime for k_means: 0.017027 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2020-2023
k                                              3
Runtime (s)                                0.017
Clusters                                       3
Points per Cluster    {0: 2775, 1: 1006, 2: 591}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           17473.893
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2020-2023,3,0.017,3,"{0: 2775, 1: 1006, 2: 591}",,,,,17473.893
1,2020-2023,4,0.017,4,"{0: 1078, 1: 744, 2: 497, 3: 2053}",,,,,5112.742
2,2020-2023,5,0.033,5,"{0: 581, 1: 2040, 2: 412, 3: 741, 4: 598}",,,,,9115.729
3,2020-2023,6,0.018,6,"{0: 875, 1: 631, 2: 314, 3: 615, 4: 1547, 5: 390}",,,,,3447.406
4,2020-2023,7,0.04,7,"{0: 512, 1: 613, 2: 626, 3: 1544, 4: 210, 5: 4...",,,,,5204.487


# DBSCAN

In [3]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils_pca import ClusterData
import cluster_plotter
from DBSCAN import dbscan_clustering
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt

# Define year ranges
standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)  # Include 2023
running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data_pca.bin_data_for_clustering(running_ranges, print_res=False)

results_per_year_range = {}

# Output directory for plots
plot_dir = r"Images\dbscan_tests_running_bins_pca"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:  
    print(f"\nRunning DBSCAN for Year Range: {year_range}")
    data_array = np.array([cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj, cluster_data.diameter, cluster_data.inc, cluster_data.raan]).T
    feature_names = ["Eccentricity e", "Object magnitude [mag]", "Semi major axis [km]", "Diameter [m]", "Inclination [°]", "RAAN [°]"]

    normalized_data, data_min, data_max = normalize_data(data_array)

    # DBSCAN parameter ranges
    eps_values = [0.02, 0.01, 0.015]
    min_samples_values = [10, 15, 25, 30]

    results = []

    for eps in eps_values:
        for min_samples in min_samples_values:
            result_dbscan, time_dbscan, n_clusters_dbscan, points_per_cluster_dbscan, metrics_dbscan = run_clustering(
                dbscan_clustering, "DBSCAN", normalized_data, data_min, data_max, eps=eps, min_samples=min_samples
            )

            unnormalized_data, _ = unnormalize(
                result_dbscan.data, None, data_min, data_max
            )
            # Count noise points
            noise_points = np.sum(result_dbscan.labels == -1)

            results.append({
                "Year Range": year_range,
                "eps": eps,
                "min_samples": min_samples,
                "Runtime (s)": f"{time_dbscan:.3f}",
                "Clusters": n_clusters_dbscan,
                "Points per Cluster": points_per_cluster_dbscan,
                "Noise Points": noise_points,
                "Davies-Bouldin": f"{metrics_dbscan[0]:.3f}" if metrics_dbscan and isinstance(metrics_dbscan[0], (int, float)) else None,
                "Calinski-Harabasz": f"{metrics_dbscan[1]:.3f}" if metrics_dbscan and isinstance(metrics_dbscan[1], (int, float)) else None
            })


            labels = result_dbscan.labels
            """high_dim_analysis.plot_co_membership(
                labels,
                output_folder=plot_dir,
                filename=f"co_membership_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )"""
            high_dim_analysis.plot_correlation_heatmap(
                cluster_data,
                output_folder=plot_dir,
                filename=f"corr_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )
            high_dim_analysis.plot_mutual_information(
                cluster_data,
                labels,
                output_folder=plot_dir,
                filename=f"mi_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )
            
        from sklearn.feature_selection import f_classif
        X = np.array([cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj,
              cluster_data.diameter, cluster_data.inc, cluster_data.raan]).T
        valid = labels != -1  # Exclude noise points
        if np.any(valid) and len(np.unique(labels[valid])) > 1:
            f_vals, _ = f_classif(X[valid], labels[valid])
            weights = np.var(X[valid], axis=0)
            weighted_mean_f = np.average(f_vals, weights=weights)
        else:
            weighted_mean_f = 0.0  # Avoid invalid or single-cluster cases

        results[-1]["Weighted Mean ANOVA F"] = f"{weighted_mean_f:.3f}"

    df = pd.DataFrame(results)

    df["Weighted Mean ANOVA F"] = pd.to_numeric(df["Weighted Mean ANOVA F"], errors="coerce")
    df_sorted = df.sort_values(by="Weighted Mean ANOVA F", ascending=False)
    best_params = df_sorted.iloc[0]
    best_eps = best_params["eps"]
    best_min_samples = best_params["min_samples"]

    # Re-run clustering with best parameters to get labels
    best_result, _, _, _, _ = run_clustering(
        dbscan_clustering, "DBSCAN", normalized_data, data_min, data_max,
        eps=best_eps, min_samples=best_min_samples
    )
    best_labels = best_result.labels

    df_plot = pd.DataFrame(data_array, columns=feature_names)
    df_plot['cluster'] = best_labels.astype(str)

    pairplot = sns.pairplot(df_plot, hue='cluster', diag_kind='kde', plot_kws={'alpha':0.6, 's':40})
    pairplot.fig.suptitle(f"DBSCAN Clusters for Year Range {year_range}", y=1.02)

    # Save the figure to the plot directory
    pairplot.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_eps{best_eps}_minsamples_{best_min_samples}.png"))

    plt.close(pairplot.fig)  # Close the figure to free memory


    # Plot ANOVA F-values only for best result
    high_dim_analysis.plot_anova_f_values(
        cluster_data,
        best_labels,
        output_folder=plot_dir,
        filename=f"anova_best_{year_range}_eps{best_eps}_minsamples_{best_min_samples}.png",
        title=f"ANOVA F-values (Best DBSCAN Parameters), eps = {float(best_eps)}, minsamples = {float(best_min_samples)}"
    )

    print("Best DBSCAN parameters based on ANOVA F-value:")
    print(best_params)
    display(df)



Running DBSCAN for Year Range: 2002-2005
Runtime for dbscan_clustering: 0.160578 seconds
Runtime for dbscan_clustering: 0.160060 seconds
Runtime for dbscan_clustering: 0.174719 seconds
Runtime for dbscan_clustering: 0.159367 seconds
Runtime for dbscan_clustering: 0.129251 seconds
Runtime for dbscan_clustering: 0.165635 seconds
Runtime for dbscan_clustering: 0.124753 seconds
Runtime for dbscan_clustering: 0.139642 seconds
Runtime for dbscan_clustering: 0.158799 seconds
Runtime for dbscan_clustering: 0.161056 seconds
Runtime for dbscan_clustering: 0.127797 seconds
Runtime for dbscan_clustering: 0.142746 seconds
Runtime for dbscan_clustering: 0.174946 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                     2002-2005
eps                                                                 0.02
min_samples                                                           30
Runtime (s)                                                        0.159
Clusters                                                               6
Points per Cluster       {0: 122, 1: 314, 2: 96, 3: 75, 4: 46, -1: 7384}
Noise Points                                                        7384
Davies-Bouldin                                                      None
Calinski-Harabasz                                                   None
Weighted Mean ANOVA F                                            112.751
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2002-2005,0.02,10,0.161,28,"{0: 21, 1: 28, 2: 37, 3: 1009, 4: 17, 5: 141, ...",6159,,,
1,2002-2005,0.02,15,0.16,15,"{0: 15, 1: 21, 2: 210, 3: 717, 4: 13, 5: 25, 6...",6625,,,
2,2002-2005,0.02,25,0.175,6,"{0: 557, 1: 101, 2: 66, 3: 64, 4: 40, -1: 7209}",7209,,,
3,2002-2005,0.02,30,0.159,6,"{0: 122, 1: 314, 2: 96, 3: 75, 4: 46, -1: 7384}",7384,,,112.751
4,2002-2005,0.01,10,0.129,3,"{0: 33, 1: 22, -1: 7982}",7982,,,
5,2002-2005,0.01,15,0.166,2,"{0: 31, -1: 8006}",8006,,,
6,2002-2005,0.01,25,0.125,1,{-1: 8037},8037,,,
7,2002-2005,0.01,30,0.14,1,{-1: 8037},8037,,,0.0
8,2002-2005,0.015,10,0.159,25,"{0: 11, 1: 16, 2: 292, 3: 94, 4: 44, 5: 80, 6:...",7115,,,
9,2002-2005,0.015,15,0.161,8,"{0: 75, 1: 232, 2: 71, 3: 15, 4: 17, 5: 16, 6:...",7587,,,



Running DBSCAN for Year Range: 2003-2006
Runtime for dbscan_clustering: 0.241416 seconds
Runtime for dbscan_clustering: 0.202453 seconds
Runtime for dbscan_clustering: 0.223565 seconds
Runtime for dbscan_clustering: 0.191605 seconds
Runtime for dbscan_clustering: 0.143635 seconds
Runtime for dbscan_clustering: 0.159599 seconds
Runtime for dbscan_clustering: 0.146052 seconds
Runtime for dbscan_clustering: 0.159258 seconds
Runtime for dbscan_clustering: 0.169363 seconds
Runtime for dbscan_clustering: 0.190180 seconds
Runtime for dbscan_clustering: 0.174931 seconds
Runtime for dbscan_clustering: 0.174240 seconds
Runtime for dbscan_clustering: 0.223315 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2003-2006
eps                                                                   0.02
min_samples                                                             30
Runtime (s)                                                          0.192
Clusters                                                                 8
Points per Cluster       {0: 218, 1: 335, 2: 86, 3: 45, 4: 63, 5: 69, 6...
Noise Points                                                          8001
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              160.712
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2003-2006,0.02,10,0.241,22,"{0: 123, 1: 534, 2: 870, 3: 413, 4: 23, 5: 10,...",6606,,,
1,2003-2006,0.02,15,0.202,16,"{0: 316, 1: 217, 2: 121, 3: 710, 4: 89, 5: 30,...",7212,,,
2,2003-2006,0.02,25,0.224,9,"{0: 90, 1: 89, 2: 275, 3: 347, 4: 76, 5: 91, 6...",7804,,,
3,2003-2006,0.02,30,0.192,8,"{0: 218, 1: 335, 2: 86, 3: 45, 4: 63, 5: 69, 6...",8001,,,160.712
4,2003-2006,0.01,10,0.144,7,"{0: 25, 1: 14, 2: 28, 3: 19, 4: 22, 5: 10, -1:...",8761,,,
5,2003-2006,0.01,15,0.16,4,"{0: 20, 1: 15, 2: 22, -1: 8822}",8822,,,
6,2003-2006,0.01,25,0.146,1,{-1: 8879},8879,,,
7,2003-2006,0.01,30,0.159,1,{-1: 8879},8879,,,0.0
8,2003-2006,0.015,10,0.169,28,"{0: 59, 1: 69, 2: 29, 3: 292, 4: 202, 5: 15, 6...",7777,,,
9,2003-2006,0.015,15,0.19,10,"{0: 64, 1: 88, 2: 236, 3: 15, 4: 15, 5: 43, 6:...",8316,,,



Running DBSCAN for Year Range: 2004-2007
Runtime for dbscan_clustering: 0.223533 seconds
Runtime for dbscan_clustering: 0.192119 seconds
Runtime for dbscan_clustering: 0.191704 seconds
Runtime for dbscan_clustering: 0.244970 seconds
Runtime for dbscan_clustering: 0.127513 seconds
Runtime for dbscan_clustering: 0.159004 seconds
Runtime for dbscan_clustering: 0.141413 seconds
Runtime for dbscan_clustering: 0.143574 seconds
Runtime for dbscan_clustering: 0.157755 seconds
Runtime for dbscan_clustering: 0.159350 seconds
Runtime for dbscan_clustering: 0.158878 seconds
Runtime for dbscan_clustering: 0.145040 seconds
Runtime for dbscan_clustering: 0.210081 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2004-2007
eps                                                                   0.02
min_samples                                                             30
Runtime (s)                                                          0.245
Clusters                                                                 8
Points per Cluster       {0: 107, 1: 382, 2: 274, 3: 107, 4: 63, 5: 88,...
Noise Points                                                          7481
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              199.833
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2004-2007,0.02,10,0.224,20,"{0: 37, 1: 962, 2: 30, 3: 1341, 4: 18, 5: 9, 6...",5934,,,
1,2004-2007,0.02,15,0.192,14,"{0: 589, 1: 17, 2: 976, 3: 16, 4: 122, 5: 44, ...",6567,,,
2,2004-2007,0.02,25,0.192,7,"{0: 135, 1: 117, 2: 696, 3: 84, 4: 134, 5: 68,...",7315,,,
3,2004-2007,0.02,30,0.245,8,"{0: 107, 1: 382, 2: 274, 3: 107, 4: 63, 5: 88,...",7481,,,199.833
4,2004-2007,0.01,10,0.128,8,"{0: 27, 1: 10, 2: 29, 3: 21, 4: 15, 5: 14, 6: ...",8426,,,
5,2004-2007,0.01,15,0.159,4,"{0: 19, 1: 15, 2: 22, -1: 8493}",8493,,,
6,2004-2007,0.01,25,0.141,1,{-1: 8549},8549,,,
7,2004-2007,0.01,30,0.144,1,{-1: 8549},8549,,,0.0
8,2004-2007,0.015,10,0.158,26,"{0: 97, 1: 10, 2: 80, 3: 563, 4: 10, 5: 18, 6:...",7303,,,
9,2004-2007,0.015,15,0.159,12,"{0: 248, 1: 68, 2: 55, 3: 40, 4: 148, 5: 25, 6...",7862,,,



Running DBSCAN for Year Range: 2005-2008
Runtime for dbscan_clustering: 0.171910 seconds
Runtime for dbscan_clustering: 0.185038 seconds
Runtime for dbscan_clustering: 0.158425 seconds
Runtime for dbscan_clustering: 0.170013 seconds
Runtime for dbscan_clustering: 0.126379 seconds
Runtime for dbscan_clustering: 0.143130 seconds
Runtime for dbscan_clustering: 0.126447 seconds
Runtime for dbscan_clustering: 0.126706 seconds
Runtime for dbscan_clustering: 0.158778 seconds
Runtime for dbscan_clustering: 0.158517 seconds
Runtime for dbscan_clustering: 0.158663 seconds
Runtime for dbscan_clustering: 0.130270 seconds
Runtime for dbscan_clustering: 0.159914 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2005-2008
eps                                                                   0.02
min_samples                                                             30
Runtime (s)                                                          0.170
Clusters                                                                 8
Points per Cluster       {0: 127, 1: 672, 2: 105, 3: 47, 4: 118, 5: 59,...
Noise Points                                                          6463
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              176.523
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2005-2008,0.02,10,0.172,22,"{0: 1002, 1: 19, 2: 1232, 3: 29, 4: 16, 5: 18,...",5097,,,
1,2005-2008,0.02,15,0.185,16,"{0: 49, 1: 56, 2: 619, 3: 15, 4: 781, 5: 127, ...",5617,,,
2,2005-2008,0.02,25,0.158,8,"{0: 295, 1: 114, 2: 711, 3: 77, 4: 72, 5: 45, ...",6280,,,
3,2005-2008,0.02,30,0.17,8,"{0: 127, 1: 672, 2: 105, 3: 47, 4: 118, 5: 59,...",6463,,,176.523
4,2005-2008,0.01,10,0.126,9,"{0: 26, 1: 29, 2: 22, 3: 16, 4: 14, 5: 7, 6: 1...",7487,,,
5,2005-2008,0.01,15,0.143,4,"{0: 19, 1: 15, 2: 20, -1: 7567}",7567,,,
6,2005-2008,0.01,25,0.126,1,{-1: 7621},7621,,,
7,2005-2008,0.01,30,0.127,1,{-1: 7621},7621,,,0.0
8,2005-2008,0.015,10,0.159,28,"{0: 31, 1: 111, 2: 144, 3: 13, 4: 9, 5: 593, 6...",6281,,,
9,2005-2008,0.015,15,0.159,13,"{0: 60, 1: 465, 2: 49, 3: 39, 4: 43, 5: 11, 6:...",6792,,,



Running DBSCAN for Year Range: 2006-2009
Runtime for dbscan_clustering: 0.175017 seconds
Runtime for dbscan_clustering: 0.214586 seconds
Runtime for dbscan_clustering: 0.160275 seconds
Runtime for dbscan_clustering: 0.173100 seconds
Runtime for dbscan_clustering: 0.114866 seconds
Runtime for dbscan_clustering: 0.113177 seconds
Runtime for dbscan_clustering: 0.126739 seconds
Runtime for dbscan_clustering: 0.111054 seconds
Runtime for dbscan_clustering: 0.127825 seconds
Runtime for dbscan_clustering: 0.158998 seconds
Runtime for dbscan_clustering: 0.143099 seconds
Runtime for dbscan_clustering: 0.142965 seconds
Runtime for dbscan_clustering: 0.158993 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2006-2009
eps                                                                   0.02
min_samples                                                             30
Runtime (s)                                                          0.173
Clusters                                                                 8
Points per Cluster       {0: 97, 1: 112, 2: 65, 3: 583, 4: 45, 5: 45, 6...
Noise Points                                                          6495
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              199.532
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2006-2009,0.02,10,0.175,19,"{0: 946, 1: 1136, 2: 12, 3: 7, 4: 36, 5: 11, 6...",5179,,,
1,2006-2009,0.02,15,0.215,9,"{0: 22, 1: 48, 2: 697, 3: 843, 4: 100, 5: 33, ...",5692,,,
2,2006-2009,0.02,25,0.16,7,"{0: 322, 1: 92, 2: 639, 3: 54, 4: 27, 5: 62, -...",6283,,,
3,2006-2009,0.02,30,0.173,8,"{0: 97, 1: 112, 2: 65, 3: 583, 4: 45, 5: 45, 6...",6495,,,199.532
4,2006-2009,0.01,10,0.115,10,"{0: 16, 1: 13, 2: 14, 3: 17, 4: 10, 5: 12, 6: ...",7357,,,
5,2006-2009,0.01,15,0.113,1,{-1: 7479},7479,,,
6,2006-2009,0.01,25,0.127,1,{-1: 7479},7479,,,
7,2006-2009,0.01,30,0.111,1,{-1: 7479},7479,,,0.0
8,2006-2009,0.015,10,0.128,24,"{0: 16, 1: 183, 2: 108, 3: 59, 4: 17, 5: 517, ...",6310,,,
9,2006-2009,0.015,15,0.159,13,"{0: 43, 1: 49, 2: 213, 3: 154, 4: 21, 5: 36, 6...",6790,,,



Running DBSCAN for Year Range: 2007-2010
Runtime for dbscan_clustering: 0.143230 seconds
Runtime for dbscan_clustering: 0.165638 seconds
Runtime for dbscan_clustering: 0.169132 seconds
Runtime for dbscan_clustering: 0.135738 seconds
Runtime for dbscan_clustering: 0.094777 seconds
Runtime for dbscan_clustering: 0.079906 seconds
Runtime for dbscan_clustering: 0.090197 seconds
Runtime for dbscan_clustering: 0.079645 seconds
Runtime for dbscan_clustering: 0.110666 seconds
Runtime for dbscan_clustering: 0.095387 seconds
Runtime for dbscan_clustering: 0.095489 seconds
Runtime for dbscan_clustering: 0.111582 seconds
Runtime for dbscan_clustering: 0.112164 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                             2007-2010
eps                                                         0.02
min_samples                                                   30
Runtime (s)                                                0.136
Clusters                                                       5
Points per Cluster       {0: 58, 1: 445, 2: 30, 3: 35, -1: 5118}
Noise Points                                                5118
Davies-Bouldin                                              None
Calinski-Harabasz                                           None
Weighted Mean ANOVA F                                     207.73
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2007-2010,0.02,10,0.143,14,"{0: 601, 1: 54, 2: 769, 3: 15, 4: 32, 5: 18, 6...",4070,,,
1,2007-2010,0.02,15,0.166,6,"{0: 469, 1: 639, 2: 63, 3: 22, 4: 16, -1: 4477}",4477,,,
2,2007-2010,0.02,25,0.169,6,"{0: 66, 1: 105, 2: 45, 3: 473, 4: 42, -1: 4955}",4955,,,
3,2007-2010,0.02,30,0.136,5,"{0: 58, 1: 445, 2: 30, 3: 35, -1: 5118}",5118,,,207.73
4,2007-2010,0.01,10,0.095,5,"{0: 10, 1: 13, 2: 18, 3: 10, -1: 5635}",5635,,,
5,2007-2010,0.01,15,0.08,2,"{0: 15, -1: 5671}",5671,,,
6,2007-2010,0.01,25,0.09,1,{-1: 5686},5686,,,
7,2007-2010,0.01,30,0.08,1,{-1: 5686},5686,,,0.0
8,2007-2010,0.015,10,0.111,18,"{0: 68, 1: 63, 2: 52, 3: 277, 4: 99, 5: 29, 6:...",4950,,,
9,2007-2010,0.015,15,0.095,8,"{0: 36, 1: 29, 2: 77, 3: 156, 4: 41, 5: 22, 6:...",5310,,,



Running DBSCAN for Year Range: 2008-2011
Runtime for dbscan_clustering: 0.095670 seconds
Runtime for dbscan_clustering: 0.095470 seconds
Runtime for dbscan_clustering: 0.079959 seconds
Runtime for dbscan_clustering: 0.095812 seconds
Runtime for dbscan_clustering: 0.064003 seconds
Runtime for dbscan_clustering: 0.063295 seconds
Runtime for dbscan_clustering: 0.048075 seconds
Runtime for dbscan_clustering: 0.063282 seconds
Runtime for dbscan_clustering: 0.079700 seconds
Runtime for dbscan_clustering: 0.063813 seconds
Runtime for dbscan_clustering: 0.079862 seconds
Runtime for dbscan_clustering: 0.064437 seconds
Runtime for dbscan_clustering: 0.080182 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                        2008-2011
eps                                    0.02
min_samples                              30
Runtime (s)                           0.096
Clusters                                  2
Points per Cluster       {0: 130, -1: 3934}
Noise Points                           3934
Davies-Bouldin                         None
Calinski-Harabasz                      None
Weighted Mean ANOVA F                   0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2008-2011,0.02,10,0.096,16,"{0: 307, 1: 482, 2: 28, 3: 53, 4: 13, 5: 22, 6...",3030,,,
1,2008-2011,0.02,15,0.095,7,"{0: 189, 1: 428, 2: 19, 3: 47, 4: 16, 5: 16, -...",3349,,,
2,2008-2011,0.02,25,0.08,5,"{0: 180, 1: 39, 2: 25, 3: 27, -1: 3793}",3793,,,
3,2008-2011,0.02,30,0.096,2,"{0: 130, -1: 3934}",3934,,,0.0
4,2008-2011,0.01,10,0.064,2,"{0: 16, -1: 4048}",4048,,,
5,2008-2011,0.01,15,0.063,1,{-1: 4064},4064,,,
6,2008-2011,0.01,25,0.048,1,{-1: 4064},4064,,,
7,2008-2011,0.01,30,0.063,1,{-1: 4064},4064,,,0.0
8,2008-2011,0.015,10,0.08,15,"{0: 18, 1: 87, 2: 36, 3: 42, 4: 32, 5: 16, 6: ...",3691,,,
9,2008-2011,0.015,15,0.064,5,"{0: 68, 1: 19, 2: 18, 3: 17, -1: 3942}",3942,,,



Running DBSCAN for Year Range: 2009-2012
Runtime for dbscan_clustering: 0.072976 seconds
Runtime for dbscan_clustering: 0.068640 seconds
Runtime for dbscan_clustering: 0.047821 seconds
Runtime for dbscan_clustering: 0.050880 seconds
Runtime for dbscan_clustering: 0.047828 seconds
Runtime for dbscan_clustering: 0.047690 seconds
Runtime for dbscan_clustering: 0.048083 seconds
Runtime for dbscan_clustering: 0.047431 seconds
Runtime for dbscan_clustering: 0.064108 seconds
Runtime for dbscan_clustering: 0.047484 seconds
Runtime for dbscan_clustering: 0.047609 seconds
Runtime for dbscan_clustering: 0.046692 seconds
Runtime for dbscan_clustering: 0.047689 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                       2009-2012
eps                                   0.02
min_samples                             30
Runtime (s)                          0.051
Clusters                                 2
Points per Cluster       {0: 58, -1: 3502}
Noise Points                          3502
Davies-Bouldin                        None
Calinski-Harabasz                     None
Weighted Mean ANOVA F                  0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2009-2012,0.02,10,0.073,15,"{0: 164, 1: 402, 2: 13, 3: 33, 4: 19, 5: 10, 6...",2798,,,
1,2009-2012,0.02,15,0.069,8,"{0: 111, 1: 51, 2: 198, 3: 74, 4: 25, 5: 10, 6...",3066,,,
2,2009-2012,0.02,25,0.048,2,"{0: 111, -1: 3449}",3449,,,
3,2009-2012,0.02,30,0.051,2,"{0: 58, -1: 3502}",3502,,,0.0
4,2009-2012,0.01,10,0.048,1,{-1: 3560},3560,,,
5,2009-2012,0.01,15,0.048,1,{-1: 3560},3560,,,
6,2009-2012,0.01,25,0.048,1,{-1: 3560},3560,,,
7,2009-2012,0.01,30,0.047,1,{-1: 3560},3560,,,0.0
8,2009-2012,0.015,10,0.064,14,"{0: 19, 1: 73, 2: 11, 3: 17, 4: 15, 5: 25, 6: ...",3297,,,
9,2009-2012,0.015,15,0.047,2,"{0: 43, -1: 3517}",3517,,,



Running DBSCAN for Year Range: 2010-2013
Runtime for dbscan_clustering: 0.028104 seconds
Runtime for dbscan_clustering: 0.021654 seconds
Runtime for dbscan_clustering: 0.022900 seconds
Runtime for dbscan_clustering: 0.030241 seconds
Runtime for dbscan_clustering: 0.015787 seconds
Runtime for dbscan_clustering: 0.009022 seconds
Runtime for dbscan_clustering: 0.015655 seconds
Runtime for dbscan_clustering: 0.014213 seconds
Runtime for dbscan_clustering: 0.015700 seconds
Runtime for dbscan_clustering: 0.015657 seconds
Runtime for dbscan_clustering: 0.015947 seconds
Runtime for dbscan_clustering: 0.015810 seconds
Runtime for dbscan_clustering: 0.015894 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2010-2013
eps                            0.02
min_samples                      30
Runtime (s)                   0.030
Clusters                          1
Points per Cluster       {-1: 1579}
Noise Points                   1579
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2010-2013,0.02,10,0.028,7,"{0: 10, 1: 17, 2: 35, 3: 19, 4: 24, 5: 14, -1:...",1460,,,
1,2010-2013,0.02,15,0.022,2,"{0: 26, -1: 1553}",1553,,,
2,2010-2013,0.02,25,0.023,1,{-1: 1579},1579,,,
3,2010-2013,0.02,30,0.03,1,{-1: 1579},1579,,,0.0
4,2010-2013,0.01,10,0.016,1,{-1: 1579},1579,,,
5,2010-2013,0.01,15,0.009,1,{-1: 1579},1579,,,
6,2010-2013,0.01,25,0.016,1,{-1: 1579},1579,,,
7,2010-2013,0.01,30,0.014,1,{-1: 1579},1579,,,0.0
8,2010-2013,0.015,10,0.016,4,"{0: 15, 1: 11, 2: 10, -1: 1543}",1543,,,
9,2010-2013,0.015,15,0.016,1,{-1: 1579},1579,,,



Running DBSCAN for Year Range: 2011-2014
Runtime for dbscan_clustering: 0.010266 seconds
Runtime for dbscan_clustering: 0.015643 seconds
Runtime for dbscan_clustering: 0.015658 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.015643 seconds
Runtime for dbscan_clustering: 0.006754 seconds
Runtime for dbscan_clustering: 0.016008 seconds
Runtime for dbscan_clustering: 0.016146 seconds
Runtime for dbscan_clustering: 0.007802 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.015657 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.000000 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range               2011-2014
eps                           0.02
min_samples                     30
Runtime (s)                  0.000
Clusters                         1
Points per Cluster       {-1: 866}
Noise Points                   866
Davies-Bouldin                None
Calinski-Harabasz             None
Weighted Mean ANOVA F          0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2011-2014,0.02,10,0.01,3,"{0: 13, 1: 16, -1: 837}",837,,,
1,2011-2014,0.02,15,0.016,1,{-1: 866},866,,,
2,2011-2014,0.02,25,0.016,1,{-1: 866},866,,,
3,2011-2014,0.02,30,0.0,1,{-1: 866},866,,,0.0
4,2011-2014,0.01,10,0.016,1,{-1: 866},866,,,
5,2011-2014,0.01,15,0.007,1,{-1: 866},866,,,
6,2011-2014,0.01,25,0.016,1,{-1: 866},866,,,
7,2011-2014,0.01,30,0.016,1,{-1: 866},866,,,0.0
8,2011-2014,0.015,10,0.008,2,"{0: 10, -1: 856}",856,,,
9,2011-2014,0.015,15,0.0,1,{-1: 866},866,,,



Running DBSCAN for Year Range: 2012-2015
Runtime for dbscan_clustering: 0.016745 seconds
Runtime for dbscan_clustering: 0.015667 seconds
Runtime for dbscan_clustering: 0.015716 seconds
Runtime for dbscan_clustering: 0.016001 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.009548 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.015879 seconds
Runtime for dbscan_clustering: 0.015725 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.015772 seconds
Runtime for dbscan_clustering: 0.015756 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range               2012-2015
eps                           0.02
min_samples                     30
Runtime (s)                  0.016
Clusters                         1
Points per Cluster       {-1: 870}
Noise Points                   870
Davies-Bouldin                None
Calinski-Harabasz             None
Weighted Mean ANOVA F          0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2012-2015,0.02,10,0.017,1,{-1: 870},870,,,
1,2012-2015,0.02,15,0.016,1,{-1: 870},870,,,
2,2012-2015,0.02,25,0.016,1,{-1: 870},870,,,
3,2012-2015,0.02,30,0.016,1,{-1: 870},870,,,0.0
4,2012-2015,0.01,10,0.0,1,{-1: 870},870,,,
5,2012-2015,0.01,15,0.0,1,{-1: 870},870,,,
6,2012-2015,0.01,25,0.01,1,{-1: 870},870,,,
7,2012-2015,0.01,30,0.0,1,{-1: 870},870,,,0.0
8,2012-2015,0.015,10,0.016,1,{-1: 870},870,,,
9,2012-2015,0.015,15,0.016,1,{-1: 870},870,,,



Running DBSCAN for Year Range: 2013-2016
Runtime for dbscan_clustering: 0.031734 seconds
Runtime for dbscan_clustering: 0.031523 seconds
Runtime for dbscan_clustering: 0.015673 seconds
Runtime for dbscan_clustering: 0.015952 seconds
Runtime for dbscan_clustering: 0.016216 seconds
Runtime for dbscan_clustering: 0.024993 seconds
Runtime for dbscan_clustering: 0.016144 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.015736 seconds
Runtime for dbscan_clustering: 0.015899 seconds
Runtime for dbscan_clustering: 0.015678 seconds
Runtime for dbscan_clustering: 0.015657 seconds
Runtime for dbscan_clustering: 0.015730 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2013-2016
eps                            0.02
min_samples                      30
Runtime (s)                   0.016
Clusters                          1
Points per Cluster       {-1: 1327}
Noise Points                   1327
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2013-2016,0.02,10,0.032,1,{-1: 1327},1327,,,
1,2013-2016,0.02,15,0.032,1,{-1: 1327},1327,,,
2,2013-2016,0.02,25,0.016,1,{-1: 1327},1327,,,
3,2013-2016,0.02,30,0.016,1,{-1: 1327},1327,,,0.0
4,2013-2016,0.01,10,0.016,1,{-1: 1327},1327,,,
5,2013-2016,0.01,15,0.025,1,{-1: 1327},1327,,,
6,2013-2016,0.01,25,0.016,1,{-1: 1327},1327,,,
7,2013-2016,0.01,30,0.0,1,{-1: 1327},1327,,,0.0
8,2013-2016,0.015,10,0.016,1,{-1: 1327},1327,,,
9,2013-2016,0.015,15,0.016,1,{-1: 1327},1327,,,



Running DBSCAN for Year Range: 2014-2017
Runtime for dbscan_clustering: 0.016155 seconds
Runtime for dbscan_clustering: 0.031633 seconds
Runtime for dbscan_clustering: 0.035055 seconds
Runtime for dbscan_clustering: 0.025503 seconds
Runtime for dbscan_clustering: 0.031592 seconds
Runtime for dbscan_clustering: 0.015802 seconds
Runtime for dbscan_clustering: 0.015649 seconds
Runtime for dbscan_clustering: 0.015682 seconds
Runtime for dbscan_clustering: 0.017693 seconds
Runtime for dbscan_clustering: 0.032112 seconds
Runtime for dbscan_clustering: 0.015799 seconds
Runtime for dbscan_clustering: 0.015887 seconds
Runtime for dbscan_clustering: 0.016224 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2014-2017
eps                            0.02
min_samples                      30
Runtime (s)                   0.026
Clusters                          1
Points per Cluster       {-1: 1897}
Noise Points                   1897
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2014-2017,0.02,10,0.016,2,"{0: 14, -1: 1883}",1883,,,
1,2014-2017,0.02,15,0.032,1,{-1: 1897},1897,,,
2,2014-2017,0.02,25,0.035,1,{-1: 1897},1897,,,
3,2014-2017,0.02,30,0.026,1,{-1: 1897},1897,,,0.0
4,2014-2017,0.01,10,0.032,1,{-1: 1897},1897,,,
5,2014-2017,0.01,15,0.016,1,{-1: 1897},1897,,,
6,2014-2017,0.01,25,0.016,1,{-1: 1897},1897,,,
7,2014-2017,0.01,30,0.016,1,{-1: 1897},1897,,,0.0
8,2014-2017,0.015,10,0.018,1,{-1: 1897},1897,,,
9,2014-2017,0.015,15,0.032,1,{-1: 1897},1897,,,



Running DBSCAN for Year Range: 2015-2018
Runtime for dbscan_clustering: 0.028063 seconds
Runtime for dbscan_clustering: 0.048303 seconds
Runtime for dbscan_clustering: 0.048140 seconds
Runtime for dbscan_clustering: 0.032773 seconds
Runtime for dbscan_clustering: 0.015918 seconds
Runtime for dbscan_clustering: 0.032132 seconds
Runtime for dbscan_clustering: 0.031874 seconds
Runtime for dbscan_clustering: 0.032154 seconds
Runtime for dbscan_clustering: 0.031747 seconds
Runtime for dbscan_clustering: 0.032317 seconds
Runtime for dbscan_clustering: 0.032069 seconds
Runtime for dbscan_clustering: 0.031035 seconds
Runtime for dbscan_clustering: 0.032074 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2015-2018
eps                            0.02
min_samples                      30
Runtime (s)                   0.033
Clusters                          1
Points per Cluster       {-1: 2642}
Noise Points                   2642
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2015-2018,0.02,10,0.028,3,"{0: 12, 1: 18, -1: 2612}",2612,,,
1,2015-2018,0.02,15,0.048,2,"{0: 15, -1: 2627}",2627,,,
2,2015-2018,0.02,25,0.048,1,{-1: 2642},2642,,,
3,2015-2018,0.02,30,0.033,1,{-1: 2642},2642,,,0.0
4,2015-2018,0.01,10,0.016,1,{-1: 2642},2642,,,
5,2015-2018,0.01,15,0.032,1,{-1: 2642},2642,,,
6,2015-2018,0.01,25,0.032,1,{-1: 2642},2642,,,
7,2015-2018,0.01,30,0.032,1,{-1: 2642},2642,,,0.0
8,2015-2018,0.015,10,0.032,2,"{0: 13, -1: 2629}",2629,,,
9,2015-2018,0.015,15,0.032,1,{-1: 2642},2642,,,



Running DBSCAN for Year Range: 2016-2019
Runtime for dbscan_clustering: 0.048177 seconds
Runtime for dbscan_clustering: 0.031731 seconds
Runtime for dbscan_clustering: 0.031709 seconds
Runtime for dbscan_clustering: 0.044794 seconds
Runtime for dbscan_clustering: 0.029867 seconds
Runtime for dbscan_clustering: 0.039938 seconds
Runtime for dbscan_clustering: 0.032376 seconds
Runtime for dbscan_clustering: 0.032068 seconds
Runtime for dbscan_clustering: 0.031924 seconds
Runtime for dbscan_clustering: 0.031651 seconds
Runtime for dbscan_clustering: 0.045534 seconds
Runtime for dbscan_clustering: 0.047940 seconds
Runtime for dbscan_clustering: 0.031708 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                2016-2019
eps                            0.02
min_samples                      30
Runtime (s)                   0.045
Clusters                          1
Points per Cluster       {-1: 2710}
Noise Points                   2710
Davies-Bouldin                 None

  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2016-2019,0.02,10,0.048,6,"{0: 12, 1: 13, 2: 23, 3: 13, 4: 10, -1: 2639}",2639,,,
1,2016-2019,0.02,15,0.032,2,"{0: 16, -1: 2694}",2694,,,
2,2016-2019,0.02,25,0.032,1,{-1: 2710},2710,,,
3,2016-2019,0.02,30,0.045,1,{-1: 2710},2710,,,0.0
4,2016-2019,0.01,10,0.03,1,{-1: 2710},2710,,,
5,2016-2019,0.01,15,0.04,1,{-1: 2710},2710,,,
6,2016-2019,0.01,25,0.032,1,{-1: 2710},2710,,,
7,2016-2019,0.01,30,0.032,1,{-1: 2710},2710,,,0.0
8,2016-2019,0.015,10,0.032,3,"{0: 10, 1: 10, -1: 2690}",2690,,,
9,2016-2019,0.015,15,0.032,1,{-1: 2710},2710,,,



Running DBSCAN for Year Range: 2017-2020
Runtime for dbscan_clustering: 0.016134 seconds
Runtime for dbscan_clustering: 0.015832 seconds
Runtime for dbscan_clustering: 0.015938 seconds
Runtime for dbscan_clustering: 0.025199 seconds
Runtime for dbscan_clustering: 0.019720 seconds
Runtime for dbscan_clustering: 0.015721 seconds
Runtime for dbscan_clustering: 0.015723 seconds
Runtime for dbscan_clustering: 0.015970 seconds
Runtime for dbscan_clustering: 0.031759 seconds
Runtime for dbscan_clustering: 0.016156 seconds
Runtime for dbscan_clustering: 0.013320 seconds
Runtime for dbscan_clustering: 0.020790 seconds
Runtime for dbscan_clustering: 0.031715 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                2017-2020
eps                            0.02
min_samples                      30
Runtime (s)                   0.025
Clusters                          1
Points per Cluster       {-1: 3291}
Noise Points                   3291
Davies-Bouldin                 None

  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2017-2020,0.02,10,0.016,9,"{0: 21, 1: 18, 2: 28, 3: 17, 4: 21, 5: 11, 6: ...",3154,,,
1,2017-2020,0.02,15,0.016,3,"{0: 25, 1: 15, -1: 3251}",3251,,,
2,2017-2020,0.02,25,0.016,1,{-1: 3291},3291,,,
3,2017-2020,0.02,30,0.025,1,{-1: 3291},3291,,,0.0
4,2017-2020,0.01,10,0.02,1,{-1: 3291},3291,,,
5,2017-2020,0.01,15,0.016,1,{-1: 3291},3291,,,
6,2017-2020,0.01,25,0.016,1,{-1: 3291},3291,,,
7,2017-2020,0.01,30,0.016,1,{-1: 3291},3291,,,0.0
8,2017-2020,0.015,10,0.032,3,"{0: 11, 1: 11, -1: 3269}",3269,,,
9,2017-2020,0.015,15,0.016,1,{-1: 3291},3291,,,



Running DBSCAN for Year Range: 2018-2021
Runtime for dbscan_clustering: 0.033234 seconds
Runtime for dbscan_clustering: 0.016626 seconds
Runtime for dbscan_clustering: 0.033165 seconds
Runtime for dbscan_clustering: 0.033325 seconds
Runtime for dbscan_clustering: 0.019801 seconds
Runtime for dbscan_clustering: 0.018016 seconds
Runtime for dbscan_clustering: 0.012439 seconds
Runtime for dbscan_clustering: 0.015090 seconds
Runtime for dbscan_clustering: 0.021566 seconds
Runtime for dbscan_clustering: 0.031590 seconds
Runtime for dbscan_clustering: 0.016456 seconds
Runtime for dbscan_clustering: 0.019071 seconds
Runtime for dbscan_clustering: 0.017007 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2018-2021
eps                            0.02
min_samples                      30
Runtime (s)                   0.033
Clusters                          1
Points per Cluster       {-1: 3442}
Noise Points                   3442
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2018-2021,0.02,10,0.033,12,"{0: 15, 1: 10, 2: 37, 3: 31, 4: 24, 5: 18, 6: ...",3222,,,
1,2018-2021,0.02,15,0.017,4,"{0: 23, 1: 17, 2: 15, -1: 3387}",3387,,,
2,2018-2021,0.02,25,0.033,1,{-1: 3442},3442,,,
3,2018-2021,0.02,30,0.033,1,{-1: 3442},3442,,,0.0
4,2018-2021,0.01,10,0.02,1,{-1: 3442},3442,,,
5,2018-2021,0.01,15,0.018,1,{-1: 3442},3442,,,
6,2018-2021,0.01,25,0.012,1,{-1: 3442},3442,,,
7,2018-2021,0.01,30,0.015,1,{-1: 3442},3442,,,0.0
8,2018-2021,0.015,10,0.022,2,"{0: 18, -1: 3424}",3424,,,
9,2018-2021,0.015,15,0.032,1,{-1: 3442},3442,,,



Running DBSCAN for Year Range: 2019-2022
Runtime for dbscan_clustering: 0.033425 seconds
Runtime for dbscan_clustering: 0.023154 seconds
Runtime for dbscan_clustering: 0.036754 seconds
Runtime for dbscan_clustering: 0.031381 seconds
Runtime for dbscan_clustering: 0.014580 seconds
Runtime for dbscan_clustering: 0.020205 seconds
Runtime for dbscan_clustering: 0.018555 seconds
Runtime for dbscan_clustering: 0.019334 seconds
Runtime for dbscan_clustering: 0.031962 seconds
Runtime for dbscan_clustering: 0.020857 seconds
Runtime for dbscan_clustering: 0.031702 seconds
Runtime for dbscan_clustering: 0.018498 seconds
Runtime for dbscan_clustering: 0.021240 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2019-2022
eps                            0.02
min_samples                      30
Runtime (s)                   0.031
Clusters                          1
Points per Cluster       {-1: 3609}
Noise Points                   3609
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2019-2022,0.02,10,0.033,20,"{0: 71, 1: 15, 2: 17, 3: 10, 4: 36, 5: 15, 6: ...",3262,,,
1,2019-2022,0.02,15,0.023,3,"{0: 49, 1: 32, -1: 3528}",3528,,,
2,2019-2022,0.02,25,0.037,1,{-1: 3609},3609,,,
3,2019-2022,0.02,30,0.031,1,{-1: 3609},3609,,,0.0
4,2019-2022,0.01,10,0.015,1,{-1: 3609},3609,,,
5,2019-2022,0.01,15,0.02,1,{-1: 3609},3609,,,
6,2019-2022,0.01,25,0.019,1,{-1: 3609},3609,,,
7,2019-2022,0.01,30,0.019,1,{-1: 3609},3609,,,0.0
8,2019-2022,0.015,10,0.032,6,"{0: 20, 1: 21, 2: 10, 3: 11, 4: 10, -1: 3537}",3537,,,
9,2019-2022,0.015,15,0.021,1,{-1: 3609},3609,,,



Running DBSCAN for Year Range: 2020-2023
Runtime for dbscan_clustering: 0.094976 seconds
Runtime for dbscan_clustering: 0.080393 seconds
Runtime for dbscan_clustering: 0.073501 seconds
Runtime for dbscan_clustering: 0.080064 seconds
Runtime for dbscan_clustering: 0.063390 seconds
Runtime for dbscan_clustering: 0.063852 seconds
Runtime for dbscan_clustering: 0.060745 seconds
Runtime for dbscan_clustering: 0.055810 seconds
Runtime for dbscan_clustering: 0.047822 seconds
Runtime for dbscan_clustering: 0.047640 seconds
Runtime for dbscan_clustering: 0.062133 seconds
Runtime for dbscan_clustering: 0.082419 seconds
Runtime for dbscan_clustering: 0.072542 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2020-2023
eps                            0.02
min_samples                      30
Runtime (s)                   0.080
Clusters                          1
Points per Cluster       {-1: 4372}
Noise Points                   4372
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2020-2023,0.02,10,0.095,26,"{0: 10, 1: 23, 2: 18, 3: 60, 4: 17, 5: 28, 6: ...",3884,,,
1,2020-2023,0.02,15,0.08,5,"{0: 15, 1: 53, 2: 15, 3: 33, -1: 4256}",4256,,,
2,2020-2023,0.02,25,0.074,1,{-1: 4372},4372,,,
3,2020-2023,0.02,30,0.08,1,{-1: 4372},4372,,,0.0
4,2020-2023,0.01,10,0.063,1,{-1: 4372},4372,,,
5,2020-2023,0.01,15,0.064,1,{-1: 4372},4372,,,
6,2020-2023,0.01,25,0.061,1,{-1: 4372},4372,,,
7,2020-2023,0.01,30,0.056,1,{-1: 4372},4372,,,0.0
8,2020-2023,0.015,10,0.048,8,"{0: 11, 1: 24, 2: 11, 3: 10, 4: 17, 5: 11, 6: ...",4274,,,
9,2020-2023,0.015,15,0.048,1,{-1: 4372},4372,,,


# DBSCAN
The features used by the clustering are determined by applying pca on the raw dataset.   
After the clustering, we reapply the PCA and plot the clusters in the PC1 / PC2 plane and make the pairplot with the clustered data. 

In [5]:
import cluster_data_pca
from cluster_data_pca import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils_pca import ClusterData
import cluster_plotter
from DBSCAN import dbscan_clustering
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)
running_ranges = cluster_data_pca.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data_pca.bin_data_for_clustering(running_ranges, print_res=False)

results_per_year_range = {}

plot_dir = r"Images\\dbscan_tests_running_bins_pcabased_pcaevaluated"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:
    print(f"\nRunning DBSCAN for Year Range: {year_range}")
    data_array = np.array([
        cluster_data.ecc, cluster_data.mag_obj, cluster_data.sem_maj,
        cluster_data.diameter, cluster_data.inc, cluster_data.raan
    ]).T
    feature_names = ["Eccentricity e", "Object magnitude [mag]", "Semi major axis [km]", "Diameter [m]", "Inclination [°]", "RAAN [°]"]

    normalized_data, data_min, data_max = normalize_data(data_array)

    eps_values = [0.02, 0.01, 0.015]
    min_samples_values = [10, 15, 25, 30]

    results = []

    for eps in eps_values:
        for min_samples in min_samples_values:
            result_dbscan, time_dbscan, n_clusters_dbscan, points_per_cluster_dbscan, metrics_dbscan = run_clustering(
                dbscan_clustering, "DBSCAN", normalized_data, data_min, data_max, eps=eps, min_samples=min_samples
            )

            unnormalized_data, _ = unnormalize(result_dbscan.data, None, data_min, data_max)
            noise_points = np.sum(result_dbscan.labels == -1)

            results.append({
                "Year Range": year_range,
                "eps": eps,
                "min_samples": min_samples,
                "Runtime (s)": f"{time_dbscan:.3f}",
                "Clusters": n_clusters_dbscan,
                "Points per Cluster": points_per_cluster_dbscan,
                "Noise Points": noise_points,
                "Davies-Bouldin": f"{metrics_dbscan[0]:.3f}" if metrics_dbscan and isinstance(metrics_dbscan[0], (int, float)) else None,
                "Calinski-Harabasz": f"{metrics_dbscan[1]:.3f}" if metrics_dbscan and isinstance(metrics_dbscan[1], (int, float)) else None
            })

            labels = result_dbscan.labels

            high_dim_analysis.plot_correlation_heatmap(
                cluster_data,
                output_folder=plot_dir,
                filename=f"corr_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )
            high_dim_analysis.plot_mutual_information(
                cluster_data,
                labels,
                output_folder=plot_dir,
                filename=f"mi_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )

            from sklearn.feature_selection import f_classif
            X = data_array
            valid = labels != -1
            if np.any(valid) and len(np.unique(labels[valid])) > 1:
                f_vals, _ = f_classif(X[valid], labels[valid])
                weights = np.var(X[valid], axis=0)
                weighted_mean_f = np.average(f_vals, weights=weights)
            else:
                weighted_mean_f = 0.0

            results[-1]["Weighted Mean ANOVA F"] = f"{weighted_mean_f:.3f}"

    df = pd.DataFrame(results)
    df["Weighted Mean ANOVA F"] = pd.to_numeric(df["Weighted Mean ANOVA F"], errors="coerce")
    df_sorted = df.sort_values(by="Weighted Mean ANOVA F", ascending=False)
    best_params = df_sorted.iloc[0]
    best_eps = best_params["eps"]
    best_min_samples = best_params["min_samples"]

    best_result, _, _, _, _ = run_clustering(
        dbscan_clustering, "DBSCAN", normalized_data, data_min, data_max,
        eps=best_eps, min_samples=best_min_samples
    )
    best_labels = best_result.labels

    df_plot = pd.DataFrame(data_array, columns=feature_names)
    df_plot['cluster'] = best_labels.astype(str)

    pairplot = sns.pairplot(df_plot, hue='cluster', diag_kind='kde', plot_kws={'alpha':0.6, 's':40})
    pairplot.fig.suptitle(f"DBSCAN Clusters for Year Range {year_range}", y=1.02)
    pairplot.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_eps{best_eps}_minsamples_{best_min_samples}.png"))
    plt.close(pairplot.fig)

    high_dim_analysis.plot_anova_f_values(
        cluster_data,
        best_labels,
        output_folder=plot_dir,
        filename=f"anova_best_{year_range}_eps{best_eps}_minsamples_{best_min_samples}.png",
        title=f"ANOVA F-values (Best DBSCAN Parameters), eps = {float(best_eps)}, minsamples = {float(best_min_samples)}"
    )

    # PCA colored by clusters
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(data_array)
    pca = PCA(n_components=2)
    components = pca.fit_transform(scaled_data)

    plt.figure(figsize=(8, 6))
    scatter = plt.scatter(components[:, 0], components[:, 1], c=best_labels, cmap='tab10', s=10, alpha=0.7)
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.title(f'PCA Scatter Plot (DBSCAN Clusters) {year_range}')
    plt.grid(True, linestyle='--', linewidth=0.5)
    plt.tight_layout()
    plt.savefig(os.path.join(plot_dir, f"pca_clusters_{year_range}_eps{best_eps}_minsamples_{best_min_samples}.png"))
    plt.close()

    print("Best DBSCAN parameters based on ANOVA F-value:")
    print(best_params)
    display(df)


Running DBSCAN for Year Range: 2002-2005
Runtime for dbscan_clustering: 0.083050 seconds
Runtime for dbscan_clustering: 0.068838 seconds
Runtime for dbscan_clustering: 0.079146 seconds
Runtime for dbscan_clustering: 0.086649 seconds
Runtime for dbscan_clustering: 0.047804 seconds
Runtime for dbscan_clustering: 0.050260 seconds
Runtime for dbscan_clustering: 0.050540 seconds
Runtime for dbscan_clustering: 0.066400 seconds
Runtime for dbscan_clustering: 0.059345 seconds
Runtime for dbscan_clustering: 0.063897 seconds
Runtime for dbscan_clustering: 0.051811 seconds
Runtime for dbscan_clustering: 0.071604 seconds
Runtime for dbscan_clustering: 0.066436 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2002-2005
eps                                                                   0.02
min_samples                                                             15
Runtime (s)                                                          0.069
Clusters                                                                15
Points per Cluster       {0: 15, 1: 21, 2: 210, 3: 717, 4: 13, 5: 25, 6...
Noise Points                                                          6625
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                            11163.204
Name: 1, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2002-2005,0.02,10,0.083,28,"{0: 21, 1: 28, 2: 37, 3: 1009, 4: 17, 5: 141, ...",6159,,,7029.527
1,2002-2005,0.02,15,0.069,15,"{0: 15, 1: 21, 2: 210, 3: 717, 4: 13, 5: 25, 6...",6625,,,11163.204
2,2002-2005,0.02,25,0.079,6,"{0: 557, 1: 101, 2: 66, 3: 64, 4: 40, -1: 7209}",7209,,,102.785
3,2002-2005,0.02,30,0.087,6,"{0: 122, 1: 314, 2: 96, 3: 75, 4: 46, -1: 7384}",7384,,,112.751
4,2002-2005,0.01,10,0.048,3,"{0: 33, 1: 22, -1: 7982}",7982,,,4.495
5,2002-2005,0.01,15,0.05,2,"{0: 31, -1: 8006}",8006,,,0.0
6,2002-2005,0.01,25,0.051,1,{-1: 8037},8037,,,0.0
7,2002-2005,0.01,30,0.066,1,{-1: 8037},8037,,,0.0
8,2002-2005,0.015,10,0.059,25,"{0: 11, 1: 16, 2: 292, 3: 94, 4: 44, 5: 80, 6:...",7115,,,9797.821
9,2002-2005,0.015,15,0.064,8,"{0: 75, 1: 232, 2: 71, 3: 15, 4: 17, 5: 16, 6:...",7587,,,5.811



Running DBSCAN for Year Range: 2003-2006
Runtime for dbscan_clustering: 0.117733 seconds
Runtime for dbscan_clustering: 0.080259 seconds
Runtime for dbscan_clustering: 0.100119 seconds
Runtime for dbscan_clustering: 0.100202 seconds
Runtime for dbscan_clustering: 0.066368 seconds
Runtime for dbscan_clustering: 0.071976 seconds
Runtime for dbscan_clustering: 0.066837 seconds
Runtime for dbscan_clustering: 0.049731 seconds
Runtime for dbscan_clustering: 0.083028 seconds
Runtime for dbscan_clustering: 0.061517 seconds
Runtime for dbscan_clustering: 0.076121 seconds
Runtime for dbscan_clustering: 0.066893 seconds
Runtime for dbscan_clustering: 0.099800 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2003-2006
eps                                                                   0.02
min_samples                                                             10
Runtime (s)                                                          0.118
Clusters                                                                22
Points per Cluster       {0: 123, 1: 534, 2: 870, 3: 413, 4: 23, 5: 10,...
Noise Points                                                          6606
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              327.021
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2003-2006,0.02,10,0.118,22,"{0: 123, 1: 534, 2: 870, 3: 413, 4: 23, 5: 10,...",6606,,,327.021
1,2003-2006,0.02,15,0.08,16,"{0: 316, 1: 217, 2: 121, 3: 710, 4: 89, 5: 30,...",7212,,,177.763
2,2003-2006,0.02,25,0.1,9,"{0: 90, 1: 89, 2: 275, 3: 347, 4: 76, 5: 91, 6...",7804,,,175.278
3,2003-2006,0.02,30,0.1,8,"{0: 218, 1: 335, 2: 86, 3: 45, 4: 63, 5: 69, 6...",8001,,,160.712
4,2003-2006,0.01,10,0.066,7,"{0: 25, 1: 14, 2: 28, 3: 19, 4: 22, 5: 10, -1:...",8761,,,133.047
5,2003-2006,0.01,15,0.072,4,"{0: 20, 1: 15, 2: 22, -1: 8822}",8822,,,4.996
6,2003-2006,0.01,25,0.067,1,{-1: 8879},8879,,,0.0
7,2003-2006,0.01,30,0.05,1,{-1: 8879},8879,,,0.0
8,2003-2006,0.015,10,0.083,28,"{0: 59, 1: 69, 2: 29, 3: 292, 4: 202, 5: 15, 6...",7777,,,112.281
9,2003-2006,0.015,15,0.062,10,"{0: 64, 1: 88, 2: 236, 3: 15, 4: 15, 5: 43, 6:...",8316,,,89.9



Running DBSCAN for Year Range: 2004-2007
Runtime for dbscan_clustering: 0.120421 seconds
Runtime for dbscan_clustering: 0.100128 seconds
Runtime for dbscan_clustering: 0.093167 seconds
Runtime for dbscan_clustering: 0.083368 seconds
Runtime for dbscan_clustering: 0.052543 seconds
Runtime for dbscan_clustering: 0.071020 seconds
Runtime for dbscan_clustering: 0.074066 seconds
Runtime for dbscan_clustering: 0.070060 seconds
Runtime for dbscan_clustering: 0.078647 seconds
Runtime for dbscan_clustering: 0.091578 seconds
Runtime for dbscan_clustering: 0.082877 seconds
Runtime for dbscan_clustering: 0.070341 seconds
Runtime for dbscan_clustering: 0.084306 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2004-2007
eps                                                                   0.02
min_samples                                                             15
Runtime (s)                                                          0.100
Clusters                                                                14
Points per Cluster       {0: 589, 1: 17, 2: 976, 3: 16, 4: 122, 5: 44, ...
Noise Points                                                          6567
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              431.287
Name: 1, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2004-2007,0.02,10,0.12,20,"{0: 37, 1: 962, 2: 30, 3: 1341, 4: 18, 5: 9, 6...",5934,,,92.592
1,2004-2007,0.02,15,0.1,14,"{0: 589, 1: 17, 2: 976, 3: 16, 4: 122, 5: 44, ...",6567,,,431.287
2,2004-2007,0.02,25,0.093,7,"{0: 135, 1: 117, 2: 696, 3: 84, 4: 134, 5: 68,...",7315,,,233.276
3,2004-2007,0.02,30,0.083,8,"{0: 107, 1: 382, 2: 274, 3: 107, 4: 63, 5: 88,...",7481,,,199.833
4,2004-2007,0.01,10,0.053,8,"{0: 27, 1: 10, 2: 29, 3: 21, 4: 15, 5: 14, 6: ...",8426,,,189.621
5,2004-2007,0.01,15,0.071,4,"{0: 19, 1: 15, 2: 22, -1: 8493}",8493,,,61.336
6,2004-2007,0.01,25,0.074,1,{-1: 8549},8549,,,0.0
7,2004-2007,0.01,30,0.07,1,{-1: 8549},8549,,,0.0
8,2004-2007,0.015,10,0.079,26,"{0: 97, 1: 10, 2: 80, 3: 563, 4: 10, 5: 18, 6:...",7303,,,150.202
9,2004-2007,0.015,15,0.092,12,"{0: 248, 1: 68, 2: 55, 3: 40, 4: 148, 5: 25, 6...",7862,,,116.512



Running DBSCAN for Year Range: 2005-2008
Runtime for dbscan_clustering: 0.093813 seconds
Runtime for dbscan_clustering: 0.066883 seconds
Runtime for dbscan_clustering: 0.083013 seconds
Runtime for dbscan_clustering: 0.070524 seconds
Runtime for dbscan_clustering: 0.062009 seconds
Runtime for dbscan_clustering: 0.049848 seconds
Runtime for dbscan_clustering: 0.045496 seconds
Runtime for dbscan_clustering: 0.049580 seconds
Runtime for dbscan_clustering: 0.066331 seconds
Runtime for dbscan_clustering: 0.046540 seconds
Runtime for dbscan_clustering: 0.068544 seconds
Runtime for dbscan_clustering: 0.053136 seconds
Runtime for dbscan_clustering: 0.082869 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2005-2008
eps                                                                   0.02
min_samples                                                             15
Runtime (s)                                                          0.067
Clusters                                                                16
Points per Cluster       {0: 49, 1: 56, 2: 619, 3: 15, 4: 781, 5: 127, ...
Noise Points                                                          5617
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              401.398
Name: 1, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2005-2008,0.02,10,0.094,22,"{0: 1002, 1: 19, 2: 1232, 3: 29, 4: 16, 5: 18,...",5097,,,82.034
1,2005-2008,0.02,15,0.067,16,"{0: 49, 1: 56, 2: 619, 3: 15, 4: 781, 5: 127, ...",5617,,,401.398
2,2005-2008,0.02,25,0.083,8,"{0: 295, 1: 114, 2: 711, 3: 77, 4: 72, 5: 45, ...",6280,,,204.781
3,2005-2008,0.02,30,0.071,8,"{0: 127, 1: 672, 2: 105, 3: 47, 4: 118, 5: 59,...",6463,,,176.523
4,2005-2008,0.01,10,0.062,9,"{0: 26, 1: 29, 2: 22, 3: 16, 4: 14, 5: 7, 6: 1...",7487,,,108.148
5,2005-2008,0.01,15,0.05,4,"{0: 19, 1: 15, 2: 20, -1: 7567}",7567,,,58.762
6,2005-2008,0.01,25,0.045,1,{-1: 7621},7621,,,0.0
7,2005-2008,0.01,30,0.05,1,{-1: 7621},7621,,,0.0
8,2005-2008,0.015,10,0.066,28,"{0: 31, 1: 111, 2: 144, 3: 13, 4: 9, 5: 593, 6...",6281,,,163.395
9,2005-2008,0.015,15,0.047,13,"{0: 60, 1: 465, 2: 49, 3: 39, 4: 43, 5: 11, 6:...",6792,,,116.754



Running DBSCAN for Year Range: 2006-2009
Runtime for dbscan_clustering: 0.080332 seconds
Runtime for dbscan_clustering: 0.063679 seconds
Runtime for dbscan_clustering: 0.082901 seconds
Runtime for dbscan_clustering: 0.075786 seconds
Runtime for dbscan_clustering: 0.066597 seconds
Runtime for dbscan_clustering: 0.059046 seconds
Runtime for dbscan_clustering: 0.053685 seconds
Runtime for dbscan_clustering: 0.047302 seconds
Runtime for dbscan_clustering: 0.075476 seconds
Runtime for dbscan_clustering: 0.056956 seconds
Runtime for dbscan_clustering: 0.065085 seconds
Runtime for dbscan_clustering: 0.065844 seconds
Runtime for dbscan_clustering: 0.081162 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2006-2009
eps                                                                   0.02
min_samples                                                             15
Runtime (s)                                                          0.064
Clusters                                                                 9
Points per Cluster       {0: 22, 1: 48, 2: 697, 3: 843, 4: 100, 5: 33, ...
Noise Points                                                          5692
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                              339.171
Name: 1, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2006-2009,0.02,10,0.08,19,"{0: 946, 1: 1136, 2: 12, 3: 7, 4: 36, 5: 11, 6...",5179,,,100.084
1,2006-2009,0.02,15,0.064,9,"{0: 22, 1: 48, 2: 697, 3: 843, 4: 100, 5: 33, ...",5692,,,339.171
2,2006-2009,0.02,25,0.083,7,"{0: 322, 1: 92, 2: 639, 3: 54, 4: 27, 5: 62, -...",6283,,,278.57
3,2006-2009,0.02,30,0.076,8,"{0: 97, 1: 112, 2: 65, 3: 583, 4: 45, 5: 45, 6...",6495,,,199.532
4,2006-2009,0.01,10,0.067,10,"{0: 16, 1: 13, 2: 14, 3: 17, 4: 10, 5: 12, 6: ...",7357,,,150.337
5,2006-2009,0.01,15,0.059,1,{-1: 7479},7479,,,0.0
6,2006-2009,0.01,25,0.054,1,{-1: 7479},7479,,,0.0
7,2006-2009,0.01,30,0.047,1,{-1: 7479},7479,,,0.0
8,2006-2009,0.015,10,0.075,24,"{0: 16, 1: 183, 2: 108, 3: 59, 4: 17, 5: 517, ...",6310,,,150.928
9,2006-2009,0.015,15,0.057,13,"{0: 43, 1: 49, 2: 213, 3: 154, 4: 21, 5: 36, 6...",6790,,,111.956



Running DBSCAN for Year Range: 2007-2010
Runtime for dbscan_clustering: 0.238534 seconds
Runtime for dbscan_clustering: 0.133255 seconds
Runtime for dbscan_clustering: 0.127038 seconds
Runtime for dbscan_clustering: 0.150298 seconds
Runtime for dbscan_clustering: 0.111454 seconds
Runtime for dbscan_clustering: 0.093675 seconds
Runtime for dbscan_clustering: 0.091424 seconds
Runtime for dbscan_clustering: 0.097334 seconds
Runtime for dbscan_clustering: 0.109734 seconds
Runtime for dbscan_clustering: 0.180230 seconds
Runtime for dbscan_clustering: 0.129966 seconds
Runtime for dbscan_clustering: 0.109982 seconds
Runtime for dbscan_clustering: 0.134267 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                     2007-2010
eps                                                                 0.02
min_samples                                                           25
Runtime (s)                                                        0.127
Clusters                                                               6
Points per Cluster       {0: 66, 1: 105, 2: 45, 3: 473, 4: 42, -1: 4955}
Noise Points                                                        4955
Davies-Bouldin                                                      None
Calinski-Harabasz                                                   None
Weighted Mean ANOVA F                                            263.102
Name: 2, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2007-2010,0.02,10,0.239,14,"{0: 601, 1: 54, 2: 769, 3: 15, 4: 32, 5: 18, 6...",4070,,,203.226
1,2007-2010,0.02,15,0.133,6,"{0: 469, 1: 639, 2: 63, 3: 22, 4: 16, -1: 4477}",4477,,,261.205
2,2007-2010,0.02,25,0.127,6,"{0: 66, 1: 105, 2: 45, 3: 473, 4: 42, -1: 4955}",4955,,,263.102
3,2007-2010,0.02,30,0.15,5,"{0: 58, 1: 445, 2: 30, 3: 35, -1: 5118}",5118,,,207.73
4,2007-2010,0.01,10,0.111,5,"{0: 10, 1: 13, 2: 18, 3: 10, -1: 5635}",5635,,,45.098
5,2007-2010,0.01,15,0.094,2,"{0: 15, -1: 5671}",5671,,,0.0
6,2007-2010,0.01,25,0.091,1,{-1: 5686},5686,,,0.0
7,2007-2010,0.01,30,0.097,1,{-1: 5686},5686,,,0.0
8,2007-2010,0.015,10,0.11,18,"{0: 68, 1: 63, 2: 52, 3: 277, 4: 99, 5: 29, 6:...",4950,,,139.275
9,2007-2010,0.015,15,0.18,8,"{0: 36, 1: 29, 2: 77, 3: 156, 4: 41, 5: 22, 6:...",5310,,,129.344



Running DBSCAN for Year Range: 2008-2011
Runtime for dbscan_clustering: 0.157508 seconds
Runtime for dbscan_clustering: 0.051281 seconds
Runtime for dbscan_clustering: 0.117894 seconds
Runtime for dbscan_clustering: 0.051083 seconds
Runtime for dbscan_clustering: 0.033094 seconds
Runtime for dbscan_clustering: 0.033309 seconds
Runtime for dbscan_clustering: 0.034189 seconds
Runtime for dbscan_clustering: 0.033183 seconds
Runtime for dbscan_clustering: 0.022705 seconds
Runtime for dbscan_clustering: 0.033524 seconds
Runtime for dbscan_clustering: 0.041087 seconds
Runtime for dbscan_clustering: 0.039691 seconds
Runtime for dbscan_clustering: 0.049423 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2008-2011
eps                                                                   0.02
min_samples                                                             15
Runtime (s)                                                          0.051
Clusters                                                                 7
Points per Cluster       {0: 189, 1: 428, 2: 19, 3: 47, 4: 16, 5: 16, -...
Noise Points                                                          3349
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                               341.62
Name: 1, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2008-2011,0.02,10,0.158,16,"{0: 307, 1: 482, 2: 28, 3: 53, 4: 13, 5: 22, 6...",3030,,,199.647
1,2008-2011,0.02,15,0.051,7,"{0: 189, 1: 428, 2: 19, 3: 47, 4: 16, 5: 16, -...",3349,,,341.62
2,2008-2011,0.02,25,0.118,5,"{0: 180, 1: 39, 2: 25, 3: 27, -1: 3793}",3793,,,118.67
3,2008-2011,0.02,30,0.051,2,"{0: 130, -1: 3934}",3934,,,0.0
4,2008-2011,0.01,10,0.033,2,"{0: 16, -1: 4048}",4048,,,0.0
5,2008-2011,0.01,15,0.033,1,{-1: 4064},4064,,,0.0
6,2008-2011,0.01,25,0.034,1,{-1: 4064},4064,,,0.0
7,2008-2011,0.01,30,0.033,1,{-1: 4064},4064,,,0.0
8,2008-2011,0.015,10,0.023,15,"{0: 18, 1: 87, 2: 36, 3: 42, 4: 32, 5: 16, 6: ...",3691,,,71.165
9,2008-2011,0.015,15,0.034,5,"{0: 68, 1: 19, 2: 18, 3: 17, -1: 3942}",3942,,,75.846



Running DBSCAN for Year Range: 2009-2012
Runtime for dbscan_clustering: 0.055776 seconds
Runtime for dbscan_clustering: 0.035162 seconds
Runtime for dbscan_clustering: 0.030930 seconds
Runtime for dbscan_clustering: 0.057093 seconds
Runtime for dbscan_clustering: 0.057901 seconds
Runtime for dbscan_clustering: 0.047345 seconds
Runtime for dbscan_clustering: 0.045830 seconds
Runtime for dbscan_clustering: 0.061591 seconds
Runtime for dbscan_clustering: 0.054693 seconds
Runtime for dbscan_clustering: 0.067618 seconds
Runtime for dbscan_clustering: 0.053301 seconds
Runtime for dbscan_clustering: 0.050301 seconds
Runtime for dbscan_clustering: 0.060622 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2009-2012
eps                                                                   0.02
min_samples                                                             10
Runtime (s)                                                          0.056
Clusters                                                                15
Points per Cluster       {0: 164, 1: 402, 2: 13, 3: 33, 4: 19, 5: 10, 6...
Noise Points                                                          2798
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                               279.19
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2009-2012,0.02,10,0.056,15,"{0: 164, 1: 402, 2: 13, 3: 33, 4: 19, 5: 10, 6...",2798,,,279.19
1,2009-2012,0.02,15,0.035,8,"{0: 111, 1: 51, 2: 198, 3: 74, 4: 25, 5: 10, 6...",3066,,,112.641
2,2009-2012,0.02,25,0.031,2,"{0: 111, -1: 3449}",3449,,,0.0
3,2009-2012,0.02,30,0.057,2,"{0: 58, -1: 3502}",3502,,,0.0
4,2009-2012,0.01,10,0.058,1,{-1: 3560},3560,,,0.0
5,2009-2012,0.01,15,0.047,1,{-1: 3560},3560,,,0.0
6,2009-2012,0.01,25,0.046,1,{-1: 3560},3560,,,0.0
7,2009-2012,0.01,30,0.062,1,{-1: 3560},3560,,,0.0
8,2009-2012,0.015,10,0.055,14,"{0: 19, 1: 73, 2: 11, 3: 17, 4: 15, 5: 25, 6: ...",3297,,,47.976
9,2009-2012,0.015,15,0.068,2,"{0: 43, -1: 3517}",3517,,,0.0



Running DBSCAN for Year Range: 2010-2013
Runtime for dbscan_clustering: 0.013153 seconds
Runtime for dbscan_clustering: 0.008887 seconds
Runtime for dbscan_clustering: 0.017800 seconds
Runtime for dbscan_clustering: 0.010200 seconds
Runtime for dbscan_clustering: 0.008379 seconds
Runtime for dbscan_clustering: 0.004152 seconds
Runtime for dbscan_clustering: 0.008791 seconds
Runtime for dbscan_clustering: 0.008478 seconds
Runtime for dbscan_clustering: 0.008154 seconds
Runtime for dbscan_clustering: 0.012176 seconds
Runtime for dbscan_clustering: 0.008599 seconds
Runtime for dbscan_clustering: 0.007047 seconds
Runtime for dbscan_clustering: 0.008988 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                     2010-2013
eps                                                0.015
min_samples                                           10
Runtime (s)                                        0.008
Clusters                                               4
Points per Cluster       {0: 15, 1: 11, 2: 10, -1: 1543}
Noise Points                                        1543
Davies-Bouldin                                      None
Calinski-Harabasz                                   None
Weighted Mean ANOVA F                             36.175
Name: 8, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2010-2013,0.02,10,0.013,7,"{0: 10, 1: 17, 2: 35, 3: 19, 4: 24, 5: 14, -1:...",1460,,,29.044
1,2010-2013,0.02,15,0.009,2,"{0: 26, -1: 1553}",1553,,,0.0
2,2010-2013,0.02,25,0.018,1,{-1: 1579},1579,,,0.0
3,2010-2013,0.02,30,0.01,1,{-1: 1579},1579,,,0.0
4,2010-2013,0.01,10,0.008,1,{-1: 1579},1579,,,0.0
5,2010-2013,0.01,15,0.004,1,{-1: 1579},1579,,,0.0
6,2010-2013,0.01,25,0.009,1,{-1: 1579},1579,,,0.0
7,2010-2013,0.01,30,0.008,1,{-1: 1579},1579,,,0.0
8,2010-2013,0.015,10,0.008,4,"{0: 15, 1: 11, 2: 10, -1: 1543}",1543,,,36.175
9,2010-2013,0.015,15,0.012,1,{-1: 1579},1579,,,0.0



Running DBSCAN for Year Range: 2011-2014
Runtime for dbscan_clustering: 0.004997 seconds
Runtime for dbscan_clustering: 0.011656 seconds
Runtime for dbscan_clustering: 0.008193 seconds
Runtime for dbscan_clustering: 0.004372 seconds
Runtime for dbscan_clustering: 0.004196 seconds
Runtime for dbscan_clustering: 0.003657 seconds
Runtime for dbscan_clustering: 0.003833 seconds
Runtime for dbscan_clustering: 0.004399 seconds
Runtime for dbscan_clustering: 0.004648 seconds
Runtime for dbscan_clustering: 0.004192 seconds
Runtime for dbscan_clustering: 0.006020 seconds
Runtime for dbscan_clustering: 0.004306 seconds
Runtime for dbscan_clustering: 0.004133 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                             2011-2014
eps                                         0.02
min_samples                                   10
Runtime (s)                                0.005
Clusters                                       3
Points per Cluster       {0: 13, 1: 16, -1: 837}
Noise Points                                 837
Davies-Bouldin                              None
Calinski-Harabasz                           None
Weighted Mean ANOVA F                      2.197
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2011-2014,0.02,10,0.005,3,"{0: 13, 1: 16, -1: 837}",837,,,2.197
1,2011-2014,0.02,15,0.012,1,{-1: 866},866,,,0.0
2,2011-2014,0.02,25,0.008,1,{-1: 866},866,,,0.0
3,2011-2014,0.02,30,0.004,1,{-1: 866},866,,,0.0
4,2011-2014,0.01,10,0.004,1,{-1: 866},866,,,0.0
5,2011-2014,0.01,15,0.004,1,{-1: 866},866,,,0.0
6,2011-2014,0.01,25,0.004,1,{-1: 866},866,,,0.0
7,2011-2014,0.01,30,0.004,1,{-1: 866},866,,,0.0
8,2011-2014,0.015,10,0.005,2,"{0: 10, -1: 856}",856,,,0.0
9,2011-2014,0.015,15,0.004,1,{-1: 866},866,,,0.0



Running DBSCAN for Year Range: 2012-2015
Runtime for dbscan_clustering: 0.008002 seconds
Runtime for dbscan_clustering: 0.008421 seconds
Runtime for dbscan_clustering: 0.008307 seconds
Runtime for dbscan_clustering: 0.008065 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.002014 seconds
Runtime for dbscan_clustering: 0.004012 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.008214 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.010091 seconds
Runtime for dbscan_clustering: 0.004272 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range               2012-2015
eps                           0.02
min_samples                     10
Runtime (s)                  0.008
Clusters                         1
Points per Cluster       {-1: 870}
Noise Points                   870
Davies-Bouldin                None
Calinski-Harabasz             None
Weighted Mean ANOVA F          0.0
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2012-2015,0.02,10,0.008,1,{-1: 870},870,,,0.0
1,2012-2015,0.02,15,0.008,1,{-1: 870},870,,,0.0
2,2012-2015,0.02,25,0.008,1,{-1: 870},870,,,0.0
3,2012-2015,0.02,30,0.008,1,{-1: 870},870,,,0.0
4,2012-2015,0.01,10,0.0,1,{-1: 870},870,,,0.0
5,2012-2015,0.01,15,0.002,1,{-1: 870},870,,,0.0
6,2012-2015,0.01,25,0.004,1,{-1: 870},870,,,0.0
7,2012-2015,0.01,30,0.0,1,{-1: 870},870,,,0.0
8,2012-2015,0.015,10,0.0,1,{-1: 870},870,,,0.0
9,2012-2015,0.015,15,0.008,1,{-1: 870},870,,,0.0



Running DBSCAN for Year Range: 2013-2016
Runtime for dbscan_clustering: 0.008475 seconds
Runtime for dbscan_clustering: 0.008338 seconds
Runtime for dbscan_clustering: 0.010065 seconds
Runtime for dbscan_clustering: 0.008052 seconds
Runtime for dbscan_clustering: 0.008517 seconds
Runtime for dbscan_clustering: 0.008356 seconds
Runtime for dbscan_clustering: 0.008552 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.006020 seconds
Runtime for dbscan_clustering: 0.008154 seconds
Runtime for dbscan_clustering: 0.008286 seconds
Runtime for dbscan_clustering: 0.016572 seconds
Runtime for dbscan_clustering: 0.008439 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                2013-2016
eps                            0.02
min_samples                      10
Runtime (s)                   0.008
Clusters                          1
Points per Cluster       {-1: 1327}
Noise Points                   1327
Davies-Bouldin                 None
Calinski-Harabasz              None
Weighted Mean ANOVA F           0.0
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2013-2016,0.02,10,0.008,1,{-1: 1327},1327,,,0.0
1,2013-2016,0.02,15,0.008,1,{-1: 1327},1327,,,0.0
2,2013-2016,0.02,25,0.01,1,{-1: 1327},1327,,,0.0
3,2013-2016,0.02,30,0.008,1,{-1: 1327},1327,,,0.0
4,2013-2016,0.01,10,0.009,1,{-1: 1327},1327,,,0.0
5,2013-2016,0.01,15,0.008,1,{-1: 1327},1327,,,0.0
6,2013-2016,0.01,25,0.009,1,{-1: 1327},1327,,,0.0
7,2013-2016,0.01,30,0.0,1,{-1: 1327},1327,,,0.0
8,2013-2016,0.015,10,0.006,1,{-1: 1327},1327,,,0.0
9,2013-2016,0.015,15,0.008,1,{-1: 1327},1327,,,0.0



Running DBSCAN for Year Range: 2014-2017
Runtime for dbscan_clustering: 0.011281 seconds
Runtime for dbscan_clustering: 0.010871 seconds
Runtime for dbscan_clustering: 0.008484 seconds
Runtime for dbscan_clustering: 0.047466 seconds
Runtime for dbscan_clustering: 0.017220 seconds
Runtime for dbscan_clustering: 0.005579 seconds
Runtime for dbscan_clustering: 0.008478 seconds
Runtime for dbscan_clustering: 0.008020 seconds
Runtime for dbscan_clustering: 0.008571 seconds
Runtime for dbscan_clustering: 0.014383 seconds
Runtime for dbscan_clustering: 0.002873 seconds
Runtime for dbscan_clustering: 0.008075 seconds
Runtime for dbscan_clustering: 0.016001 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                       2014-2017
eps                                   0.02
min_samples                             10
Runtime (s)                          0.011
Clusters                                 2
Points per Cluster       {0: 14, -1: 1883}
Noise Points                          1883
Davies-Bouldin                        None
Calinski-Harabasz                     None
Weighted Mean ANOVA F                  0.0
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2014-2017,0.02,10,0.011,2,"{0: 14, -1: 1883}",1883,,,0.0
1,2014-2017,0.02,15,0.011,1,{-1: 1897},1897,,,0.0
2,2014-2017,0.02,25,0.008,1,{-1: 1897},1897,,,0.0
3,2014-2017,0.02,30,0.047,1,{-1: 1897},1897,,,0.0
4,2014-2017,0.01,10,0.017,1,{-1: 1897},1897,,,0.0
5,2014-2017,0.01,15,0.006,1,{-1: 1897},1897,,,0.0
6,2014-2017,0.01,25,0.008,1,{-1: 1897},1897,,,0.0
7,2014-2017,0.01,30,0.008,1,{-1: 1897},1897,,,0.0
8,2014-2017,0.015,10,0.009,1,{-1: 1897},1897,,,0.0
9,2014-2017,0.015,15,0.014,1,{-1: 1897},1897,,,0.0



Running DBSCAN for Year Range: 2015-2018
Runtime for dbscan_clustering: 0.017869 seconds
Runtime for dbscan_clustering: 0.038876 seconds
Runtime for dbscan_clustering: 0.015431 seconds
Runtime for dbscan_clustering: 0.016944 seconds
Runtime for dbscan_clustering: 0.010167 seconds
Runtime for dbscan_clustering: 0.008360 seconds
Runtime for dbscan_clustering: 0.014335 seconds
Runtime for dbscan_clustering: 0.016932 seconds
Runtime for dbscan_clustering: 0.018213 seconds
Runtime for dbscan_clustering: 0.018353 seconds
Runtime for dbscan_clustering: 0.013886 seconds
Runtime for dbscan_clustering: 0.014001 seconds
Runtime for dbscan_clustering: 0.019992 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                              2015-2018
eps                                          0.02
min_samples                                    10
Runtime (s)                                 0.018
Clusters                                        3
Points per Cluster       {0: 12, 1: 18, -1: 2612}
Noise Points                                 2612
Davies-Bouldin                               None
Calinski-Harabasz                            None
Weighted Mean ANOVA F                      28.339
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2015-2018,0.02,10,0.018,3,"{0: 12, 1: 18, -1: 2612}",2612,,,28.339
1,2015-2018,0.02,15,0.039,2,"{0: 15, -1: 2627}",2627,,,0.0
2,2015-2018,0.02,25,0.015,1,{-1: 2642},2642,,,0.0
3,2015-2018,0.02,30,0.017,1,{-1: 2642},2642,,,0.0
4,2015-2018,0.01,10,0.01,1,{-1: 2642},2642,,,0.0
5,2015-2018,0.01,15,0.008,1,{-1: 2642},2642,,,0.0
6,2015-2018,0.01,25,0.014,1,{-1: 2642},2642,,,0.0
7,2015-2018,0.01,30,0.017,1,{-1: 2642},2642,,,0.0
8,2015-2018,0.015,10,0.018,2,"{0: 13, -1: 2629}",2629,,,0.0
9,2015-2018,0.015,15,0.018,1,{-1: 2642},2642,,,0.0



Running DBSCAN for Year Range: 2016-2019
Runtime for dbscan_clustering: 0.020995 seconds
Runtime for dbscan_clustering: 0.025699 seconds
Runtime for dbscan_clustering: 0.022944 seconds
Runtime for dbscan_clustering: 0.017606 seconds
Runtime for dbscan_clustering: 0.012995 seconds
Runtime for dbscan_clustering: 0.015074 seconds
Runtime for dbscan_clustering: 0.015013 seconds
Runtime for dbscan_clustering: 0.014282 seconds
Runtime for dbscan_clustering: 0.016002 seconds
Runtime for dbscan_clustering: 0.014837 seconds
Runtime for dbscan_clustering: 0.020949 seconds
Runtime for dbscan_clustering: 0.018033 seconds
Runtime for dbscan_clustering: 0.019098 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                              2016-2019
eps                                         0.015
min_samples                                    10
Runtime (s)                                 0.016
Clusters                                        3
Points per Cluster       {0: 10, 1: 10, -1: 2690}
Noise Points                                 2690
Davies-Bouldin                               None
Calinski-Harabasz                            None
Weighted Mean ANOVA F                     667.595
Name: 8, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2016-2019,0.02,10,0.021,6,"{0: 12, 1: 13, 2: 23, 3: 13, 4: 10, -1: 2639}",2639,,,502.137
1,2016-2019,0.02,15,0.026,2,"{0: 16, -1: 2694}",2694,,,0.0
2,2016-2019,0.02,25,0.023,1,{-1: 2710},2710,,,0.0
3,2016-2019,0.02,30,0.018,1,{-1: 2710},2710,,,0.0
4,2016-2019,0.01,10,0.013,1,{-1: 2710},2710,,,0.0
5,2016-2019,0.01,15,0.015,1,{-1: 2710},2710,,,0.0
6,2016-2019,0.01,25,0.015,1,{-1: 2710},2710,,,0.0
7,2016-2019,0.01,30,0.014,1,{-1: 2710},2710,,,0.0
8,2016-2019,0.015,10,0.016,3,"{0: 10, 1: 10, -1: 2690}",2690,,,667.595
9,2016-2019,0.015,15,0.015,1,{-1: 2710},2710,,,0.0



Running DBSCAN for Year Range: 2017-2020
Runtime for dbscan_clustering: 0.024010 seconds
Runtime for dbscan_clustering: 0.021999 seconds
Runtime for dbscan_clustering: 0.020987 seconds
Runtime for dbscan_clustering: 0.019000 seconds
Runtime for dbscan_clustering: 0.021004 seconds
Runtime for dbscan_clustering: 0.017042 seconds
Runtime for dbscan_clustering: 0.027908 seconds
Runtime for dbscan_clustering: 0.017015 seconds
Runtime for dbscan_clustering: 0.021040 seconds
Runtime for dbscan_clustering: 0.018983 seconds
Runtime for dbscan_clustering: 0.021384 seconds
Runtime for dbscan_clustering: 0.018455 seconds
Runtime for dbscan_clustering: 0.020965 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2017-2020
eps                                                                   0.02
min_samples                                                             10
Runtime (s)                                                          0.024
Clusters                                                                 9
Points per Cluster       {0: 21, 1: 18, 2: 28, 3: 17, 4: 21, 5: 11, 6: ...
Noise Points                                                          3154
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                            12043.191
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2017-2020,0.02,10,0.024,9,"{0: 21, 1: 18, 2: 28, 3: 17, 4: 21, 5: 11, 6: ...",3154,,,12043.191
1,2017-2020,0.02,15,0.022,3,"{0: 25, 1: 15, -1: 3251}",3251,,,1438.69
2,2017-2020,0.02,25,0.021,1,{-1: 3291},3291,,,0.0
3,2017-2020,0.02,30,0.019,1,{-1: 3291},3291,,,0.0
4,2017-2020,0.01,10,0.021,1,{-1: 3291},3291,,,0.0
5,2017-2020,0.01,15,0.017,1,{-1: 3291},3291,,,0.0
6,2017-2020,0.01,25,0.028,1,{-1: 3291},3291,,,0.0
7,2017-2020,0.01,30,0.017,1,{-1: 3291},3291,,,0.0
8,2017-2020,0.015,10,0.021,3,"{0: 11, 1: 11, -1: 3269}",3269,,,2159.445
9,2017-2020,0.015,15,0.019,1,{-1: 3291},3291,,,0.0



Running DBSCAN for Year Range: 2018-2021
Runtime for dbscan_clustering: 0.027998 seconds
Runtime for dbscan_clustering: 0.056955 seconds
Runtime for dbscan_clustering: 0.028143 seconds
Runtime for dbscan_clustering: 0.016898 seconds
Runtime for dbscan_clustering: 0.016176 seconds
Runtime for dbscan_clustering: 0.025106 seconds
Runtime for dbscan_clustering: 0.020503 seconds
Runtime for dbscan_clustering: 0.019198 seconds
Runtime for dbscan_clustering: 0.022151 seconds
Runtime for dbscan_clustering: 0.027445 seconds
Runtime for dbscan_clustering: 0.024646 seconds
Runtime for dbscan_clustering: 0.022988 seconds
Runtime for dbscan_clustering: 0.025001 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2018-2021
eps                                                                   0.02
min_samples                                                             10
Runtime (s)                                                          0.028
Clusters                                                                12
Points per Cluster       {0: 15, 1: 10, 2: 37, 3: 31, 4: 24, 5: 18, 6: ...
Noise Points                                                          3222
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                             12609.69
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2018-2021,0.02,10,0.028,12,"{0: 15, 1: 10, 2: 37, 3: 31, 4: 24, 5: 18, 6: ...",3222,,,12609.69
1,2018-2021,0.02,15,0.057,4,"{0: 23, 1: 17, 2: 15, -1: 3387}",3387,,,922.713
2,2018-2021,0.02,25,0.028,1,{-1: 3442},3442,,,0.0
3,2018-2021,0.02,30,0.017,1,{-1: 3442},3442,,,0.0
4,2018-2021,0.01,10,0.016,1,{-1: 3442},3442,,,0.0
5,2018-2021,0.01,15,0.025,1,{-1: 3442},3442,,,0.0
6,2018-2021,0.01,25,0.021,1,{-1: 3442},3442,,,0.0
7,2018-2021,0.01,30,0.019,1,{-1: 3442},3442,,,0.0
8,2018-2021,0.015,10,0.022,2,"{0: 18, -1: 3424}",3424,,,0.0
9,2018-2021,0.015,15,0.027,1,{-1: 3442},3442,,,0.0



Running DBSCAN for Year Range: 2019-2022
Runtime for dbscan_clustering: 0.023093 seconds
Runtime for dbscan_clustering: 0.026963 seconds
Runtime for dbscan_clustering: 0.025705 seconds
Runtime for dbscan_clustering: 0.030406 seconds
Runtime for dbscan_clustering: 0.020283 seconds
Runtime for dbscan_clustering: 0.027591 seconds
Runtime for dbscan_clustering: 0.031765 seconds
Runtime for dbscan_clustering: 0.024419 seconds
Runtime for dbscan_clustering: 0.018476 seconds
Runtime for dbscan_clustering: 0.017552 seconds
Runtime for dbscan_clustering: 0.033052 seconds
Runtime for dbscan_clustering: 0.024037 seconds
Runtime for dbscan_clustering: 0.025207 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2019-2022
eps                                                                   0.02
min_samples                                                             10
Runtime (s)                                                          0.023
Clusters                                                                20
Points per Cluster       {0: 71, 1: 15, 2: 17, 3: 10, 4: 36, 5: 15, 6: ...
Noise Points                                                          3262
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                             2684.112
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2019-2022,0.02,10,0.023,20,"{0: 71, 1: 15, 2: 17, 3: 10, 4: 36, 5: 15, 6: ...",3262,,,2684.112
1,2019-2022,0.02,15,0.027,3,"{0: 49, 1: 32, -1: 3528}",3528,,,607.808
2,2019-2022,0.02,25,0.026,1,{-1: 3609},3609,,,0.0
3,2019-2022,0.02,30,0.03,1,{-1: 3609},3609,,,0.0
4,2019-2022,0.01,10,0.02,1,{-1: 3609},3609,,,0.0
5,2019-2022,0.01,15,0.028,1,{-1: 3609},3609,,,0.0
6,2019-2022,0.01,25,0.032,1,{-1: 3609},3609,,,0.0
7,2019-2022,0.01,30,0.024,1,{-1: 3609},3609,,,0.0
8,2019-2022,0.015,10,0.018,6,"{0: 20, 1: 21, 2: 10, 3: 11, 4: 10, -1: 3537}",3537,,,470.541
9,2019-2022,0.015,15,0.018,1,{-1: 3609},3609,,,0.0



Running DBSCAN for Year Range: 2020-2023
Runtime for dbscan_clustering: 0.035340 seconds
Runtime for dbscan_clustering: 0.033391 seconds
Runtime for dbscan_clustering: 0.033021 seconds
Runtime for dbscan_clustering: 0.033222 seconds
Runtime for dbscan_clustering: 0.026034 seconds
Runtime for dbscan_clustering: 0.027105 seconds
Runtime for dbscan_clustering: 0.028706 seconds
Runtime for dbscan_clustering: 0.020517 seconds
Runtime for dbscan_clustering: 0.051768 seconds
Runtime for dbscan_clustering: 0.033111 seconds
Runtime for dbscan_clustering: 0.031460 seconds
Runtime for dbscan_clustering: 0.033531 seconds
Runtime for dbscan_clustering: 0.034890 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2020-2023
eps                                                                   0.02
min_samples                                                             10
Runtime (s)                                                          0.035
Clusters                                                                26
Points per Cluster       {0: 10, 1: 23, 2: 18, 3: 60, 4: 17, 5: 28, 6: ...
Noise Points                                                          3884
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                             2954.547
Name: 0, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2020-2023,0.02,10,0.035,26,"{0: 10, 1: 23, 2: 18, 3: 60, 4: 17, 5: 28, 6: ...",3884,,,2954.547
1,2020-2023,0.02,15,0.033,5,"{0: 15, 1: 53, 2: 15, 3: 33, -1: 4256}",4256,,,85.899
2,2020-2023,0.02,25,0.033,1,{-1: 4372},4372,,,0.0
3,2020-2023,0.02,30,0.033,1,{-1: 4372},4372,,,0.0
4,2020-2023,0.01,10,0.026,1,{-1: 4372},4372,,,0.0
5,2020-2023,0.01,15,0.027,1,{-1: 4372},4372,,,0.0
6,2020-2023,0.01,25,0.029,1,{-1: 4372},4372,,,0.0
7,2020-2023,0.01,30,0.021,1,{-1: 4372},4372,,,0.0
8,2020-2023,0.015,10,0.052,8,"{0: 11, 1: 24, 2: 11, 3: 10, 4: 17, 5: 11, 6: ...",4274,,,415.989
9,2020-2023,0.015,15,0.033,1,{-1: 4372},4372,,,0.0
