# Main with 5d 
Dimensions used: Eccentricity, magnitude, perigee, inclination, raan  
Files: clustering_utils_5d.py, cluster_data_5d.py

# KMeans

In [2]:
import cluster_data_5d
from cluster_data_5d import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils_5d import ClusterData
import cluster_plotter
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt

standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)  # Include 2023
running_ranges = cluster_data_5d.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data_5d.bin_data_for_clustering(running_ranges, print_res=False)

k_values = [5, 6, 7]

results_per_year_range = {}

# Output directory for plots
plot_dir = "Images\\k_means_running_bins_5d_with_perigee"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:  
    print(f"\nRunning K-Means for Year Range: {year_range}")

    data_array = np.array([cluster_data.inc, cluster_data.raan, cluster_data.ecc]).T
    feature_names = ["inclination [°]", "RAAN [°]", "Eccentricity"]

    normalized_data, data_min, data_max = normalize_data(data_array)

    k_values = [3, 4, 5, 6, 7]

    results = []

    for k in k_values:
        result_kmeans, time_kmeans, n_clusters_kmeans, points_per_cluster_kmeans, metrics_kmeans = run_clustering(
            k_means, f"K-means (k={k})", normalized_data, data_min, data_max, k, init='kmeans++'
        )

        unnormalized_data, cluster_centers = unnormalize(
            result_kmeans.data, result_kmeans.cluster_centers, data_min, data_max
        )

        results.append({
            "Year Range": year_range,
            "k": k,
            "Runtime (s)": f"{time_kmeans:.3f}",
            "Clusters": n_clusters_kmeans,
            "Points per Cluster": points_per_cluster_kmeans,
            "Davies-Bouldin": f"{metrics_kmeans[0]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[0], (int, float)) else None,
            "Calinski-Harabasz": f"{metrics_kmeans[1]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[1], (int, float)) else None,
            "Dunn Index": f"{metrics_kmeans[2]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[2], (int, float)) else None,
            "Silhouette Score": f"{metrics_kmeans[3]:.3f}" if metrics_kmeans and isinstance(metrics_kmeans[3], (int, float)) else None
        })

        labels = result_kmeans.labels
        """high_dim_analysis.plot_co_membership(
            labels,
            output_folder=plot_dir,
            filename=f"co_membership_{year_range}_k{k}.png"
        )"""
        high_dim_analysis.plot_correlation_heatmap(
            cluster_data,
            output_folder=plot_dir,
            filename=f"corr_{year_range}_k{k}.png"
        )
        high_dim_analysis.plot_mutual_information(
            cluster_data,
            labels,
            output_folder=plot_dir,
            filename=f"mi_{year_range}_k{k}.png"
        )

        from sklearn.feature_selection import f_classif
        X = np.array([cluster_data.inc, cluster_data.raan, cluster_data.perigee,
              cluster_data.ecc, cluster_data.mag]).T
        valid = labels != -1  # Exclude noise points (usually not present for kmeans)
        if np.any(valid) and len(np.unique(labels[valid])) > 1:
            f_vals, _ = f_classif(X[valid], labels[valid])
            mean_f = np.mean(f_vals)
        else:
            mean_f = 0.0  # Avoid invalid or single-cluster cases

        results[-1]["Mean ANOVA F"] = f"{mean_f:.3f}"

    df = pd.DataFrame(results)
    df["Mean ANOVA F"] = pd.to_numeric(df["Mean ANOVA F"], errors="coerce")
    df_sorted = df.sort_values(by="Mean ANOVA F", ascending=False)
    best_params = df_sorted.iloc[0]
    best_k = best_params["k"]

    # Re-run clustering with best parameters to get labels
    best_result, _, _, _, _ = run_clustering(
        k_means, f"K-means (k={best_k})", normalized_data, data_min, data_max, int(best_k), init='kmeans++'
    )
    best_labels = best_result.labels

    # Plot pairplot colored by cluster labels and save to plot_dir
    df_plot = pd.DataFrame(data_array, columns=feature_names)
    df_plot['cluster'] = best_labels.astype(str)  # cluster labels as strings for hue

    pairplot = sns.pairplot(df_plot, hue='cluster', diag_kind='kde', plot_kws={'alpha': 0.6, 's': 8})
    pairplot.fig.suptitle(f"K-Means Clusters for Year Range {year_range}", y=1.02)
    pairplot.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_k{best_k}.png"))
    plt.close(pairplot.fig)

    # Plot ANOVA F-values only for best result
    high_dim_analysis.plot_anova_f_values(
        cluster_data,
        best_labels,
        output_folder=plot_dir,
        filename=f"anova_best_{year_range}_k{best_k}.png",
        title=f"ANOVA F-values (Best KMeans Parameters), k = {int(best_k)}"
    )
    print("Best KMeans parameters based on ANOVA F-value:")
    print(best_params)

    display(df)


Running K-Means for Year Range: 2002-2005
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.015187 seconds
Runtime for k_means: 0.009794 seconds
Runtime for k_means: 0.034844 seconds
Runtime for k_means: 0.016660 seconds
Runtime for k_means: 0.000000 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                              2002-2005
k                                               3
Runtime (s)                                 0.000
Clusters                                        3
Points per Cluster    {0: 4365, 1: 2318, 2: 1354}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                              24616.4
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2002-2005,3,0.0,3,"{0: 4365, 1: 2318, 2: 1354}",,,,,24616.4
1,2002-2005,4,0.015,4,"{0: 1076, 1: 4363, 2: 1001, 3: 1597}",,,,,17694.073
2,2002-2005,5,0.01,5,"{0: 996, 1: 1592, 2: 1458, 3: 2913, 4: 1078}",,,,,14273.368
3,2002-2005,6,0.035,6,"{0: 1547, 1: 2911, 2: 984, 3: 1458, 4: 111, 5:...",,,,,11661.928
4,2002-2005,7,0.017,7,"{0: 709, 1: 4158, 2: 1398, 3: 568, 4: 855, 5: ...",,,,,12648.052



Running K-Means for Year Range: 2003-2006
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.036316 seconds
Runtime for k_means: 0.035908 seconds
Runtime for k_means: 0.043586 seconds
Runtime for k_means: 0.032285 seconds
Runtime for k_means: 0.011110 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                              2003-2006
k                                               3
Runtime (s)                                 0.000
Clusters                                        3
Points per Cluster    {0: 5211, 1: 2219, 2: 1449}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            25304.639
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2003-2006,3,0.0,3,"{0: 5211, 1: 2219, 2: 1449}",,,,,25304.639
1,2003-2006,4,0.036,4,"{0: 5207, 1: 1186, 2: 1048, 3: 1438}",,,,,18058.964
2,2003-2006,5,0.036,5,"{0: 1103, 1: 5204, 2: 835, 3: 719, 4: 1018}",,,,,14209.666
3,2003-2006,6,0.044,6,"{0: 5206, 1: 1095, 2: 1018, 3: 134, 4: 440, 5:...",,,,,14729.242
4,2003-2006,7,0.032,7,"{0: 3002, 1: 1080, 2: 108, 3: 865, 4: 746, 5: ...",,,,,11078.229



Running K-Means for Year Range: 2004-2007
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.008001 seconds
Runtime for k_means: 0.015035 seconds
Runtime for k_means: 0.047417 seconds
Runtime for k_means: 0.052305 seconds
Runtime for k_means: 0.022002 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2004-2007
k                                              3
Runtime (s)                                0.000
Clusters                                       3
Points per Cluster    {0: 5362, 1: 295, 2: 2892}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           21678.856
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2004-2007,3,0.0,3,"{0: 5362, 1: 295, 2: 2892}",,,,,21678.856
1,2004-2007,4,0.008,4,"{0: 2779, 1: 3026, 2: 185, 3: 2559}",,,,,17996.997
2,2004-2007,5,0.015,5,"{0: 1053, 1: 5558, 2: 298, 3: 691, 4: 949}",,,,,18509.463
3,2004-2007,6,0.047,6,"{0: 5249, 1: 1050, 2: 259, 3: 732, 4: 1164, 5:...",,,,,12653.144
4,2004-2007,7,0.052,7,"{0: 905, 1: 988, 2: 2541, 3: 3019, 4: 686, 5: ...",,,,,13263.98



Running K-Means for Year Range: 2005-2008
Runtime for k_means: 0.030810 seconds
Runtime for k_means: 0.039288 seconds
Runtime for k_means: 0.031834 seconds
Runtime for k_means: 0.033163 seconds
Runtime for k_means: 0.032276 seconds
Runtime for k_means: 0.015852 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2005-2008
k                                              3
Runtime (s)                                0.031
Clusters                                       3
Points per Cluster    {0: 968, 1: 5102, 2: 1551}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                             23183.8
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2005-2008,3,0.031,3,"{0: 968, 1: 5102, 2: 1551}",,,,,23183.8
1,2005-2008,4,0.039,4,"{0: 898, 1: 5099, 2: 594, 3: 1030}",,,,,16073.922
2,2005-2008,5,0.032,5,"{0: 593, 1: 2338, 2: 1030, 3: 897, 4: 2763}",,,,,13023.197
3,2005-2008,6,0.033,6,"{0: 2251, 1: 910, 2: 562, 3: 1036, 4: 2686, 5:...",,,,,11210.354
4,2005-2008,7,0.032,7,"{0: 2249, 1: 548, 2: 818, 3: 2686, 4: 261, 5: ...",,,,,12914.784



Running K-Means for Year Range: 2006-2009
Runtime for k_means: 0.023622 seconds
Runtime for k_means: 0.008030 seconds
Runtime for k_means: 0.021112 seconds
Runtime for k_means: 0.017468 seconds
Runtime for k_means: 0.072195 seconds
Runtime for k_means: 0.013073 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                             2006-2009
k                                              3
Runtime (s)                                0.024
Clusters                                       3
Points per Cluster    {0: 5135, 1: 975, 2: 1369}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           22624.486
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2006-2009,3,0.024,3,"{0: 5135, 1: 975, 2: 1369}",,,,,22624.486
1,2006-2009,4,0.008,4,"{0: 5133, 1: 839, 2: 577, 3: 930}",,,,,15936.942
2,2006-2009,5,0.021,5,"{0: 2415, 1: 1365, 2: 157, 3: 2569, 4: 973}",,,,,12296.248
3,2006-2009,6,0.017,6,"{0: 2684, 1: 800, 2: 2452, 3: 423, 4: 475, 5: ...",,,,,10858.032
4,2006-2009,7,0.072,7,"{0: 2435, 1: 513, 2: 586, 3: 549, 4: 665, 5: 1...",,,,,9849.697



Running K-Means for Year Range: 2007-2010
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.016542 seconds
Runtime for k_means: 0.017542 seconds
Runtime for k_means: 0.011041 seconds
Runtime for k_means: 0.012849 seconds
Runtime for k_means: 0.005763 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                             2007-2010
k                                              3
Runtime (s)                                0.000
Clusters                                       3
Points per Cluster    {0: 3992, 1: 1591, 2: 103}
Davies-Bouldin                              None
Calinski-Harabasz                           None
Dunn Index                                  None
Silhouette Score                            None
Mean ANOVA F                           18249.678
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2007-2010,3,0.0,3,"{0: 3992, 1: 1591, 2: 103}",,,,,18249.678
1,2007-2010,4,0.017,4,"{0: 4054, 1: 665, 2: 578, 3: 389}",,,,,12554.421
2,2007-2010,5,0.018,5,"{0: 2128, 1: 1922, 2: 615, 3: 102, 4: 919}",,,,,9694.854
3,2007-2010,6,0.011,6,"{0: 1910, 1: 609, 2: 102, 3: 896, 4: 2063, 5: ...",,,,,8710.363
4,2007-2010,7,0.013,7,"{0: 100, 1: 375, 2: 1911, 3: 2069, 4: 517, 5: ...",,,,,7810.419



Running K-Means for Year Range: 2008-2011
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.008030 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.016005 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                            2008-2011
k                                             3
Runtime (s)                               0.000
Clusters                                      3
Points per Cluster    {0: 2865, 1: 567, 2: 632}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                          11948.194
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2008-2011,3,0.0,3,"{0: 2865, 1: 567, 2: 632}",,,,,11948.194
1,2008-2011,4,0.0,4,"{0: 1570, 1: 640, 2: 557, 3: 1297}",,,,,8297.171
2,2008-2011,5,0.008,5,"{0: 1363, 1: 1504, 2: 326, 3: 516, 4: 355}",,,,,6575.099
3,2008-2011,6,0.0,6,"{0: 1287, 1: 516, 2: 344, 3: 1496, 4: 322, 5: 99}",,,,,6156.322
4,2008-2011,7,0.0,7,"{0: 1491, 1: 60, 2: 413, 3: 591, 4: 1284, 5: 1...",,,,,6666.36



Running K-Means for Year Range: 2009-2012
Runtime for k_means: 0.015798 seconds
Runtime for k_means: 0.013897 seconds
Runtime for k_means: 0.007251 seconds
Runtime for k_means: 0.016988 seconds
Runtime for k_means: 0.016980 seconds
Runtime for k_means: 0.017793 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                              2009-2012
k                                               3
Runtime (s)                                 0.016
Clusters                                        3
Points per Cluster    {0: 1092, 1: 1411, 2: 1057}
Davies-Bouldin                               None
Calinski-Harabasz                            None
Dunn Index                                   None
Silhouette Score                             None
Mean ANOVA F                            10803.012
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2009-2012,3,0.016,3,"{0: 1092, 1: 1411, 2: 1057}",,,,,10803.012
1,2009-2012,4,0.014,4,"{0: 1402, 1: 1088, 2: 502, 3: 568}",,,,,7255.451
2,2009-2012,5,0.007,5,"{0: 2488, 1: 281, 2: 208, 3: 242, 4: 341}",,,,,5708.104
3,2009-2012,6,0.017,6,"{0: 308, 1: 1331, 2: 435, 3: 282, 4: 1155, 5: 49}",,,,,4917.128
4,2009-2012,7,0.017,7,"{0: 100, 1: 1076, 2: 284, 3: 295, 4: 1325, 5: ...",,,,,4695.417



Running K-Means for Year Range: 2010-2013
Runtime for k_means: 0.007005 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.005438 seconds
Runtime for k_means: 0.003471 seconds
Runtime for k_means: 0.018281 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                            2010-2013
k                                             3
Runtime (s)                               0.007
Clusters                                      3
Points per Cluster    {0: 1129, 1: 250, 2: 200}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                           4514.187
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2010-2013,3,0.007,3,"{0: 1129, 1: 250, 2: 200}",,,,,4514.187
1,2010-2013,4,0.0,4,"{0: 250, 1: 52, 2: 197, 3: 1080}",,,,,3253.961
2,2010-2013,5,0.0,5,"{0: 250, 1: 605, 2: 475, 3: 197, 4: 52}",,,,,2568.501
3,2010-2013,6,0.005,6,"{0: 31, 1: 226, 2: 475, 3: 52, 4: 191, 5: 604}",,,,,2178.743
4,2010-2013,7,0.003,7,"{0: 604, 1: 101, 2: 132, 3: 476, 4: 29, 5: 52,...",,,,,1988.175



Running K-Means for Year Range: 2011-2014
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.005448 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.001425 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                           2011-2014
k                                            3
Runtime (s)                              0.000
Clusters                                     3
Points per Cluster    {0: 236, 1: 293, 2: 337}
Davies-Bouldin                            None
Calinski-Harabasz                         None
Dunn Index                                None
Silhouette Score                          None
Mean ANOVA F                          2357.502
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2011-2014,3,0.0,3,"{0: 236, 1: 293, 2: 337}",,,,,2357.502
1,2011-2014,4,0.005,4,"{0: 591, 1: 102, 2: 38, 3: 135}",,,,,1655.262
2,2011-2014,5,0.0,5,"{0: 38, 1: 130, 2: 21, 3: 588, 4: 89}",,,,,1195.366
3,2011-2014,6,0.0,6,"{0: 338, 1: 49, 2: 63, 3: 288, 4: 13, 5: 115}",,,,,1000.864
4,2011-2014,7,0.0,7,"{0: 68, 1: 255, 2: 105, 3: 335, 4: 37, 5: 19, ...",,,,,1003.55



Running K-Means for Year Range: 2012-2015
Runtime for k_means: 0.008039 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.013274 seconds
Runtime for k_means: 0.012116 seconds
Runtime for k_means: 0.016923 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                           2012-2015
k                                            3
Runtime (s)                              0.008
Clusters                                     3
Points per Cluster    {0: 574, 1: 189, 2: 107}
Davies-Bouldin                            None
Calinski-Harabasz                         None
Dunn Index                                None
Silhouette Score                          None
Mean ANOVA F                          2162.699
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2012-2015,3,0.008,3,"{0: 574, 1: 189, 2: 107}",,,,,2162.699
1,2012-2015,4,0.0,4,"{0: 353, 1: 190, 2: 222, 3: 105}",,,,,1548.842
2,2012-2015,5,0.0,5,"{0: 307, 1: 96, 2: 183, 3: 266, 4: 18}",,,,,1245.963
3,2012-2015,6,0.013,6,"{0: 295, 1: 182, 2: 96, 3: 16, 4: 198, 5: 83}",,,,,1079.368
4,2012-2015,7,0.012,7,"{0: 93, 1: 491, 2: 18, 3: 72, 4: 19, 5: 116, 6...",,,,,1661.586



Running K-Means for Year Range: 2013-2016
Runtime for k_means: 0.010262 seconds
Runtime for k_means: 0.006369 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.004325 seconds
Runtime for k_means: 0.016315 seconds
Runtime for k_means: 0.000000 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                           2013-2016
k                                            3
Runtime (s)                              0.010
Clusters                                     3
Points per Cluster    {0: 775, 1: 360, 2: 192}
Davies-Bouldin                            None
Calinski-Harabasz                         None
Dunn Index                                None
Silhouette Score                          None
Mean ANOVA F                          4067.093
Name: 0, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2013-2016,3,0.01,3,"{0: 775, 1: 360, 2: 192}",,,,,4067.093
1,2013-2016,4,0.006,4,"{0: 354, 1: 449, 2: 196, 3: 328}",,,,,2681.657
2,2013-2016,5,0.0,5,"{0: 215, 1: 449, 2: 327, 3: 179, 4: 157}",,,,,2559.212
3,2013-2016,6,0.004,6,"{0: 175, 1: 335, 2: 118, 3: 215, 4: 157, 5: 327}",,,,,2260.618
4,2013-2016,7,0.016,7,"{0: 174, 1: 326, 2: 157, 3: 215, 4: 107, 5: 33...",,,,,1884.293



Running K-Means for Year Range: 2014-2017
Runtime for k_means: 0.001089 seconds
Runtime for k_means: 0.004014 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.012394 seconds
Runtime for k_means: 0.014940 seconds
Runtime for k_means: 0.008060 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                    2014-2017
k                                                     4
Runtime (s)                                       0.004
Clusters                                              4
Points per Cluster    {0: 297, 1: 225, 2: 1060, 3: 315}
Davies-Bouldin                                     None
Calinski-Harabasz                                  None
Dunn Index                                         None
Silhouette Score                                   None
Mean ANOVA F                                   4953.836
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2014-2017,3,0.001,3,"{0: 541, 1: 808, 2: 548}",,,,,1773.46
1,2014-2017,4,0.004,4,"{0: 297, 1: 225, 2: 1060, 3: 315}",,,,,4953.836
2,2014-2017,5,0.0,5,"{0: 236, 1: 479, 2: 323, 3: 366, 4: 493}",,,,,1442.611
3,2014-2017,6,0.012,6,"{0: 289, 1: 229, 2: 359, 3: 315, 4: 480, 5: 225}",,,,,3278.484
4,2014-2017,7,0.015,7,"{0: 225, 1: 480, 2: 207, 3: 315, 4: 27, 5: 288...",,,,,2742.421



Running K-Means for Year Range: 2015-2018
Runtime for k_means: 0.017172 seconds
Runtime for k_means: 0.015386 seconds
Runtime for k_means: 0.027298 seconds
Runtime for k_means: 0.026676 seconds
Runtime for k_means: 0.047999 seconds
Runtime for k_means: 0.019416 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                           2015-2018
k                                                            5
Runtime (s)                                              0.027
Clusters                                                     5
Points per Cluster    {0: 505, 1: 805, 2: 297, 3: 650, 4: 385}
Davies-Bouldin                                            None
Calinski-Harabasz                                         None
Dunn Index                                                None
Silhouette Score                                          None
Mean ANOVA F                                          5917.641
Name: 2, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2015-2018,3,0.017,3,"{0: 1158, 1: 1171, 2: 313}",,,,,2277.833
1,2015-2018,4,0.015,4,"{0: 528, 1: 660, 2: 647, 3: 807}",,,,,3027.512
2,2015-2018,5,0.027,5,"{0: 505, 1: 805, 2: 297, 3: 650, 4: 385}",,,,,5917.641
3,2015-2018,6,0.027,6,"{0: 803, 1: 650, 2: 191, 3: 170, 4: 325, 5: 503}",,,,,4956.282
4,2015-2018,7,0.048,7,"{0: 191, 1: 428, 2: 502, 3: 170, 4: 325, 5: 26...",,,,,4507.313



Running K-Means for Year Range: 2016-2019
Runtime for k_means: 0.016008 seconds
Runtime for k_means: 0.015811 seconds
Runtime for k_means: 0.018344 seconds
Runtime for k_means: 0.012906 seconds
Runtime for k_means: 0.010116 seconds
Runtime for k_means: 0.000000 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                            2016-2019
k                                             3
Runtime (s)                               0.016
Clusters                                      3
Points per Cluster    {0: 1503, 1: 584, 2: 623}
Davies-Bouldin                             None
Calinski-Harabasz                          None
Dunn Index                                 None
Silhouette Score                           None
Mean ANOVA F                           8453.664
Name: 0, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2016-2019,3,0.016,3,"{0: 1503, 1: 584, 2: 623}",,,,,8453.664
1,2016-2019,4,0.016,4,"{0: 612, 1: 688, 2: 583, 3: 827}",,,,,5687.41
2,2016-2019,5,0.018,5,"{0: 1186, 1: 609, 2: 317, 3: 293, 4: 305}",,,,,4854.838
3,2016-2019,6,0.013,6,"{0: 373, 1: 607, 2: 307, 3: 582, 4: 796, 5: 45}",,,,,3407.821
4,2016-2019,7,0.01,7,"{0: 794, 1: 589, 2: 405, 3: 168, 4: 307, 5: 14...",,,,,4674.59



Running K-Means for Year Range: 2017-2020
Runtime for k_means: 0.033723 seconds
Runtime for k_means: 0.021413 seconds
Runtime for k_means: 0.012833 seconds
Runtime for k_means: 0.018053 seconds
Runtime for k_means: 0.020149 seconds
Runtime for k_means: 0.020443 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                          2017-2020
k                                                           5
Runtime (s)                                             0.013
Clusters                                                    5
Points per Cluster    {0: 668, 1: 938, 2: 671, 3: 62, 4: 952}
Davies-Bouldin                                           None
Calinski-Harabasz                                        None
Dunn Index                                               None
Silhouette Score                                         None
Mean ANOVA F                                         7157.506
Name: 2, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2017-2020,3,0.034,3,"{0: 800, 1: 1890, 2: 601}",,,,,6922.851
1,2017-2020,4,0.021,4,"{0: 798, 1: 633, 2: 600, 3: 1260}",,,,,4803.211
2,2017-2020,5,0.013,5,"{0: 668, 1: 938, 2: 671, 3: 62, 4: 952}",,,,,7157.506
3,2017-2020,6,0.018,6,"{0: 386, 1: 685, 2: 1501, 3: 210, 4: 314, 5: 195}",,,,,6767.031
4,2017-2020,7,0.02,7,"{0: 195, 1: 900, 2: 210, 3: 683, 4: 606, 5: 38...",,,,,5994.02



Running K-Means for Year Range: 2018-2021
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.018411 seconds
Runtime for k_means: 0.002007 seconds
Runtime for k_means: 0.027039 seconds
Runtime for k_means: 0.006059 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                    2018-2021
k                                                     4
Runtime (s)                                       0.000
Clusters                                              4
Points per Cluster    {0: 2056, 1: 384, 2: 650, 3: 352}
Davies-Bouldin                                     None
Calinski-Harabasz                                  None
Dunn Index                                         None
Silhouette Score                                   None
Mean ANOVA F                                  10017.883
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2018-2021,3,0.0,3,"{0: 2072, 1: 456, 2: 914}",,,,,4576.04
1,2018-2021,4,0.0,4,"{0: 2056, 1: 384, 2: 650, 3: 352}",,,,,10017.883
2,2018-2021,5,0.018,5,"{0: 917, 1: 691, 2: 438, 3: 415, 4: 981}",,,,,2752.275
3,2018-2021,6,0.002,6,"{0: 234, 1: 1659, 2: 303, 3: 640, 4: 198, 5: 408}",,,,,7222.114
4,2018-2021,7,0.027,7,"{0: 641, 1: 686, 2: 303, 3: 234, 4: 198, 5: 97...",,,,,6365.596



Running K-Means for Year Range: 2019-2022
Runtime for k_means: 0.008011 seconds
Runtime for k_means: 0.014986 seconds
Runtime for k_means: 0.005126 seconds
Runtime for k_means: 0.011764 seconds
Runtime for k_means: 0.000000 seconds
Runtime for k_means: 0.003650 seconds
Best KMeans parameters based on ANOVA F-value:
Year Range                                    2019-2022
k                                                     4
Runtime (s)                                       0.015
Clusters                                              4
Points per Cluster    {0: 2262, 1: 476, 2: 535, 3: 336}
Davies-Bouldin                                     None
Calinski-Harabasz                                  None
Dunn Index                                         None
Silhouette Score                                   None
Mean ANOVA F                                   9942.704
Name: 1, dtype: object



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2019-2022,3,0.008,3,"{0: 1680, 1: 1312, 2: 617}",,,,,3742.545
1,2019-2022,4,0.015,4,"{0: 2262, 1: 476, 2: 535, 3: 336}",,,,,9942.704
2,2019-2022,5,0.005,5,"{0: 358, 1: 813, 2: 955, 3: 600, 4: 883}",,,,,2816.437
3,2019-2022,6,0.012,6,"{0: 1669, 1: 285, 2: 298, 3: 526, 4: 234, 5: 597}",,,,,6983.3
4,2019-2022,7,0.0,7,"{0: 591, 1: 326, 2: 869, 3: 55, 4: 510, 5: 448...",,,,,5636.04



Running K-Means for Year Range: 2020-2023
Runtime for k_means: 0.028061 seconds
Runtime for k_means: 0.003536 seconds
Runtime for k_means: 0.015726 seconds
Runtime for k_means: 0.010081 seconds
Runtime for k_means: 0.006508 seconds
Runtime for k_means: 0.014353 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best KMeans parameters based on ANOVA F-value:
Year Range                                    2020-2023
k                                                     4
Runtime (s)                                       0.004
Clusters                                              4
Points per Cluster    {0: 742, 1: 2035, 2: 952, 3: 643}
Davies-Bouldin                                     None
Calinski-Harabasz                                  None
Dunn Index                                         None
Silhouette Score                                   None
Mean ANOVA F                                   12310.34
Name: 1, dtype: object


Unnamed: 0,Year Range,k,Runtime (s),Clusters,Points per Cluster,Davies-Bouldin,Calinski-Harabasz,Dunn Index,Silhouette Score,Mean ANOVA F
0,2020-2023,3,0.028,3,"{0: 1568, 1: 763, 2: 2041}",,,,,4686.198
1,2020-2023,4,0.004,4,"{0: 742, 1: 2035, 2: 952, 3: 643}",,,,,12310.34
2,2020-2023,5,0.016,5,"{0: 605, 1: 743, 2: 447, 3: 543, 4: 2034}",,,,,10275.825
3,2020-2023,6,0.01,6,"{0: 184, 1: 447, 2: 578, 3: 602, 4: 543, 5: 2018}",,,,,8259.523
4,2020-2023,7,0.007,7,"{0: 1123, 1: 367, 2: 600, 3: 731, 4: 923, 5: 3...",,,,,7771.473


# DBSCAN

In [3]:
import cluster_data_5d
from cluster_data_5d import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils_5d import ClusterData
import cluster_plotter
from DBSCAN import dbscan_clustering
import high_dim_analysis
import seaborn as sns
import matplotlib.pyplot as plt

# Define year ranges
standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014, 2019]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)  # Include 2023
running_ranges = cluster_data_5d.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data_5d.bin_data_for_clustering(running_ranges, print_res=False)

results_per_year_range = {}

# Output directory for plots
plot_dir = r"Images\dbscan_tests_running_bins_5d_with_perigee"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

for cluster_data, year_range in binned_data:  
    print(f"\nRunning DBSCAN for Year Range: {year_range}")

    data_array = np.array([cluster_data.inc, cluster_data.raan, cluster_data.perigee]).T
    feature_names = ["inclination [°]", "RAAN [°]", "Perigee[km]"]

    normalized_data, data_min, data_max = normalize_data(data_array)

    # DBSCAN parameter ranges
    eps_values = [0.02, 0.01, 0.015]
    min_samples_values = [10, 15, 25, 30]

    results = []

    for eps in eps_values:
        for min_samples in min_samples_values:
            result_dbscan, time_dbscan, n_clusters_dbscan, points_per_cluster_dbscan, metrics_dbscan = run_clustering(
                dbscan_clustering, "DBSCAN", normalized_data, data_min, data_max, eps=eps, min_samples=min_samples
            )

            unnormalized_data, _ = unnormalize(
                result_dbscan.data, None, data_min, data_max
            )
            # Count noise points
            noise_points = np.sum(result_dbscan.labels == -1)

            results.append({
                "Year Range": year_range,
                "eps": eps,
                "min_samples": min_samples,
                "Runtime (s)": f"{time_dbscan:.3f}",
                "Clusters": n_clusters_dbscan,
                "Points per Cluster": points_per_cluster_dbscan,
                "Noise Points": noise_points,
                "Davies-Bouldin": f"{metrics_dbscan[0]:.3f}" if metrics_dbscan and isinstance(metrics_dbscan[0], (int, float)) else None,
                "Calinski-Harabasz": f"{metrics_dbscan[1]:.3f}" if metrics_dbscan and isinstance(metrics_dbscan[1], (int, float)) else None
            })


            labels = result_dbscan.labels
            """high_dim_analysis.plot_co_membership(
                labels,
                output_folder=plot_dir,
                filename=f"co_membership_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )"""
            high_dim_analysis.plot_correlation_heatmap(
                cluster_data,
                output_folder=plot_dir,
                filename=f"corr_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )
            high_dim_analysis.plot_mutual_information(
                cluster_data,
                labels,
                output_folder=plot_dir,
                filename=f"mi_{year_range}_eps{eps}_minsamples_{min_samples}.png"
            )
            
        from sklearn.feature_selection import f_classif
        X = np.array([cluster_data.inc, cluster_data.raan, cluster_data.perigee,
              cluster_data.ecc, cluster_data.mag]).T
        valid = labels != -1  # Exclude noise points
        if np.any(valid) and len(np.unique(labels[valid])) > 1:
            f_vals, _ = f_classif(X[valid], labels[valid])
            weights = np.var(X[valid], axis=0)
            weighted_mean_f = np.average(f_vals, weights=weights)
        else:
            weighted_mean_f = 0.0  # Avoid invalid or single-cluster cases

        results[-1]["Weighted Mean ANOVA F"] = f"{weighted_mean_f:.3f}"

    df = pd.DataFrame(results)

    df["Weighted Mean ANOVA F"] = pd.to_numeric(df["Weighted Mean ANOVA F"], errors="coerce")
    df_sorted = df.sort_values(by="Weighted Mean ANOVA F", ascending=False)
    best_params = df_sorted.iloc[0]
    best_eps = best_params["eps"]
    best_min_samples = best_params["min_samples"]

    # Re-run clustering with best parameters to get labels
    best_result, _, _, _, _ = run_clustering(
        dbscan_clustering, "DBSCAN", normalized_data, data_min, data_max,
        eps=best_eps, min_samples=best_min_samples
    )
    best_labels = best_result.labels

    df_plot = pd.DataFrame(data_array, columns=feature_names)
    df_plot['cluster'] = best_labels.astype(str)

    pairplot = sns.pairplot(df_plot, hue='cluster', diag_kind='kde', plot_kws={'alpha':0.6, 's':40})
    pairplot.fig.suptitle(f"DBSCAN Clusters for Year Range {year_range}", y=1.02)

    # Save the figure to the plot directory
    pairplot.savefig(os.path.join(plot_dir, f"pairplot_{year_range}_eps{best_eps}_minsamples_{best_min_samples}.png"))

    plt.close(pairplot.fig)  # Close the figure to free memory


    # Plot ANOVA F-values only for best result
    high_dim_analysis.plot_anova_f_values(
        cluster_data,
        best_labels,
        output_folder=plot_dir,
        filename=f"anova_best_{year_range}_eps{best_eps}_minsamples_{best_min_samples}.png",
        title=f"ANOVA F-values (Best DBSCAN Parameters), eps = {float(best_eps)}, minsamples = {float(best_min_samples)}"
    )

    print("Best DBSCAN parameters based on ANOVA F-value:")
    print(best_params)
    display(df)



Running DBSCAN for Year Range: 2002-2005
Runtime for dbscan_clustering: 0.100233 seconds


[WinError 2] Das System kann die angegebene Datei nicht finden
  File "c:\Users\fionu\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Runtime for dbscan_clustering: 0.106154 seconds
Runtime for dbscan_clustering: 0.098295 seconds
Runtime for dbscan_clustering: 0.099462 seconds
Runtime for dbscan_clustering: 0.083003 seconds
Runtime for dbscan_clustering: 0.064549 seconds
Runtime for dbscan_clustering: 0.087461 seconds
Runtime for dbscan_clustering: 0.026081 seconds
Runtime for dbscan_clustering: 0.050244 seconds
Runtime for dbscan_clustering: 0.073026 seconds
Runtime for dbscan_clustering: 0.049793 seconds
Runtime for dbscan_clustering: 0.043715 seconds
Runtime for dbscan_clustering: 0.081963 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                             2002-2005
eps                                                         0.01
min_samples                                                   30
Runtime (s)                                                0.026
Clusters                                                       5
Points per Cluster       {0: 214, 1: 70, 2: 30, 3: 35, -1: 7688}
Noise Points                                                7688
Davies-Bouldin                                              None
Calinski-Harabasz                                           None
Weighted Mean ANOVA F                                  58887.198
Name: 7, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2002-2005,0.02,10,0.1,75,"{0: 403, 1: 18, 2: 14, 3: 31, 4: 34, 5: 11, 6:...",3540,,,
1,2002-2005,0.02,15,0.106,33,"{0: 324, 1: 67, 2: 16, 3: 31, 4: 15, 5: 1575, ...",4519,,,
2,2002-2005,0.02,25,0.098,21,"{0: 322, 1: 45, 2: 31, 3: 818, 4: 464, 5: 114,...",5227,,,
3,2002-2005,0.02,30,0.099,12,"{0: 320, 1: 32, 2: 31, 3: 802, 4: 250, 5: 100,...",5679,,,12122.114
4,2002-2005,0.01,10,0.083,71,"{0: 10, 1: 256, 2: 13, 3: 11, 4: 11, 5: 12, 6:...",6200,,,
5,2002-2005,0.01,15,0.065,35,"{0: 255, 1: 19, 2: 19, 3: 17, 4: 17, 5: 28, 6:...",6844,,,
6,2002-2005,0.01,25,0.087,10,"{0: 218, 1: 107, 2: 27, 3: 25, 4: 33, 5: 25, 6...",7485,,,
7,2002-2005,0.01,30,0.026,5,"{0: 214, 1: 70, 2: 30, 3: 35, -1: 7688}",7688,,,58887.198
8,2002-2005,0.015,10,0.05,80,"{0: 43, 1: 18, 2: 290, 3: 15, 4: 10, 5: 31, 6:...",4622,,,
9,2002-2005,0.015,15,0.073,35,"{0: 22, 1: 290, 2: 31, 3: 15, 4: 674, 5: 116, ...",5476,,,



Running DBSCAN for Year Range: 2003-2006
Runtime for dbscan_clustering: 0.122779 seconds
Runtime for dbscan_clustering: 0.151791 seconds
Runtime for dbscan_clustering: 0.058725 seconds
Runtime for dbscan_clustering: 0.047602 seconds
Runtime for dbscan_clustering: 0.049688 seconds
Runtime for dbscan_clustering: 0.050811 seconds
Runtime for dbscan_clustering: 0.049576 seconds
Runtime for dbscan_clustering: 0.040768 seconds
Runtime for dbscan_clustering: 0.064044 seconds
Runtime for dbscan_clustering: 0.050193 seconds
Runtime for dbscan_clustering: 0.077913 seconds
Runtime for dbscan_clustering: 0.052109 seconds
Runtime for dbscan_clustering: 0.060771 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                            2003-2006
eps                                                        0.01
min_samples                                                  30
Runtime (s)                                               0.041
Clusters                                                      5
Points per Cluster       {0: 74, 1: 81, 2: 68, 3: 30, -1: 8626}
Noise Points                                               8626
Davies-Bouldin                                             None
Calinski-Harabasz                                          None
Weighted Mean ANOVA F                                100846.491
Name: 7, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2003-2006,0.02,10,0.123,88,"{0: 29, 1: 10, 2: 30, 3: 42, 4: 32, 5: 26, 6: ...",3991,,,
1,2003-2006,0.02,15,0.152,38,"{0: 40, 1: 26, 2: 67, 3: 22, 4: 24, 5: 1259, 6...",4995,,,
2,2003-2006,0.02,25,0.059,17,"{0: 52, 1: 846, 2: 103, 3: 25, 4: 79, 5: 85, 6...",5931,,,
3,2003-2006,0.02,30,0.048,12,"{0: 822, 1: 97, 2: 841, 3: 124, 4: 429, 5: 57,...",6349,,,3554.829
4,2003-2006,0.01,10,0.05,84,"{0: 11, 1: 24, 2: 13, 3: 11, 4: 12, 5: 121, 6:...",6932,,,
5,2003-2006,0.01,15,0.051,38,"{0: 17, 1: 114, 2: 87, 3: 16, 4: 107, 5: 27, 6...",7708,,,
6,2003-2006,0.01,25,0.05,10,"{0: 103, 1: 97, 2: 28, 3: 34, 4: 25, 5: 68, 6:...",8434,,,
7,2003-2006,0.01,30,0.041,5,"{0: 74, 1: 81, 2: 68, 3: 30, -1: 8626}",8626,,,100846.491
8,2003-2006,0.015,10,0.064,74,"{0: 11, 1: 18, 2: 15, 3: 14, 4: 19, 5: 54, 6: ...",5317,,,
9,2003-2006,0.015,15,0.05,39,"{0: 18, 1: 48, 2: 783, 3: 96, 4: 17, 5: 21, 6:...",6133,,,



Running DBSCAN for Year Range: 2004-2007
Runtime for dbscan_clustering: 0.073198 seconds
Runtime for dbscan_clustering: 0.085163 seconds
Runtime for dbscan_clustering: 0.056206 seconds
Runtime for dbscan_clustering: 0.049338 seconds
Runtime for dbscan_clustering: 0.066372 seconds
Runtime for dbscan_clustering: 0.063428 seconds
Runtime for dbscan_clustering: 0.037526 seconds
Runtime for dbscan_clustering: 0.049482 seconds
Runtime for dbscan_clustering: 0.046872 seconds
Runtime for dbscan_clustering: 0.041102 seconds
Runtime for dbscan_clustering: 0.045279 seconds
Runtime for dbscan_clustering: 0.049722 seconds
Runtime for dbscan_clustering: 0.034139 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2004-2007
eps                                                                   0.01
min_samples                                                             30
Runtime (s)                                                          0.049
Clusters                                                                 7
Points per Cluster       {0: 193, 1: 41, 2: 62, 3: 44, 4: 39, 5: 30, -1...
Noise Points                                                          8140
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                            39086.169
Name: 7, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2004-2007,0.02,10,0.073,58,"{0: 11, 1: 13, 2: 20, 3: 10, 4: 19, 5: 17, 6: ...",3863,,,
1,2004-2007,0.02,15,0.085,26,"{0: 20, 1: 15, 2: 1670, 3: 64, 4: 67, 5: 31, 6...",4527,,,
2,2004-2007,0.02,25,0.056,16,"{0: 1168, 1: 271, 2: 96, 3: 970, 4: 400, 5: 53...",5223,,,
3,2004-2007,0.02,30,0.049,13,"{0: 1148, 1: 86, 2: 897, 3: 347, 4: 39, 5: 123...",5675,,,2338.465
4,2004-2007,0.01,10,0.066,83,"{0: 106, 1: 265, 2: 98, 3: 19, 4: 11, 5: 14, 6...",6554,,,
5,2004-2007,0.01,15,0.063,39,"{0: 51, 1: 212, 2: 44, 3: 19, 4: 45, 5: 15, 6:...",7271,,,
6,2004-2007,0.01,25,0.038,10,"{0: 193, 1: 32, 2: 41, 3: 62, 4: 33, 5: 44, 6:...",8040,,,
7,2004-2007,0.01,30,0.049,7,"{0: 193, 1: 41, 2: 62, 3: 44, 4: 39, 5: 30, -1...",8140,,,39086.169
8,2004-2007,0.015,10,0.047,53,"{0: 13, 1: 14, 2: 1127, 3: 302, 4: 113, 5: 10,...",4884,,,
9,2004-2007,0.015,15,0.041,29,"{0: 962, 1: 98, 2: 47, 3: 112, 4: 120, 5: 675,...",5606,,,



Running DBSCAN for Year Range: 2005-2008
Runtime for dbscan_clustering: 0.047286 seconds
Runtime for dbscan_clustering: 0.046168 seconds
Runtime for dbscan_clustering: 0.054772 seconds
Runtime for dbscan_clustering: 0.048619 seconds
Runtime for dbscan_clustering: 0.047891 seconds
Runtime for dbscan_clustering: 0.033418 seconds
Runtime for dbscan_clustering: 0.033277 seconds
Runtime for dbscan_clustering: 0.033391 seconds
Runtime for dbscan_clustering: 0.039746 seconds
Runtime for dbscan_clustering: 0.039147 seconds
Runtime for dbscan_clustering: 0.049644 seconds
Runtime for dbscan_clustering: 0.031965 seconds
Runtime for dbscan_clustering: 0.050070 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                    2005-2008
eps                                                                0.01
min_samples                                                          30
Runtime (s)                                                       0.033
Clusters                                                              6
Points per Cluster       {0: 43, 1: 40, 2: 108, 3: 37, 4: 49, -1: 7344}
Noise Points                                                       7344
Davies-Bouldin                                                     None
Calinski-Harabasz                                                  None
Weighted Mean ANOVA F                                         77997.341
Name: 7, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2005-2008,0.02,10,0.047,56,"{0: 17, 1: 13, 2: 12, 3: 1708, 4: 36, 5: 18, 6...",3307,,,
1,2005-2008,0.02,15,0.046,24,"{0: 1594, 1: 56, 2: 25, 3: 1563, 4: 35, 5: 16,...",3957,,,
2,2005-2008,0.02,25,0.055,10,"{0: 1419, 1: 917, 2: 406, 3: 26, 4: 25, 5: 57,...",4676,,,
3,2005-2008,0.02,30,0.049,13,"{0: 977, 1: 116, 2: 105, 3: 816, 4: 74, 5: 301...",5057,,,2370.299
4,2005-2008,0.01,10,0.048,88,"{0: 10, 1: 251, 2: 117, 3: 48, 4: 45, 5: 13, 6...",5726,,,
5,2005-2008,0.01,15,0.033,34,"{0: 230, 1: 100, 2: 50, 3: 45, 4: 30, 5: 44, 6...",6535,,,
6,2005-2008,0.01,25,0.033,9,"{0: 43, 1: 40, 2: 110, 3: 64, 4: 41, 5: 45, 6:...",7222,,,
7,2005-2008,0.01,30,0.033,6,"{0: 43, 1: 40, 2: 108, 3: 37, 4: 49, -1: 7344}",7344,,,77997.341
8,2005-2008,0.015,10,0.04,53,"{0: 12, 1: 12, 2: 1002, 3: 441, 4: 10, 5: 26, ...",4194,,,
9,2005-2008,0.015,15,0.039,26,"{0: 925, 1: 75, 2: 107, 3: 119, 4: 653, 5: 19,...",4935,,,



Running DBSCAN for Year Range: 2006-2009
Runtime for dbscan_clustering: 0.048701 seconds
Runtime for dbscan_clustering: 0.057577 seconds
Runtime for dbscan_clustering: 0.037819 seconds
Runtime for dbscan_clustering: 0.036865 seconds
Runtime for dbscan_clustering: 0.032301 seconds
Runtime for dbscan_clustering: 0.034987 seconds
Runtime for dbscan_clustering: 0.039179 seconds
Runtime for dbscan_clustering: 0.033357 seconds
Runtime for dbscan_clustering: 0.033991 seconds
Runtime for dbscan_clustering: 0.042991 seconds
Runtime for dbscan_clustering: 0.092884 seconds
Runtime for dbscan_clustering: 0.085263 seconds
Runtime for dbscan_clustering: 0.086782 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2006-2009
eps                                                                  0.015
min_samples                                                             30
Runtime (s)                                                          0.085
Clusters                                                                12
Points per Cluster       {0: 396, 1: 89, 2: 173, 3: 45, 4: 50, 5: 66, 6...
Noise Points                                                          6483
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                             9622.421
Name: 11, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2006-2009,0.02,10,0.049,48,"{0: 11, 1: 15, 2: 1732, 3: 10, 4: 68, 5: 62, 6...",3222,,,
1,2006-2009,0.02,15,0.058,29,"{0: 1612, 1: 37, 2: 16, 3: 59, 4: 18, 5: 46, 6...",3807,,,
2,2006-2009,0.02,25,0.038,10,"{0: 958, 1: 345, 2: 42, 3: 805, 4: 441, 5: 48,...",4721,,,
3,2006-2009,0.02,30,0.037,10,"{0: 868, 1: 107, 2: 98, 3: 30, 4: 766, 5: 414,...",5068,,,2670.88
4,2006-2009,0.01,10,0.032,84,"{0: 11, 1: 269, 2: 59, 3: 25, 4: 85, 5: 55, 6:...",5723,,,
5,2006-2009,0.01,15,0.035,28,"{0: 80, 1: 91, 2: 44, 3: 152, 4: 18, 5: 20, 6:...",6577,,,
6,2006-2009,0.01,25,0.039,5,"{0: 45, 1: 49, 2: 76, 3: 37, -1: 7272}",7272,,,
7,2006-2009,0.01,30,0.033,4,"{0: 76, 1: 49, 2: 31, -1: 7323}",7323,,,2132.516
8,2006-2009,0.015,10,0.034,60,"{0: 11, 1: 13, 2: 1290, 3: 12, 4: 10, 5: 83, 6...",4136,,,
9,2006-2009,0.015,15,0.043,35,"{0: 852, 1: 117, 2: 72, 3: 52, 4: 62, 5: 31, 6...",4868,,,



Running DBSCAN for Year Range: 2007-2010
Runtime for dbscan_clustering: 0.066516 seconds
Runtime for dbscan_clustering: 0.083735 seconds
Runtime for dbscan_clustering: 0.090495 seconds
Runtime for dbscan_clustering: 0.059429 seconds
Runtime for dbscan_clustering: 0.014035 seconds
Runtime for dbscan_clustering: 0.016749 seconds
Runtime for dbscan_clustering: 0.027350 seconds
Runtime for dbscan_clustering: 0.033318 seconds
Runtime for dbscan_clustering: 0.030679 seconds
Runtime for dbscan_clustering: 0.039649 seconds
Runtime for dbscan_clustering: 0.034137 seconds
Runtime for dbscan_clustering: 0.033482 seconds
Runtime for dbscan_clustering: 0.025903 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                                       2007-2010
eps                                                                  0.015
min_samples                                                             30
Runtime (s)                                                          0.033
Clusters                                                                11
Points per Cluster       {0: 266, 1: 54, 2: 42, 3: 43, 4: 51, 5: 52, 6:...
Noise Points                                                          5047
Davies-Bouldin                                                        None
Calinski-Harabasz                                                     None
Weighted Mean ANOVA F                                               6855.9
Name: 11, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2007-2010,0.02,10,0.067,36,"{0: 12, 1: 10, 2: 1313, 3: 14, 4: 10, 5: 13, 6...",2559,,,
1,2007-2010,0.02,15,0.084,24,"{0: 1212, 1: 36, 2: 27, 3: 596, 4: 370, 5: 49,...",3032,,,
2,2007-2010,0.02,25,0.09,16,"{0: 669, 1: 83, 2: 51, 3: 74, 4: 41, 5: 26, 6:...",3711,,,
3,2007-2010,0.02,30,0.059,10,"{0: 619, 1: 62, 2: 72, 3: 373, 4: 273, 5: 45, ...",4137,,,2763.803
4,2007-2010,0.01,10,0.014,50,"{0: 25, 1: 26, 2: 10, 3: 92, 4: 18, 5: 52, 6: ...",4666,,,
5,2007-2010,0.01,15,0.017,22,"{0: 22, 1: 21, 2: 74, 3: 50, 4: 59, 5: 15, 6: ...",5153,,,
6,2007-2010,0.01,25,0.027,4,"{0: 34, 1: 39, 2: 51, -1: 5562}",5562,,,
7,2007-2010,0.01,30,0.033,3,"{0: 34, 1: 46, -1: 5606}",5606,,,2647.095
8,2007-2010,0.015,10,0.031,43,"{0: 789, 1: 137, 2: 10, 3: 55, 4: 19, 5: 58, 6...",3316,,,
9,2007-2010,0.015,15,0.04,29,"{0: 571, 1: 32, 2: 23, 3: 32, 4: 89, 5: 19, 6:...",3887,,,



Running DBSCAN for Year Range: 2008-2011
Runtime for dbscan_clustering: 0.032638 seconds
Runtime for dbscan_clustering: 0.015413 seconds
Runtime for dbscan_clustering: 0.027045 seconds
Runtime for dbscan_clustering: 0.024701 seconds
Runtime for dbscan_clustering: 0.016216 seconds
Runtime for dbscan_clustering: 0.014881 seconds
Runtime for dbscan_clustering: 0.016163 seconds
Runtime for dbscan_clustering: 0.019248 seconds
Runtime for dbscan_clustering: 0.024370 seconds
Runtime for dbscan_clustering: 0.026336 seconds
Runtime for dbscan_clustering: 0.021785 seconds
Runtime for dbscan_clustering: 0.018338 seconds
Runtime for dbscan_clustering: 0.020396 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                                            2008-2011
eps                                                       0.015
min_samples                                                  30
Runtime (s)                                               0.018
Clusters                       


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2008-2011,0.02,10,0.033,24,"{0: 583, 1: 82, 2: 135, 3: 13, 4: 31, 5: 11, 6...",2135,,,
1,2008-2011,0.02,15,0.015,15,"{0: 466, 1: 64, 2: 48, 3: 301, 4: 106, 5: 124,...",2526,,,
2,2008-2011,0.02,25,0.027,11,"{0: 28, 1: 403, 2: 50, 3: 48, 4: 128, 5: 96, 6...",3072,,,
3,2008-2011,0.02,30,0.025,12,"{0: 59, 1: 34, 2: 306, 3: 80, 4: 50, 5: 46, 6:...",3323,,,3288.455
4,2008-2011,0.01,10,0.016,27,"{0: 16, 1: 56, 2: 30, 3: 21, 4: 17, 5: 35, 6: ...",3569,,,
5,2008-2011,0.01,15,0.015,10,"{0: 29, 1: 19, 2: 19, 3: 21, 4: 30, 5: 15, 6: ...",3856,,,
6,2008-2011,0.01,25,0.016,1,{-1: 4064},4064,,,
7,2008-2011,0.01,30,0.019,1,{-1: 4064},4064,,,0.0
8,2008-2011,0.015,10,0.024,35,"{0: 363, 1: 55, 2: 25, 3: 36, 4: 11, 5: 13, 6:...",2748,,,
9,2008-2011,0.015,15,0.026,21,"{0: 58, 1: 45, 2: 275, 3: 36, 4: 62, 5: 19, 6:...",3175,,,



Running DBSCAN for Year Range: 2009-2012
Runtime for dbscan_clustering: 0.021807 seconds
Runtime for dbscan_clustering: 0.014850 seconds
Runtime for dbscan_clustering: 0.016522 seconds
Runtime for dbscan_clustering: 0.022038 seconds
Runtime for dbscan_clustering: 0.013035 seconds
Runtime for dbscan_clustering: 0.015969 seconds
Runtime for dbscan_clustering: 0.013514 seconds
Runtime for dbscan_clustering: 0.014431 seconds
Runtime for dbscan_clustering: 0.026945 seconds
Runtime for dbscan_clustering: 0.013824 seconds
Runtime for dbscan_clustering: 0.025621 seconds
Runtime for dbscan_clustering: 0.022122 seconds
Runtime for dbscan_clustering: 0.015618 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                                            2009-2012
eps                                                       0.015
min_samples                                                  30
Runtime (s)                                               0.022
Clusters                       


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2009-2012,0.02,10,0.022,22,"{0: 472, 1: 81, 2: 55, 3: 13, 4: 18, 5: 358, 6...",2044,,,
1,2009-2012,0.02,15,0.015,16,"{0: 373, 1: 44, 2: 40, 3: 18, 4: 246, 5: 72, 6...",2391,,,
2,2009-2012,0.02,25,0.017,12,"{0: 289, 1: 65, 2: 26, 3: 51, 4: 26, 5: 26, 6:...",2895,,,
3,2009-2012,0.02,30,0.022,6,"{0: 61, 1: 42, 2: 36, 3: 267, 4: 31, -1: 3123}",3123,,,2620.042
4,2009-2012,0.01,10,0.013,20,"{0: 20, 1: 17, 2: 31, 3: 12, 4: 19, 5: 21, 6: ...",3215,,,
5,2009-2012,0.01,15,0.016,6,"{0: 18, 1: 21, 2: 26, 3: 16, 4: 30, -1: 3449}",3449,,,
6,2009-2012,0.01,25,0.014,1,{-1: 3560},3560,,,
7,2009-2012,0.01,30,0.014,1,{-1: 3560},3560,,,0.0
8,2009-2012,0.015,10,0.027,36,"{0: 15, 1: 16, 2: 12, 3: 246, 4: 18, 5: 39, 6:...",2558,,,
9,2009-2012,0.015,15,0.014,18,"{0: 233, 1: 18, 2: 44, 3: 34, 4: 38, 5: 23, 6:...",2945,,,



Running DBSCAN for Year Range: 2010-2013
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.012741 seconds
Runtime for dbscan_clustering: 0.009072 seconds
Runtime for dbscan_clustering: 0.005018 seconds
Runtime for dbscan_clustering: 0.007742 seconds
Runtime for dbscan_clustering: 0.006385 seconds
Runtime for dbscan_clustering: 0.004009 seconds
Runtime for dbscan_clustering: 0.015298 seconds
Runtime for dbscan_clustering: 0.001930 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.008489 seconds
Runtime for dbscan_clustering: 0.004082 seconds
Runtime for dbscan_clustering: 0.019345 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                       2010-2013
eps                                   0.02
min_samples                             30
Runtime (s)                          0.005
Clusters                                 2
Points per Cluster       {0: 43, -1: 1536}
Noise Points                 


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2010-2013,0.02,10,0.0,18,"{0: 20, 1: 17, 2: 16, 3: 25, 4: 105, 5: 11, 6:...",1222,,,
1,2010-2013,0.02,15,0.013,6,"{0: 18, 1: 17, 2: 82, 3: 24, 4: 15, -1: 1423}",1423,,,
2,2010-2013,0.02,25,0.009,2,"{0: 66, -1: 1513}",1513,,,
3,2010-2013,0.02,30,0.005,2,"{0: 43, -1: 1536}",1536,,,0.0
4,2010-2013,0.01,10,0.008,4,"{0: 12, 1: 14, 2: 10, -1: 1543}",1543,,,
5,2010-2013,0.01,15,0.006,1,{-1: 1579},1579,,,
6,2010-2013,0.01,25,0.004,1,{-1: 1579},1579,,,
7,2010-2013,0.01,30,0.015,1,{-1: 1579},1579,,,0.0
8,2010-2013,0.015,10,0.002,7,"{0: 16, 1: 13, 2: 11, 3: 71, 4: 13, 5: 11, -1:...",1444,,,
9,2010-2013,0.015,15,0.0,2,"{0: 49, -1: 1530}",1530,,,



Running DBSCAN for Year Range: 2011-2014
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.004014 seconds
Runtime for dbscan_clustering: 0.012675 seconds
Runtime for dbscan_clustering: 0.015548 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.002008 seconds
Runtime for dbscan_clustering: 0.004547 seconds
Runtime for dbscan_clustering: 0.008049 seconds
Runtime for dbscan_clustering: 0.003495 seconds
Runtime for dbscan_clustering: 0.006998 seconds
Runtime for dbscan_clustering: 0.001513 seconds
Runtime for dbscan_clustering: 0.004013 seconds
Runtime for dbscan_clustering: 0.014463 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range               2011-2014
eps                           0.02
min_samples                     30
Runtime (s)                  0.016
Clusters                         1
Points per Cluster       {-1: 866}
Noise Points                   866
Davies-Bouldin                None
Calinski-Harabasz             None
Weighted Mean ANOVA F          0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2011-2014,0.02,10,0.0,4,"{0: 20, 1: 10, 2: 11, -1: 825}",825,,,
1,2011-2014,0.02,15,0.004,2,"{0: 15, -1: 851}",851,,,
2,2011-2014,0.02,25,0.013,1,{-1: 866},866,,,
3,2011-2014,0.02,30,0.016,1,{-1: 866},866,,,0.0
4,2011-2014,0.01,10,0.0,1,{-1: 866},866,,,
5,2011-2014,0.01,15,0.002,1,{-1: 866},866,,,
6,2011-2014,0.01,25,0.005,1,{-1: 866},866,,,
7,2011-2014,0.01,30,0.008,1,{-1: 866},866,,,0.0
8,2011-2014,0.015,10,0.003,2,"{0: 14, -1: 852}",852,,,
9,2011-2014,0.015,15,0.007,1,{-1: 866},866,,,



Running DBSCAN for Year Range: 2012-2015
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.002007 seconds
Runtime for dbscan_clustering: 0.005576 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.004015 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.002205 seconds
Runtime for dbscan_clustering: 0.004082 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.014472 seconds
Runtime for dbscan_clustering: 0.002006 seconds


  msb = ssbn / float(dfbn)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range               2012-2015
eps                           0.02
min_samples                     30
Runtime (s)                  0.006
Clusters                         1
Points per Cluster       {-1: 870}
Noise Points                   870
Davies-Bouldin                None
Calinski-Harabasz             None
Weighted Mean ANOVA F          0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2012-2015,0.02,10,0.0,1,{-1: 870},870,,,
1,2012-2015,0.02,15,0.0,1,{-1: 870},870,,,
2,2012-2015,0.02,25,0.002,1,{-1: 870},870,,,
3,2012-2015,0.02,30,0.006,1,{-1: 870},870,,,0.0
4,2012-2015,0.01,10,0.0,1,{-1: 870},870,,,
5,2012-2015,0.01,15,0.0,1,{-1: 870},870,,,
6,2012-2015,0.01,25,0.004,1,{-1: 870},870,,,
7,2012-2015,0.01,30,0.0,1,{-1: 870},870,,,0.0
8,2012-2015,0.015,10,0.002,1,{-1: 870},870,,,
9,2012-2015,0.015,15,0.004,1,{-1: 870},870,,,



Running DBSCAN for Year Range: 2013-2016
Runtime for dbscan_clustering: 0.007024 seconds
Runtime for dbscan_clustering: 0.001664 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.002683 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.016072 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.006007 seconds
Runtime for dbscan_clustering: 0.000000 seconds
Runtime for dbscan_clustering: 0.016151 seconds
Runtime for dbscan_clustering: 0.014505 seconds
Runtime for dbscan_clustering: 0.012577 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                        2013-2016
eps                                    0.02
min_samples                              30
Runtime (s)                           0.003
Clusters                                  2
Points per Cluster       {0: 138, -1: 1189}
Noise Points                           1189
Davies-Bouldin                         None
Calinski-Harabasz                      None
Weighted Mean ANOVA F                   0.0
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2013-2016,0.02,10,0.007,4,"{0: 16, 1: 142, 2: 24, -1: 1145}",1145,,,
1,2013-2016,0.02,15,0.002,3,"{0: 142, 1: 23, -1: 1162}",1162,,,
2,2013-2016,0.02,25,0.0,2,"{0: 138, -1: 1189}",1189,,,
3,2013-2016,0.02,30,0.003,2,"{0: 138, -1: 1189}",1189,,,0.0
4,2013-2016,0.01,10,0.0,2,"{0: 134, -1: 1193}",1193,,,
5,2013-2016,0.01,15,0.0,2,"{0: 124, -1: 1203}",1203,,,
6,2013-2016,0.01,25,0.016,2,"{0: 123, -1: 1204}",1204,,,
7,2013-2016,0.01,30,0.0,2,"{0: 120, -1: 1207}",1207,,,0.0
8,2013-2016,0.015,10,0.006,3,"{0: 141, 1: 18, -1: 1168}",1168,,,
9,2013-2016,0.015,15,0.0,3,"{0: 138, 1: 15, -1: 1174}",1174,,,



Running DBSCAN for Year Range: 2014-2017
Runtime for dbscan_clustering: 0.007485 seconds
Runtime for dbscan_clustering: 0.007263 seconds
Runtime for dbscan_clustering: 0.007141 seconds
Runtime for dbscan_clustering: 0.040796 seconds
Runtime for dbscan_clustering: 0.016107 seconds
Runtime for dbscan_clustering: 0.007196 seconds
Runtime for dbscan_clustering: 0.014709 seconds
Runtime for dbscan_clustering: 0.007255 seconds
Runtime for dbscan_clustering: 0.007074 seconds
Runtime for dbscan_clustering: 0.007219 seconds
Runtime for dbscan_clustering: 0.007407 seconds
Runtime for dbscan_clustering: 0.005042 seconds
Runtime for dbscan_clustering: 0.008047 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                               2014-2017
eps                                          0.015
min_samples                                     30
Runtime (s)                                  0.005
Clusters                                         3
Points per Cluster       {0: 132, 1: 74, -1: 1691}
Noise Points                                  1691
Davies-Bouldin                                None
Calinski-Harabasz                             None
Weighted Mean ANOVA F                    10209.325
Name: 11, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2014-2017,0.02,10,0.007,7,"{0: 27, 1: 17, 2: 142, 3: 27, 4: 90, 5: 39, -1...",1555,,,
1,2014-2017,0.02,15,0.007,5,"{0: 142, 1: 87, 2: 33, 3: 15, -1: 1620}",1620,,,
2,2014-2017,0.02,25,0.007,3,"{0: 138, 1: 85, -1: 1674}",1674,,,
3,2014-2017,0.02,30,0.041,3,"{0: 138, 1: 84, -1: 1675}",1675,,,8356.631
4,2014-2017,0.01,10,0.016,3,"{0: 134, 1: 74, -1: 1689}",1689,,,
5,2014-2017,0.01,15,0.007,3,"{0: 124, 1: 68, -1: 1705}",1705,,,
6,2014-2017,0.01,25,0.015,3,"{0: 123, 1: 46, -1: 1728}",1728,,,
7,2014-2017,0.01,30,0.007,2,"{0: 120, -1: 1777}",1777,,,0.0
8,2014-2017,0.015,10,0.007,7,"{0: 10, 1: 141, 2: 81, 3: 22, 4: 10, 5: 10, -1...",1623,,,
9,2014-2017,0.015,15,0.007,4,"{0: 138, 1: 78, 2: 16, -1: 1665}",1665,,,



Running DBSCAN for Year Range: 2015-2018
Runtime for dbscan_clustering: 0.021267 seconds
Runtime for dbscan_clustering: 0.021621 seconds
Runtime for dbscan_clustering: 0.014219 seconds
Runtime for dbscan_clustering: 0.014983 seconds
Runtime for dbscan_clustering: 0.014986 seconds
Runtime for dbscan_clustering: 0.007186 seconds
Runtime for dbscan_clustering: 0.012098 seconds
Runtime for dbscan_clustering: 0.010675 seconds
Runtime for dbscan_clustering: 0.009030 seconds
Runtime for dbscan_clustering: 0.019846 seconds
Runtime for dbscan_clustering: 0.012996 seconds
Runtime for dbscan_clustering: 0.012466 seconds
Runtime for dbscan_clustering: 0.011127 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                                2015-2018
eps                                            0.01
min_samples                                      30
Runtime (s)                                   0.011
Clusters                                          3
Points per Cluster       {0: 120, 1: 218, -1: 2304}
Noise Points                                   2304
Davies-Bouldin                                 None
Calinski-Harabasz                              None
Weighted Mean ANOVA F                     14908.046
Name: 7, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2015-2018,0.02,10,0.021,19,"{0: 31, 1: 18, 2: 142, 3: 28, 4: 300, 5: 13, 6...",1828,,,
1,2015-2018,0.02,15,0.022,10,"{0: 142, 1: 298, 2: 20, 3: 18, 4: 21, 5: 35, 6...",2004,,,
2,2015-2018,0.02,25,0.014,4,"{0: 138, 1: 298, 2: 32, -1: 2174}",2174,,,
3,2015-2018,0.02,30,0.015,3,"{0: 138, 1: 290, -1: 2214}",2214,,,9940.194
4,2015-2018,0.01,10,0.015,8,"{0: 134, 1: 254, 2: 12, 3: 10, 4: 10, 5: 11, 6...",2201,,,
5,2015-2018,0.01,15,0.007,4,"{0: 124, 1: 222, 2: 27, -1: 2269}",2269,,,
6,2015-2018,0.01,25,0.012,3,"{0: 123, 1: 219, -1: 2300}",2300,,,
7,2015-2018,0.01,30,0.011,3,"{0: 120, 1: 218, -1: 2304}",2304,,,14908.046
8,2015-2018,0.015,10,0.009,13,"{0: 10, 1: 141, 2: 292, 3: 12, 4: 18, 5: 14, 6...",2043,,,
9,2015-2018,0.015,15,0.02,7,"{0: 138, 1: 292, 2: 17, 3: 15, 4: 26, 5: 16, -...",2138,,,



Running DBSCAN for Year Range: 2016-2019
Runtime for dbscan_clustering: 0.016749 seconds
Runtime for dbscan_clustering: 0.016866 seconds
Runtime for dbscan_clustering: 0.012133 seconds
Runtime for dbscan_clustering: 0.016917 seconds
Runtime for dbscan_clustering: 0.015161 seconds
Runtime for dbscan_clustering: 0.009985 seconds
Runtime for dbscan_clustering: 0.013413 seconds
Runtime for dbscan_clustering: 0.010648 seconds
Runtime for dbscan_clustering: 0.017155 seconds
Runtime for dbscan_clustering: 0.015271 seconds
Runtime for dbscan_clustering: 0.009763 seconds
Runtime for dbscan_clustering: 0.015792 seconds
Runtime for dbscan_clustering: 0.009765 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                                2016-2019
eps                                            0.01
min_samples                                      30
Runtime (s)                                   0.011
Clusters                                          3
Points per Cluster       {0


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2016-2019,0.02,10,0.017,21,"{0: 141, 1: 31, 2: 33, 3: 367, 4: 11, 5: 20, 6...",1805,,,
1,2016-2019,0.02,15,0.017,11,"{0: 138, 1: 365, 2: 22, 3: 25, 4: 17, 5: 19, 6...",1995,,,
2,2016-2019,0.02,25,0.012,4,"{0: 136, 1: 360, 2: 28, -1: 2186}",2186,,,
3,2016-2019,0.02,30,0.017,3,"{0: 135, 1: 358, -1: 2217}",2217,,,9588.456
4,2016-2019,0.01,10,0.015,9,"{0: 123, 1: 262, 2: 12, 3: 28, 4: 14, 5: 6, 6:...",2243,,,
5,2016-2019,0.01,15,0.01,4,"{0: 120, 1: 259, 2: 23, -1: 2308}",2308,,,
6,2016-2019,0.01,25,0.013,3,"{0: 116, 1: 256, -1: 2338}",2338,,,
7,2016-2019,0.01,30,0.011,3,"{0: 116, 1: 250, -1: 2344}",2344,,,14957.078
8,2016-2019,0.015,10,0.017,15,"{0: 136, 1: 25, 2: 353, 3: 11, 4: 10, 5: 18, 6...",2027,,,
9,2016-2019,0.015,15,0.015,6,"{0: 131, 1: 351, 2: 18, 3: 26, 4: 16, -1: 2168}",2168,,,



Running DBSCAN for Year Range: 2017-2020
Runtime for dbscan_clustering: 0.017228 seconds
Runtime for dbscan_clustering: 0.019235 seconds
Runtime for dbscan_clustering: 0.013721 seconds
Runtime for dbscan_clustering: 0.015491 seconds
Runtime for dbscan_clustering: 0.015700 seconds
Runtime for dbscan_clustering: 0.013741 seconds
Runtime for dbscan_clustering: 0.012404 seconds
Runtime for dbscan_clustering: 0.013206 seconds
Runtime for dbscan_clustering: 0.024570 seconds
Runtime for dbscan_clustering: 0.015145 seconds
Runtime for dbscan_clustering: 0.021693 seconds
Runtime for dbscan_clustering: 0.014595 seconds
Runtime for dbscan_clustering: 0.012863 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                               2017-2020
eps                                           0.01
min_samples                                     30
Runtime (s)                                  0.013
Clusters                                         3
Points per Cluster       {0: 421


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2017-2020,0.02,10,0.017,36,"{0: 586, 1: 33, 2: 10, 3: 38, 4: 17, 5: 16, 6:...",2066,,,
1,2017-2020,0.02,15,0.019,17,"{0: 586, 1: 31, 2: 34, 3: 22, 4: 20, 5: 16, 6:...",2367,,,
2,2017-2020,0.02,25,0.014,4,"{0: 586, 1: 26, 2: 25, -1: 2654}",2654,,,
3,2017-2020,0.02,30,0.015,2,"{0: 582, -1: 2709}",2709,,,0.0
4,2017-2020,0.01,10,0.016,16,"{0: 503, 1: 14, 2: 10, 3: 10, 4: 19, 5: 10, 6:...",2618,,,
5,2017-2020,0.01,15,0.014,2,"{0: 487, -1: 2804}",2804,,,
6,2017-2020,0.01,25,0.012,3,"{0: 422, 1: 43, -1: 2826}",2826,,,
7,2017-2020,0.01,30,0.013,3,"{0: 421, 1: 35, -1: 2835}",2835,,,22.152
8,2017-2020,0.015,10,0.025,25,"{0: 574, 1: 25, 2: 10, 3: 28, 4: 14, 5: 22, 6:...",2363,,,
9,2017-2020,0.015,15,0.015,10,"{0: 569, 1: 23, 2: 22, 3: 16, 4: 15, 5: 20, 6:...",2571,,,



Running DBSCAN for Year Range: 2018-2021
Runtime for dbscan_clustering: 0.030693 seconds
Runtime for dbscan_clustering: 0.015842 seconds
Runtime for dbscan_clustering: 0.031851 seconds
Runtime for dbscan_clustering: 0.016142 seconds
Runtime for dbscan_clustering: 0.024822 seconds
Runtime for dbscan_clustering: 0.029680 seconds
Runtime for dbscan_clustering: 0.022037 seconds
Runtime for dbscan_clustering: 0.016223 seconds
Runtime for dbscan_clustering: 0.021755 seconds
Runtime for dbscan_clustering: 0.027263 seconds
Runtime for dbscan_clustering: 0.020622 seconds
Runtime for dbscan_clustering: 0.015804 seconds
Runtime for dbscan_clustering: 0.015741 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                               2018-2021
eps                                           0.02
min_samples                                     30
Runtime (s)                                  0.016
Clusters                                         3
Points per Cluster       {0: 544


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2018-2021,0.02,10,0.031,39,"{0: 37, 1: 544, 2: 22, 3: 24, 4: 16, 5: 56, 6:...",2076,,,
1,2018-2021,0.02,15,0.016,21,"{0: 544, 1: 22, 2: 16, 3: 41, 4: 18, 5: 33, 6:...",2390,,,
2,2018-2021,0.02,25,0.032,6,"{0: 544, 1: 41, 2: 29, 3: 29, 4: 26, -1: 2773}",2773,,,
3,2018-2021,0.02,30,0.016,3,"{0: 544, 1: 36, -1: 2862}",2862,,,63574.18
4,2018-2021,0.01,10,0.025,17,"{0: 504, 1: 12, 2: 23, 3: 14, 4: 13, 5: 15, 6:...",2729,,,
5,2018-2021,0.01,15,0.03,6,"{0: 500, 1: 17, 2: 15, 3: 15, 4: 16, -1: 2879}",2879,,,
6,2018-2021,0.01,25,0.022,3,"{0: 66, 1: 384, -1: 2992}",2992,,,
7,2018-2021,0.01,30,0.016,3,"{0: 64, 1: 382, -1: 2996}",2996,,,31.652
8,2018-2021,0.015,10,0.022,34,"{0: 22, 1: 11, 2: 16, 3: 537, 4: 26, 5: 11, 6:...",2383,,,
9,2018-2021,0.015,15,0.027,14,"{0: 21, 1: 16, 2: 537, 3: 31, 4: 23, 5: 20, 6:...",2675,,,



Running DBSCAN for Year Range: 2019-2022
Runtime for dbscan_clustering: 0.004002 seconds
Runtime for dbscan_clustering: 0.019628 seconds
Runtime for dbscan_clustering: 0.016143 seconds
Runtime for dbscan_clustering: 0.017659 seconds
Runtime for dbscan_clustering: 0.037420 seconds
Runtime for dbscan_clustering: 0.013049 seconds
Runtime for dbscan_clustering: 0.042474 seconds
Runtime for dbscan_clustering: 0.005148 seconds
Runtime for dbscan_clustering: 0.017618 seconds
Runtime for dbscan_clustering: 0.015900 seconds
Runtime for dbscan_clustering: 0.016052 seconds
Runtime for dbscan_clustering: 0.020691 seconds
Runtime for dbscan_clustering: 0.016097 seconds



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Best DBSCAN parameters based on ANOVA F-value:
Year Range                               2019-2022
eps                                           0.02
min_samples                                     30
Runtime (s)                                  0.018
Clusters                                         3
Points per Cluster       {0: 421, 1: 43, -1: 3145}
Noise Points                                  3145
Davies-Bouldin                                None
Calinski-Harabasz                             None
Weighted Mean ANOVA F                     68648.87
Name: 3, dtype: object


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2019-2022,0.02,10,0.004,58,"{0: 10, 1: 10, 2: 11, 3: 18, 4: 18, 5: 421, 6:...",2059,,,
1,2019-2022,0.02,15,0.02,25,"{0: 18, 1: 421, 2: 26, 3: 46, 4: 45, 5: 23, 6:...",2560,,,
2,2019-2022,0.02,25,0.016,5,"{0: 421, 1: 30, 2: 43, 3: 25, -1: 3090}",3090,,,
3,2019-2022,0.02,30,0.018,3,"{0: 421, 1: 43, -1: 3145}",3145,,,68648.87
4,2019-2022,0.01,10,0.037,19,"{0: 379, 1: 10, 2: 12, 3: 10, 4: 11, 5: 17, 6:...",3018,,,
5,2019-2022,0.01,15,0.013,6,"{0: 362, 1: 17, 2: 14, 3: 15, 4: 23, -1: 3178}",3178,,,
6,2019-2022,0.01,25,0.042,3,"{0: 302, 1: 42, -1: 3265}",3265,,,
7,2019-2022,0.01,30,0.005,2,"{0: 295, -1: 3314}",3314,,,0.0
8,2019-2022,0.015,10,0.018,40,"{0: 18, 1: 414, 2: 14, 3: 12, 4: 11, 5: 31, 6:...",2580,,,
9,2019-2022,0.015,15,0.016,13,"{0: 414, 1: 21, 2: 28, 3: 16, 4: 15, 5: 16, 6:...",2973,,,



Running DBSCAN for Year Range: 2020-2023
Runtime for dbscan_clustering: 0.027096 seconds
Runtime for dbscan_clustering: 0.033009 seconds
Runtime for dbscan_clustering: 0.027955 seconds
Runtime for dbscan_clustering: 0.026338 seconds
Runtime for dbscan_clustering: 0.016145 seconds
Runtime for dbscan_clustering: 0.019660 seconds
Runtime for dbscan_clustering: 0.016206 seconds
Runtime for dbscan_clustering: 0.016148 seconds
Runtime for dbscan_clustering: 0.036607 seconds
Runtime for dbscan_clustering: 0.011451 seconds
Runtime for dbscan_clustering: 0.024747 seconds
Runtime for dbscan_clustering: 0.016039 seconds
Runtime for dbscan_clustering: 0.022055 seconds
Best DBSCAN parameters based on ANOVA F-value:
Year Range                               2020-2023
eps                                          0.015
min_samples                                     30
Runtime (s)                                  0.016
Clusters                                         3
Points per Cluster       {0: 467


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=f_vals, y=features, palette="crest", legend = False)


Unnamed: 0,Year Range,eps,min_samples,Runtime (s),Clusters,Points per Cluster,Noise Points,Davies-Bouldin,Calinski-Harabasz,Weighted Mean ANOVA F
0,2020-2023,0.02,10,0.027,67,"{0: 10, 1: 10, 2: 494, 3: 25, 4: 29, 5: 25, 6:...",2332,,,
1,2020-2023,0.02,15,0.033,35,"{0: 494, 1: 25, 2: 20, 3: 18, 4: 15, 5: 43, 6:...",2826,,,
2,2020-2023,0.02,25,0.028,8,"{0: 494, 1: 32, 2: 61, 3: 42, 4: 25, 5: 135, 6...",3546,,,
3,2020-2023,0.02,30,0.026,7,"{0: 492, 1: 51, 2: 38, 3: 92, 4: 26, 5: 30, -1...",3643,,,35544.301
4,2020-2023,0.01,10,0.016,29,"{0: 10, 1: 451, 2: 14, 3: 11, 4: 10, 5: 13, 6:...",3561,,,
5,2020-2023,0.01,15,0.02,8,"{0: 433, 1: 18, 2: 22, 3: 21, 4: 20, 5: 15, 6:...",3828,,,
6,2020-2023,0.01,25,0.016,2,"{0: 424, -1: 3948}",3948,,,
7,2020-2023,0.01,30,0.016,3,"{0: 381, 1: 32, -1: 3959}",3959,,,17.979
8,2020-2023,0.015,10,0.037,52,"{0: 10, 1: 487, 2: 25, 3: 14, 4: 10, 5: 15, 6:...",2942,,,
9,2020-2023,0.015,15,0.011,21,"{0: 482, 1: 15, 2: 21, 3: 19, 4: 43, 5: 16, 6:...",3456,,,
