## Different binning widths k-means

In [2]:
import cluster_data
from cluster_data import run_clustering, normalize_data, unnormalize, generate_running_year_ranges, bin_data_for_clustering
import numpy as np
import pandas as pd
import os
from kmeans import k_means
from cluster_plotter import ClusterPlotter
from clustering_utils import ClusterData
import cluster_plotter
import scores

# Run k-means on running year windows of several widths and collect the
# scores of every (window, k) combination for one comparison plot.

# --- Configuration -----------------------------------------------------------
bins = [3, 4, 5, 6]    # window widths; e.g. width 3 yields ranges like "2002-2004"
k_values = [5, 6, 7]   # cluster counts tried for every window

# Output directory for plots; emptied on every run so stale images never mix
# with fresh ones.
plot_dir = "Images/binning_width_kmeans"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

# Set to True to additionally save one 2-D cluster plot per (window, k).
SAVE_CLUSTER_PLOTS = False

# --- Result accumulators (parallel lists, one entry per clustering run) -------
array_of_metrics = []
array_of_yearranges = []
array_of_binwidths = []

for bin_width in bins:
    running_ranges = generate_running_year_ranges(2002, 2023, bin_width)
    binned_data = bin_data_for_clustering(running_ranges, print_res=False)

    # NOTE: the loop variable is deliberately not called `cluster_data`, which
    # would rebind the imported `cluster_data` module in the notebook namespace.
    for window_data, year_range in binned_data:
        print(f"\nRunning K-Means for Year Range: {year_range}")

        # Two features per object: inclination and RAAN, one row per object.
        data_array = np.array([window_data.inc, window_data.raan]).T
        normalized_data, data_min, data_max = normalize_data(data_array)

        for k in k_values:
            result_kmeans, _runtime, _n_clusters, _points_per_cluster, metrics_kmeans = run_clustering(
                k_means, f"K-means (k={k})", normalized_data, data_min, data_max, k,
                plot=False, init='kmeans++'
            )

            if SAVE_CLUSTER_PLOTS:
                # Map the data and centers back to their original scale before plotting.
                unnormalized_data, cluster_centers = unnormalize(
                    result_kmeans.data, result_kmeans.cluster_centers, data_min, data_max
                )
                plotter = ClusterPlotter(unnormalized_data, result_kmeans.labels, cluster_centers)
                plot_filename = os.path.join(plot_dir, f"kmeans_{year_range}_k{k}.png")
                plotter.clusters_2d_plot(f"k-Means: years = {year_range}, k = {k}", plot_filename)

            # Only the first four metric entries are passed on to the score
            # plot; any further entries are not needed here.
            array_of_metrics.append(metrics_kmeans[:4])
            array_of_yearranges.append(year_range)
            array_of_binwidths.append(bin_width)

# Pass `plot_dir` directly rather than through a variable named `dir`, which
# would shadow the builtin for the rest of the kernel session.
scores.plot_scores_for_different_binnings(
    array_of_metrics, array_of_yearranges, array_of_binwidths, plot_dir
)



Running K-Means for Year Range: 2002-2004
Runtime for k_means: 0.084535 seconds
Runtime for k_means: 0.089525 seconds
Runtime for k_means: 0.045043 seconds

Running K-Means for Year Range: 2003-2005
Runtime for k_means: 0.138019 seconds
Runtime for k_means: 0.062390 seconds
Runtime for k_means: 0.094322 seconds

Running K-Means for Year Range: 2004-2006
Runtime for k_means: 0.146312 seconds
Runtime for k_means: 0.068518 seconds
Runtime for k_means: 0.142825 seconds

Running K-Means for Year Range: 2005-2007
Runtime for k_means: 0.046901 seconds
Runtime for k_means: 0.082254 seconds
Runtime for k_means: 0.124498 seconds

Running K-Means for Year Range: 2006-2008
Runtime for k_means: 0.102597 seconds
Runtime for k_means: 0.119095 seconds
Runtime for k_means: 0.158415 seconds

Running K-Means for Year Range: 2007-2009
Runtime for k_means: 0.057754 seconds
Runtime for k_means: 0.099642 seconds
Runtime for k_means: 0.074912 seconds

Running K-Means for Year Range: 2008-2010
Runtime for k_m

## Different binning widths DBSCAN

In [None]:
import cluster_data
from cluster_data import run_clustering, normalize_data, unnormalize, generate_running_year_ranges, bin_data_for_clustering
import numpy as np
import pandas as pd
import os
from DBSCAN import dbscan_clustering
from cluster_plotter import ClusterPlotter
from clustering_utils import ClusterData
import cluster_plotter
import scores

# Run DBSCAN on running year windows of several widths, over a grid of
# (eps, min_samples) settings, and collect the scores for one comparison plot.

# --- Configuration -----------------------------------------------------------
bins = [3, 4, 5, 6]                # window widths; e.g. width 3 yields "2002-2004"
eps_vals = [0.02, 0.01, 0.015]     # DBSCAN eps values; the clustering receives normalized data
min_samples_vals = [10, 20, 30]    # DBSCAN min_samples values

# Output directory for plots; emptied on every run so stale images never mix
# with fresh ones.
plot_dir = "Images/binning_width_dbscan"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

# --- Result accumulators (parallel lists, one entry per clustering run) -------
array_of_metrics = []
array_of_yearranges = []
# Reset here so this cell never reuses bin widths left in the kernel by
# another cell; the score plot needs one width per metrics entry.
array_of_binwidths = []

for bin_width in bins:
    running_ranges = generate_running_year_ranges(2002, 2023, bin_width)
    binned_data = bin_data_for_clustering(running_ranges, print_res=False)

    # NOTE: the loop variable is deliberately not called `cluster_data`, which
    # would rebind the imported `cluster_data` module in the notebook namespace.
    for window_data, year_range in binned_data:
        print(f"\nRunning DBSCAN for Year Range: {year_range}")

        # Two features per object: inclination and RAAN, one row per object.
        data_array = np.array([window_data.inc, window_data.raan]).T
        normalized_data, data_min, data_max = normalize_data(data_array)

        for eps in eps_vals:
            # `min_samples` instead of `min`, so the builtin stays usable.
            for min_samples in min_samples_vals:
                _result, _runtime, _n_clusters, _points_per_cluster, metrics_dbscan = run_clustering(
                    dbscan_clustering,
                    f"DBSCAN, eps: {eps}, min_samples {min_samples}",
                    normalized_data, data_min, data_max, eps, min_samples,
                    plot=False,
                )

                # Only the first four metric entries are passed on to the
                # score plot; any further entries are not needed here.
                array_of_metrics.append(metrics_dbscan[:4])
                array_of_yearranges.append(year_range)
                array_of_binwidths.append(bin_width)

# Same four-argument call as the k-means cell: metrics, year ranges, bin
# widths, output directory. `plot_dir` is passed directly rather than through
# a variable named `dir`, which would shadow the builtin.
scores.plot_scores_for_different_binnings(
    array_of_metrics, array_of_yearranges, array_of_binwidths, plot_dir
)


Running DBSCAN for Year Range: 2002-2004
Runtime for dbscan_clustering: 3.655288 seconds
Runtime for dbscan_clustering: 3.302338 seconds
Runtime for dbscan_clustering: 4.959954 seconds
Runtime for dbscan_clustering: 2.927646 seconds
Runtime for dbscan_clustering: 2.745319 seconds
Runtime for dbscan_clustering: 2.778965 seconds
Runtime for dbscan_clustering: 2.862009 seconds
Runtime for dbscan_clustering: 2.908840 seconds
Runtime for dbscan_clustering: 2.881798 seconds

Running DBSCAN for Year Range: 2003-2005
Runtime for dbscan_clustering: 3.747026 seconds
Runtime for dbscan_clustering: 3.799513 seconds
Runtime for dbscan_clustering: 3.381438 seconds
Runtime for dbscan_clustering: 4.698300 seconds
Runtime for dbscan_clustering: 5.333785 seconds
Runtime for dbscan_clustering: 5.492862 seconds
Runtime for dbscan_clustering: 4.565468 seconds
Runtime for dbscan_clustering: 4.829984 seconds
Runtime for dbscan_clustering: 4.596021 seconds

Running DBSCAN for Year Range: 2004-2006
Runtime fo