In [1]:
import cluster_data
from cluster_data import run_clustering, normalize_data, unnormalize
import numpy as np
import pandas as pd
import os
import cluster_plotter
from DBSCAN import dbscan_clustering
from cluster_plotter import ClusterPlotter
from clustering_utils import ClusterData
import matplotlib.pyplot as plt

# Fixed year windows keyed by "start-end" label. Not referenced again in the
# visible part of this cell; the search below iterates the running ranges.
standard_year_ranges = {
    f"{start}-{start + 3}": np.arange(start, start + 4)
    for start in [2002, 2006, 2010, 2014]
}
standard_year_ranges["2019-2023"] = np.arange(2019, 2024)
running_ranges = cluster_data.generate_running_year_ranges(2002, 2023, 4)

binned_data = cluster_data.bin_data_for_clustering(running_ranges, print_res=False)

# Start every run from an empty output directory.
plot_dir = "Images/dbscan_tails"
os.makedirs(plot_dir, exist_ok=True)
cluster_plotter.clear_directory(plot_dir)

results_per_year_range = {}  # year range -> best grid-search entry
cluster_centers_dict = {}  # Stores cluster centers per year range

# DBSCAN hyper-parameter grid. It is identical for every year range, so it is
# defined once instead of being rebuilt on each loop iteration.
eps_values = [0.012, 0.015, 0.023]
min_samples_values = [10, 15, 17]

# The loop variable is deliberately NOT called `cluster_data`: that name is
# the imported module used above, and rebinding it would make the module
# unreachable for any code that runs after this loop.
for binned_cluster, year_range in binned_data:
    print(f"\nRunning DBSCAN for Year Range: {year_range}")

    # Two feature columns per object: inclination (col 0) and RAAN (col 1).
    data_array = np.array([binned_cluster.inc, binned_cluster.raan]).T
    normalized_data, data_min, data_max = normalize_data(data_array)

    best_result = None
    best_score = float('-inf')

    for eps in eps_values:
        for min_samples in min_samples_values:
            result_dbscan, time_dbscan, n_clusters_dbscan, points_per_cluster_dbscan, metrics_dbscan = run_clustering(
                dbscan_clustering, "DBSCAN", normalized_data, data_min, data_max, plot=False, eps=eps, min_samples=min_samples
            )

            # metrics_dbscan[3] is treated as the silhouette score; anything
            # non-numeric (presumably a placeholder when the score cannot be
            # computed — TODO confirm) disqualifies the run. NumPy scalar
            # types are accepted too, since e.g. np.float32 is not a `float`.
            silhouette_score = (
                metrics_dbscan[3]
                if metrics_dbscan and isinstance(metrics_dbscan[3], (int, float, np.integer, np.floating))
                else None
            )
            # DBSCAN marks noise with the label -1.
            noise_points = np.sum(result_dbscan.labels == -1)

            if silhouette_score is not None and silhouette_score > best_score:
                best_score = silhouette_score
                best_result = {
                    "eps": eps,
                    "min_samples": min_samples,
                    "runtime": time_dbscan,
                    "clusters": n_clusters_dbscan,
                    "points_per_cluster": points_per_cluster_dbscan,
                    "noise_points": noise_points,
                    "metrics": metrics_dbscan,
                    "result_dbscan": result_dbscan
                }

    if best_result is None:
        # No parameter combination produced a usable score; say so instead of
        # silently leaving this year range out of the results.
        print(f"No valid DBSCAN result for {year_range}; skipping plot.")
        continue

    # NOTE(review): the captured output of this cell ends with
    # "ValueError: ... inhomogeneous shape ... (21519,)" right after the first
    # year range's grid finished and before its "Best for" line was printed,
    # so the failure appears to originate in the plotting steps below —
    # confirm the shapes handled by unnormalize / ClusterPlotter.
    unnormalized_data, _ = unnormalize(best_result["result_dbscan"].data, None, data_min, data_max)
    plotter = ClusterPlotter(unnormalized_data, best_result["result_dbscan"].labels, None)
    plot_filename = os.path.join(plot_dir, f"dbscan_{year_range}_best.png")
    title = f"DBSCAN Best: {year_range}, eps = {best_result['eps']}, min_samples = {best_result['min_samples']}"
    plotter.clusters_2d_plot(title, plot_filename, show_centers=False)

    results_per_year_range[year_range] = best_result
    print(f"Best for {year_range}: eps = {best_result['eps']}, min_samples = {best_result['min_samples']}, silhouette = {best_score:.3f}")

    # Store cluster centers
    if hasattr(best_result["result_dbscan"], "cluster_centers"):
        cluster_centers_dict[year_range] = best_result["result_dbscan"].cluster_centers

# Function to plot cluster centers
def plot_cluster_centers(cluster_centers_dict):
    """Scatter the cluster centers of every year range on one figure.

    Each year range gets its own viridis color and each center is annotated
    with ``"<year_range>_<index>"``, the pair expected by ``select_cluster``.

    Args:
        cluster_centers_dict: Mapping of year-range label to a 2-D array-like
            of centers, or ``None``. Column 0 is drawn on the inclination
            axis and column 1 on the RAAN axis.
    """
    plt.figure(figsize=(10, 7))
    colors = plt.cm.viridis(np.linspace(0, 1, len(cluster_centers_dict)))

    plotted_any = False
    for i, (year_range, centers) in enumerate(cluster_centers_dict.items()):
        if centers is None:
            continue
        centers = np.asarray(centers)
        if centers.size == 0:
            # Nothing to draw, and an empty array cannot be column-indexed.
            continue

        plt.scatter(centers[:, 1], centers[:, 0], color=colors[i], label=f"{year_range}", marker='X', s=100)
        for idx, center in enumerate(centers):
            # Same (RAAN, inclination) order as the scatter call above, so the
            # label sits on its marker rather than at swapped coordinates.
            plt.text(center[1], center[0], f"{year_range}_{idx}", fontsize=9, ha='right')
        plotted_any = True

    plt.xlabel("RAAN [°]")
    plt.ylabel("Inclination [°]")
    plt.title("Cluster Centers Over Time")
    if plotted_any:
        # A legend without labeled artists only triggers a matplotlib warning.
        plt.legend()
    plt.grid(True)
    plt.show()

# Draw all stored centers; the "<year_range>_<index>" labels on the figure are
# the arguments to use with select_cluster.
plot_cluster_centers(cluster_centers_dict)

# Manual selection of clusters
selected_clusters = {}  # year range -> the single center chosen for it

def select_cluster(year_range, cluster_idx):
    """Record one cluster center of ``year_range`` in ``selected_clusters``.

    Looks the centers up in the module-level ``cluster_centers_dict``. On
    success the chosen center is stored under ``year_range`` (replacing any
    earlier choice for it) and a confirmation is printed. Otherwise an
    "Invalid selection" message is printed and nothing is stored.

    Args:
        year_range: Key into ``cluster_centers_dict``.
        cluster_idx: Index of the center to pick. Negative indices count from
            the end, as with normal sequence indexing.
    """
    centers = cluster_centers_dict.get(year_range)
    # A year range may be stored with no centers (None); treat that like an
    # unknown year range instead of failing on len(None). The lower bound
    # turns an out-of-range negative index into the same message rather than
    # an IndexError.
    if centers is not None and -len(centers) <= cluster_idx < len(centers):
        selected_clusters[year_range] = centers[cluster_idx]
        print(f"Selected cluster {cluster_idx} from {year_range}: {selected_clusters[year_range]}")
    else:
        print("Invalid selection. Check year range and cluster index.")

# Example: Selecting clusters manually (update based on the plot labels).
# NOTE(review): the first argument must match a key of cluster_centers_dict
# exactly; these literals assume "start-end" year-range labels — confirm
# against the labels shown on the cluster-centers figure.
select_cluster("2002-2005", 1)
select_cluster("2010-2013", 0)

# Function to plot evolution of selected clusters
def plot_selected_clusters(selected_clusters):
    """Plot the chosen centers as one connected path in RAAN/inclination space.

    Entries are visited in sorted order of their year-range key. For each
    center, element 1 is the x value (RAAN axis) and element 0 the y value
    (inclination axis); every point is annotated with its year-range key.

    Args:
        selected_clusters: Mapping of year-range label to a center that can be
            indexed at positions 0 and 1.
    """
    plt.figure(figsize=(10, 7))

    ordered = sorted(selected_clusters.items())
    labels = [label for label, _ in ordered]
    raan_values = [center[1] for _, center in ordered]
    inc_values = [center[0] for _, center in ordered]

    plt.plot(raan_values, inc_values, marker='o', linestyle='-', color='red', label="Selected Cluster Evolution")
    # Tag each vertex of the path with the year range it belongs to.
    for raan, inc, label in zip(raan_values, inc_values, labels):
        plt.text(raan, inc, label, fontsize=9, ha='right')

    plt.xlabel("RAAN [°]")
    plt.ylabel("Inclination [°]")
    plt.title("Evolution of Selected Cluster Over Time")
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot whichever centers the select_cluster calls stored in selected_clusters.
plot_selected_clusters(selected_clusters)


Running DBSCAN for Year Range: 2002-2005

DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:00,  2.71step/s]

Runtime for dbscan_clustering: 0.368435 seconds


  stds[label] = tuple(np.sqrt(eigenvalues))
Running DBSCAN: 100%|██████████| 3/3 [00:08<00:00,  2.76s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:00,  2.87step/s]

Runtime for dbscan_clustering: 0.346906 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:07<00:00,  2.64s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:00,  3.12step/s]

Runtime for dbscan_clustering: 0.320231 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:07<00:00,  2.60s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:00,  2.52step/s]

Runtime for dbscan_clustering: 0.397425 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:07<00:00,  2.55s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:00,  2.37step/s]

Runtime for dbscan_clustering: 0.420795 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:07<00:00,  2.52s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:00,  2.34step/s]

Runtime for dbscan_clustering: 0.426632 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:16<00:00,  5.49s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:01,  1.47step/s]

Runtime for dbscan_clustering: 0.679510 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:14<00:00,  4.98s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:01<00:02,  1.43s/step]

Runtime for dbscan_clustering: 1.427070 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:18<00:00,  6.20s/step]



DBSCAN result:


Running DBSCAN:  33%|███▎      | 1/3 [00:00<00:01,  1.62step/s]

Runtime for dbscan_clustering: 0.617562 seconds


Running DBSCAN: 100%|██████████| 3/3 [00:08<00:00,  2.88s/step]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (21519,) + inhomogeneous part.

<Figure size 1000x700 with 0 Axes>