Test with the stat_Master file: bandwidth search and mean-shift clustering on a single file

In [None]:
import os

import cluster_finder
import numpy as np
from cluster_plotter import ClusterPlotter

# Single-file smoke test: pick the best mean-shift bandwidth for one
# stat_Master file, cluster it, and plot the result in 3D and 2D.

# os.path.join keeps the path valid on every OS; the previous hard-coded
# backslash separator only resolves on Windows.
testfile = os.path.join("input", "stat_Master_02_geo_s1.crs")
prepared_data = cluster_finder.prepare_data_for_clustering(testfile)  # named tuple (inc, raan, ecc)

# Candidate bandwidths 0.20 ... 0.48 in steps of 0.04 (the stop value 0.5 is excluded).
bandwidths = np.arange(0.2, 0.5, 0.04)
best_bandwidth, scores = cluster_finder.find_best_bandwidth(prepared_data, bandwidths)
cluster_finder.plot_bandwidth_against_score(bandwidths, scores)

# Cluster with the selected bandwidth and unpack the fields used for plotting.
# `data` is bound only once here, to the data returned by the clustering step,
# instead of first holding the prepared input and then being overwritten.
cluster_data = cluster_finder.find_clusters_mean_shift_clustering(prepared_data, best_bandwidth)
data = cluster_data.data
labels = cluster_data.labels
cluster_centers = cluster_data.cluster_centers

print("Cluster Centers:", cluster_centers)

plotter = ClusterPlotter(data, labels, cluster_centers)
plotter.clusters_3d_plot("3D Cluster Visualization")
plotter.clusters_2d_plot("2D Cluster Visualization")

Main results

In [None]:
import cluster_finder
import numpy as np
from scipy.spatial.distance import cdist
from scipy.optimize import linear_sum_assignment
from cluster_plotter import ClusterPlotter  # Assuming your plotting module is saved as cluster_plotter.py

# --- Run configuration --------------------------------------------------------
seed = 1
years = ["2002", "2003", "2004", "2005"]
orbit_types = ["geo", "gto", "fol"]
bandwidths = [0.2, 0.25, 0.3, 0.35, 0.4]

# --- Result containers, every one keyed by (year, orbit_type, seed) -----------
cluster_evolution = {}
updated_10cm_cluster_evolution = {}
cluster_centers_5mm_dict = {}
cluster_centers_10cm_dict = {}
matches_dict = {}  # matched clusters returned by cluster_finder.match_clusters

for orbit_type in orbit_types:
    for year in years:
        print(year, orbit_type)
        run_key = (year, orbit_type, seed)

        # Locate the 5mm / 10cm input files for this run and cluster both.
        file_5mm, file_10cm = cluster_finder.get_files_for_cluster_evolution(
            year, orbit_type, seed, "input"
        )
        (
            clusters_5mm,
            clusters_10cm,
            clusters_5mm_array,
            clusters_10cm_array,
        ) = cluster_finder.find_clusters_for_one_year(file_5mm, file_10cm, bandwidths)

        # Keep the raw clustering results of both datasets.
        cluster_evolution[run_key] = dict(
            clusters_5mm=clusters_5mm,
            clusters_10cm=clusters_10cm,
        )

        # Compare the two clusterings and keep the updated 10cm data.
        updated_10cm_cluster_data = cluster_finder.cluster_comparison(
            clusters_5mm_array, clusters_10cm_array
        )
        updated_10cm_cluster_evolution[run_key] = updated_10cm_cluster_data

        clusters_center_5mm = clusters_5mm.cluster_centers
        clusters_center_10cm = clusters_10cm.cluster_centers

        # Pair 10cm centers with 5mm centers (Hungarian algorithm, per the
        # original note); only the matches are stored, the unmatched lists
        # are not used further in this cell.
        matches, unmatched_10cm, unmatched_5mm = cluster_finder.match_clusters(
            clusters_center_10cm, clusters_center_5mm
        )
        matches_dict[run_key] = matches

        # Centers are collected for the evolution plots after the loop.
        cluster_centers_5mm_dict[run_key] = clusters_center_5mm
        cluster_centers_10cm_dict[run_key] = clusters_center_10cm

        # One combined 2D figure per (year, orbit type): 10cm data on top of 5mm.
        plotter_5mm = ClusterPlotter(
            clusters_5mm.data, clusters_5mm.labels, clusters_5mm.cluster_centers
        )
        plotter_5mm.combined_clusters_2d_plot(
            clusters_10cm.data,
            clusters_10cm.labels,
            clusters_center_10cm,
            title=f"Matched Clusters in {year} ({orbit_type})",
        )

# Evolution of the cluster centers across all processed runs.
cluster_finder.plot_cluster_center_evolution(cluster_centers_5mm_dict, cluster_centers_10cm_dict)
cluster_finder.plot_cluster_center_evolution_2d(cluster_centers_5mm_dict, cluster_centers_10cm_dict)

2002 geo
Bandwidth: 0.2, Silhouette Score: 0.3855328177458831
Bandwidth: 0.25, Silhouette Score: 0.5130814660965503
Bandwidth: 0.3, Silhouette Score: 0.5417155015594003
Bandwidth: 0.35, Silhouette Score: 0.5318661687895904
Bandwidth: 0.4, Silhouette Score: 0.2716892245928157
Best bandwidth: 0.3 with Silhouette Score: 0.5417155015594003


In [None]:
import os

import cluster_finder
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from cluster_plotter import ClusterPlotter

# Compare the clustering of the 5mm and the 10cm crossing data of one file
# pair: cluster both datasets, look every 10cm element up in the 5mm dataset,
# and report how often the two cluster labels disagree.

# os.path.join keeps the paths valid on every OS; the previous hard-coded
# backslash separator only resolves on Windows.
path_5mm = os.path.join("input", "small_Master_02_geo_s1.crs")  # crossing data for objects larger than 5mm
path_10cm = os.path.join("input", "stat_Master_02_geo_s1.crs")  # crossing data for objects larger than 10cm

# Single candidate bandwidth; swap in e.g. np.arange(0.2, 0.5, 0.05) for a sweep.
bandwidths = [0.3]

# --- Cluster the 5mm dataset --------------------------------------------------
prepared_5mm = cluster_finder.prepare_data_for_clustering(path_5mm)
best_bandwidth_5mm, scores_5mm = cluster_finder.find_best_bandwidth(prepared_5mm, bandwidths)
result_5mm = cluster_finder.find_clusters_mean_shift_clustering(prepared_5mm, best_bandwidth_5mm)

cluster_data_5mm = result_5mm.data
labels_5mm = result_5mm.labels
cluster_centers_5mm = result_5mm.cluster_centers

# --- Cluster the 10cm dataset -------------------------------------------------
prepared_10cm = cluster_finder.prepare_data_for_clustering(path_10cm)
best_bandwidth_10cm, scores_10cm = cluster_finder.find_best_bandwidth(prepared_10cm, bandwidths)
result_10cm = cluster_finder.find_clusters_mean_shift_clustering(prepared_10cm, best_bandwidth_10cm)

cluster_data_10cm = result_10cm.data
labels_10cm = result_10cm.labels
cluster_centers_10cm = result_10cm.cluster_centers

# --- Group the elements by cluster --------------------------------------------
# store_clusters returns a mapping (its .values() are iterated below); len()
# of it is reported as the number of clusters.
clusters_5mm = cluster_finder.store_clusters(cluster_data_5mm, labels_5mm)
clusters_10cm = cluster_finder.store_clusters(cluster_data_10cm, labels_10cm)
print(f"5mm dataset clusters: {len(clusters_5mm)}")
print(f"10cm dataset clusters: {len(clusters_10cm)}")

# Flatten to 2D arrays: column 0 is the cluster label, the remaining columns
# are inc, raan, ecc (per the original note).
clusters_5mm_array = cluster_finder.convert_clusters_to_array(clusters_5mm)
clusters_10cm_array = cluster_finder.convert_clusters_to_array(clusters_10cm)

total_10cm_elements = sum(len(elements) for elements in clusters_10cm.values())
total_5mm_elements = sum(len(elements) for elements in clusters_5mm.values())
print(f"Total 10cm Cluster Elements: {total_10cm_elements}")
print(f"Total 5mm Cluster Elements: {total_5mm_elements}")

# --- Look every 10cm element up in the 5mm dataset ----------------------------
# Feature columns only (cluster label removed). These two names are read by
# the plotting cell that follows, so they stay bound to these arrays.
data_10cm = clusters_10cm_array[:, 1:]
data_5mm = clusters_5mm_array[:, 1:]

print(data_10cm.shape[1])  # number of feature columns, 10cm
print(data_5mm.shape[1])   # number of feature columns, 5mm

# NOTE(review): np.isclose also applies its default rtol=1e-05, so the
# effective tolerance is relative to the 5mm value and much looser than the
# 1e-20 absolute tolerance alone. Pass rtol=0 if exact matching is intended.
MATCH_ATOL = 1e-20

updated_10cm_cluster_data = []
count = 0  # number of 10cm elements without a counterpart in the 5mm data

for row_index, row_10cm in enumerate(data_10cm):
    matches = np.all(np.isclose(data_5mm, row_10cm, atol=MATCH_ATOL), axis=1)
    matched_indices = np.flatnonzero(matches)

    if matched_indices.size == 0:
        count += 1
        continue

    # Only the first matching 5mm row is used.
    matched_row = clusters_5mm_array[matched_indices[0]]
    # Row layout: [10cm label, <feature columns>, 5mm label]
    updated_10cm_cluster_data.append(
        [clusters_10cm_array[row_index, 0], *row_10cm, matched_row[0]]
    )

updated_10cm_cluster_data = np.array(updated_10cm_cluster_data)

print("Updated 10cm Cluster Data with 5mm Cluster Labels:")
print(updated_10cm_cluster_data)
print(f"Number of unmatched 10cm elements: {count}")  # should be zero

# --- Label agreement ----------------------------------------------------------
if updated_10cm_cluster_data.size == 0:
    # Without this guard the column indexing below raises IndexError and the
    # percentage divides by zero when nothing was matched.
    print("No 10cm element was matched to the 5mm dataset; label comparison skipped.")
else:
    first_column = updated_10cm_cluster_data[:, 0]  # 10cm cluster labels
    last_column = updated_10cm_cluster_data[:, -1]  # 5mm cluster labels
    mismatches = first_column != last_column
    num_mismatches = np.sum(mismatches)
    print(f"Number of rows where the first and last columns do not match: {num_mismatches}")
    print(f"Total number of rows: {updated_10cm_cluster_data.shape[0]}")
    print(f"Mismatch percentage: {num_mismatches / updated_10cm_cluster_data.shape[0] * 100:.2f}%")

# Optional per-dataset plots (disabled). Kept as real comments: the former
# triple-quoted string was a bare expression that Jupyter echoed as cell output.
#
# # Plotting for the 5mm dataset
# plotter_5mm = ClusterPlotter(cluster_data_5mm, labels_5mm, cluster_centers_5mm)
# plotter_5mm.clusters_3d_plot("3D Cluster Visualization for 5mm Data")
# plotter_5mm.clusters_2d_plot("2D Cluster Visualization for 5mm Data")
#
# # Plotting for the 10cm dataset
# plotter_10cm = ClusterPlotter(cluster_data_10cm, labels_10cm, cluster_centers_10cm)
# plotter_10cm.clusters_3d_plot("3D Cluster Visualization for 10cm Data")
# plotter_10cm.clusters_2d_plot("2D Cluster Visualization for 10cm Data")

In [None]:
# Overlay the 10cm clustering on the 5mm clustering (2D and 3D).
#
# The plots use cluster_data_5mm / cluster_data_10cm, i.e. the data returned
# by the clustering step together with labels_* and cluster_centers_*.
# data_5mm / data_10cm must not be used here: the comparison cell rebinds them
# to rows taken from convert_clusters_to_array, whose row order is not
# guaranteed to line up with labels_5mm / labels_10cm (TODO confirm against
# cluster_finder.store_clusters).
plotter_5mm = ClusterPlotter(cluster_data_5mm, labels_5mm, cluster_centers_5mm)

# Combined 2D plot
plotter_5mm.combined_clusters_2d_plot(cluster_data_10cm, labels_10cm, cluster_centers_10cm,
                                      title="Combined 2D Clusters for 5mm and 10cm")

# Combined 3D plot
plotter_5mm.combined_clusters_3d_plot(cluster_data_10cm, labels_10cm, cluster_centers_10cm,
                                      title="Combined 3D Clusters for 5mm and 10cm")


## Proof of concept: mean-shift clustering on synthetic data (it seems to work)

In [None]:
import numpy as np
from sklearn.cluster import MeanShift
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

# Proof of concept: run mean-shift clustering on synthetic data with three
# known clusters and check visually that the centers are recovered.
# Reference: https://www.geeksforgeeks.org/ml-mean-shift-clustering/

# Ground-truth centers of the synthetic clusters. Column order follows the
# axis labels used in the plots below: inclination, RAAN, eccentricity.
true_centers = [
    [5, 90, 0.2],
    [15, 180, 0.7],
    [20, 300, 0.4],
]
# make_blobs interprets a sequence as ONE isotropic standard deviation PER
# CLUSTER (not per feature): the first cluster gets std 3 in all three
# columns, the second 0.7, the third 1.
# NOTE(review): if per-feature spreads were intended, the samples have to be
# scaled per column instead.
cluster_std = [3, 0.7, 1]

data, _ = make_blobs(n_samples=1000, centers=true_centers, cluster_std=cluster_std, random_state=42)

# Scale every feature to [0, 1] so one bandwidth is meaningful for all columns.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

# Apply Mean Shift clustering in the normalized space.
mean_shift = MeanShift(bandwidth=0.1)
mean_shift.fit(normalized_data)
labels = mean_shift.labels_
cluster_centers = mean_shift.cluster_centers_  # fitted centers, normalized units
original_scale_centers = scaler.inverse_transform(cluster_centers)

print("Cluster labels:", labels)
print("Cluster centers (original scale):", original_scale_centers)

# 2D plot: RAAN vs. inclination, points colored by eccentricity.
plt.figure(figsize=(10, 7))
scatter = plt.scatter(data[:, 1], data[:, 0], c=data[:, 2], cmap='viridis', s=50, alpha=0.8)
plt.scatter(original_scale_centers[:, 1], original_scale_centers[:, 0],
            c='red', marker='X', s=200, label='Cluster Centers')
plt.xlabel('RAAN (omega)')
plt.ylabel('Inclination (i)')
plt.title('2D Scatter Plot of Space Debris Clusters')
plt.colorbar(scatter, label='Eccentricity (e)')
plt.legend()
plt.show()

# 3D plot: all three features, points colored by the assigned cluster label.
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(data[:, 1], data[:, 0], data[:, 2], c=labels, cmap='viridis', s=50, alpha=0.8)
ax.scatter(original_scale_centers[:, 1], original_scale_centers[:, 0], original_scale_centers[:, 2],
           c='red', marker='X', s=200, label='Cluster Centers')
ax.set_xlabel('RAAN (omega)')
ax.set_ylabel('Inclination (i)')
ax.set_zlabel('Eccentricity (e)')
ax.set_title('Fake Data')
fig.colorbar(scatter, ax=ax, label='Cluster Labels')
ax.legend()
plt.show()