In [1]:
import numpy as np
import os
import sys
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load helper
path_to_parent = os.path.abspath("..")
sys.path.append(path_to_parent)
import getdata

# Settings
orbits = ["geo", "gto", "fol"]
years = np.arange(2002, 2024)

plot_dir = "Images/tsne"
os.makedirs(plot_dir, exist_ok=True)
getdata.clear_directory(plot_dir)

# t-SNE analysis and plotting
for orbit in orbits: 
    for year in years: 
        try:
            year2 = str(year)[2:]
            if year == 2023:
                file = os.path.join("..", "input", f"stat_Master_23_{orbit}_s1_10cm.det").replace("\\", "/")
            else:
                file = os.path.join("..", "input", f"stat_Master_{year2}_{orbit}_s1.det").replace("\\", "/")
            data = np.array(getdata.array_extender(file)).T

            # Define feature indices and names
            orbit_feature_indices = [1, 8, 9, 10, 11, 12, 13, 20]
            feature_names = [
                "diameter", "sem_major", "inc", "ecc", "arg_per", "raan", "true_lat", "mag_obj"
            ]

            # Build DataFrame for orbit features
            df = pd.DataFrame(data[:, orbit_feature_indices], columns=feature_names)

            # Shift RAAN to -180 to 180
            df['raan'] = ((df['raan'] + 180) % 360) - 180

            # Apply fundamental filters
            df_filtered = df[
                (df['mag_obj'] >= 14.5) & (df['mag_obj'] <= 19) &
                (df['sem_major'] < 60000) &
                (df['inc'] < 22) &
                (df['diameter'] > 0.1)
            ].reset_index(drop=True)

            if len(df_filtered) < 2:
                print(f"[{orbit.upper()} {year}] Skipped: only {len(df_filtered)} samples after filtering")
                continue

            # Standardize
            scaler = StandardScaler()
            scaled = scaler.fit_transform(df_filtered)

            # t-SNE
            tsne = TSNE(n_components=2, perplexity=30, random_state=42)
            embedding = tsne.fit_transform(scaled)
            print(f"[{orbit.upper()} {year}] t-SNE computed for {len(df_filtered)} samples")

            # Scatter plot of t-SNE embedding
            plt.figure()
            plt.scatter(embedding[:, 0], embedding[:, 1], s=5)
            plt.xlabel('tSNE-1')
            plt.ylabel('tSNE-2')
            plt.title(f't-SNE Projection ({orbit.upper()}, {year})')
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(os.path.join(plot_dir, f"tsne_{orbit}_{year}.png"))
            plt.close()

            # Pairplot: only upper triangle
            g = sns.PairGrid(df_filtered, diag_sharey=False)
            g.map_upper(sns.scatterplot, s=10)
            g.map_diag(sns.histplot, kde=False)

            # Customize axes
            for i, y_var in enumerate(feature_names):
                for j, x_var in enumerate(feature_names):
                    ax = g.axes[i, j]
                    if j < i:
                        ax.set_visible(False)
                    else:
                        ax.set_xlabel(x_var)
                        ax.set_ylabel(y_var)
                        ax.xaxis.set_tick_params(labelbottom=True)
                        ax.yaxis.set_tick_params(labelleft=True)
                        ax.set_title(f"{y_var} vs {x_var}", fontsize=10, pad=2)

            g.fig.subplots_adjust(wspace=0.5, hspace=0.5)
            g.fig.suptitle(f'Pairplot of Features ({orbit.upper()}, {year})', y=1.02)
            g.fig.tight_layout()
            g.fig.savefig(os.path.join(plot_dir, f"pairplot_{orbit}_{year}.png"))
            plt.close(g.fig)

        except Exception as e:
            print(f"[{orbit.upper()} {year}] Failed: {e}")


[WinError 2] Das System kann die angegebene Datei nicht finden
  File "c:\Users\fionu\anaconda3\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\fionu\anaconda3\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


[GEO 2002] t-SNE computed for 653 samples
[GEO 2003] t-SNE computed for 713 samples
[GEO 2004] t-SNE computed for 640 samples
[GEO 2005] t-SNE computed for 618 samples
[GEO 2006] t-SNE computed for 528 samples
[GEO 2007] t-SNE computed for 428 samples
[GEO 2008] t-SNE computed for 153 samples
[GEO 2009] t-SNE computed for 265 samples
[GEO 2010] t-SNE computed for 88 samples
[GEO 2011] t-SNE computed for 100 samples
[GEO 2012] Failed: perplexity must be less than n_samples
[GEO 2013] Skipped: only 1 samples after filtering
[GEO 2014] Failed: perplexity must be less than n_samples
[GEO 2015] t-SNE computed for 81 samples
[GEO 2016] t-SNE computed for 73 samples
[GEO 2017] t-SNE computed for 70 samples
[GEO 2018] t-SNE computed for 176 samples
[GEO 2019] t-SNE computed for 182 samples
[GEO 2020] t-SNE computed for 343 samples
[GEO 2021] t-SNE computed for 189 samples
[GEO 2022] t-SNE computed for 175 samples
[GEO 2023] t-SNE computed for 270 samples
[GTO 2002] t-SNE computed for 457 sampl

In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# --- CONFIGURATION ---
YEARS       = list(range(2002, 2024))
ORBIT_TYPES = ["geo", "gto", "fol"]
BASE_DIR    = os.path.join("..", "input")
STORE_FOLDER = "Images/tsne_and_pairplots"

# Ensure directory is clean
os.makedirs(STORE_FOLDER, exist_ok=True)
getdata.clear_directory(STORE_FOLDER)

# Feature selection
ORBIT_IDX = [1, 8, 9, 10, 11, 12, 13, 20]
FEATURES = ["diameter", "sem_major", "inc", "ecc", "arg_per", "raan", "true_lat", "mag_obj"]

# --- FILTER FUNCTION ---
def apply_filters(df):
    df = df.copy()
    df['raan'] = ((df['raan'] + 180) % 360) - 180
    return df[
        (df.mag_obj.between(14.5, 19)) &
        (df.sem_major < 60000) &
        (df.inc < 22) &
        (df.diameter > 0.1)
    ]

# --- RUN t-SNE AND STORE SCATTER PLOTS ---
for orb in ORBIT_TYPES:
    for yr in YEARS:
        try:
            year2 = str(yr)[2:]
            if yr == 2023:
                path = os.path.join(BASE_DIR, f"stat_Master_23_{orb}_s1_10cm.det").replace("\\", "/")
            else:
                path = os.path.join(BASE_DIR, f"stat_Master_{year2}_{orb}_s1.det").replace("\\", "/")

            raw = np.array(getdata.array_extender(path)).T
            df = pd.DataFrame(raw[:, ORBIT_IDX], columns=FEATURES)
            df_f = apply_filters(df)

            if len(df_f) < 2:
                print(f"[{orb.upper()} {yr}] Skipped (only {len(df_f)} samples after filtering)")
                continue

            X = StandardScaler().fit_transform(df_f)

            # --- t-SNE ---
            tsne = TSNE(n_components=2, perplexity=30, random_state=42)
            tsne_proj = tsne.fit_transform(X)
            print(f"[{orb.upper()} {yr}] t-SNE computed for {len(df_f)} samples")

            # --- SCATTER PLOT ---
            plt.figure(figsize=(6, 5))
            plt.scatter(tsne_proj[:, 0], tsne_proj[:, 1], s=5)
            plt.xlabel("t-SNE 1")
            plt.ylabel("t-SNE 2")
            plt.title(f"t-SNE Embedding ({orb.upper()}, {yr})")
            plt.grid(True)
            plt.tight_layout()
            fname = f"tsne_{orb}_{yr}.png"
            plt.savefig(os.path.join(STORE_FOLDER, fname))
            plt.close()

        except Exception as e:
            print(f"[{orb.upper()} {yr}] Failed: {e}")


[GEO 2002] t-SNE computed for 653 samples
[GEO 2003] t-SNE computed for 713 samples
[GEO 2004] t-SNE computed for 640 samples
[GEO 2005] t-SNE computed for 618 samples
[GEO 2006] t-SNE computed for 528 samples
[GEO 2007] t-SNE computed for 428 samples
[GEO 2008] t-SNE computed for 153 samples
[GEO 2009] t-SNE computed for 265 samples
[GEO 2010] t-SNE computed for 88 samples
[GEO 2011] t-SNE computed for 100 samples
[GEO 2012] Failed: perplexity must be less than n_samples
[GEO 2013] Skipped (only 1 samples after filtering)
[GEO 2014] Failed: perplexity must be less than n_samples
[GEO 2015] t-SNE computed for 81 samples
[GEO 2016] t-SNE computed for 73 samples
[GEO 2017] t-SNE computed for 70 samples
[GEO 2018] t-SNE computed for 176 samples
[GEO 2019] t-SNE computed for 182 samples
[GEO 2020] t-SNE computed for 343 samples
[GEO 2021] t-SNE computed for 189 samples
[GEO 2022] t-SNE computed for 175 samples
[GEO 2023] t-SNE computed for 270 samples
[GTO 2002] t-SNE computed for 457 samp