In [1]:
# Add the directory to sys.path
import sys
sys.path.append('.')

# Now you can import the module
from utils.tle_processing import *


In [2]:
import numpy as np
import spacetrack.operators as op
from datetime import datetime,timedelta
import pandas as pd
from spacetrack import SpaceTrackClient
from io import StringIO  # Import StringIO
import time
import matplotlib.pyplot as plt
import tqdm

In [17]:
# Model training
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.linear_model import Ridge, HuberRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor


In [46]:
# Save model
import joblib
import os
import glob

# Classical ML Models


In [39]:
resample_tle_df = pd.read_csv('sample_dataset/sample_daily_records_data.csv')
resample_tle_df.set_index('Epoch', inplace=True)
resample_tle_df.head()

Unnamed: 0_level_0,First Derivative Mean Motion,Inclination (degrees),Right Ascension of the Ascending Node (degrees),Argument of Perigee (degrees),Mean Anomaly (degrees),Eccentricity,Mean Motion (revolutions per day),Revolution Number at Epoch
Epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-12-31,0.000157,51.6448,77.04235,208.40055,320.4631,0.000525,15.497978,37584.5
2023-01-01,0.000166,51.644567,74.151133,211.103567,277.293067,0.000519,15.498221,37593.666667
2023-01-02,0.000175,51.64468,69.55484,215.69364,221.64474,0.000511,15.498584,37608.0
2023-01-03,0.000162,51.64486,64.71342,219.86414,202.58388,0.000499,15.498874,37623.2
2023-01-04,0.000142,51.64474,59.07226,225.67,219.52978,0.000497,15.499123,37640.8


In [7]:
resample_tle_df.columns

Index(['First Derivative Mean Motion', 'Inclination (degrees)',
       'Right Ascension of the Ascending Node (degrees)',
       'Argument of Perigee (degrees)', 'Mean Anomaly (degrees)',
       'Eccentricity', 'Mean Motion (revolutions per day)',
       'Revolution Number at Epoch'],
      dtype='object')

# Data Preparation

In [18]:
# --- 1. Prepare Sample Data ---
data_for_training = resample_tle_df.copy()

## First differencing

In [19]:


data_for_training['Revolution Number at Epoch_Diff'] =  data_for_training['Revolution Number at Epoch'].diff()

# data_for_training.head()


## Cyclical features

In [32]:
def encode_cyclical_feature(df: pd.DataFrame, column_name: str, period: float = 360.0) -> pd.DataFrame:
    """
    Encodes a cyclical feature (like degrees, hours, or months) using
    sine and cosine projection.

    Args:
        df (pd.DataFrame): The input DataFrame.
        column_name (str): The name of the cyclical feature column (in degrees).
        period (float): The length of the cycle (e.g., 360 for degrees, 24 for hours).
                        Defaults to 360.0.

    Returns:
        pd.DataFrame: A new DataFrame with two added columns:
                      <column_name>_sin and <column_name>_cos.
    """
    print(f"--- Encoding '{column_name}' with period {period} ---")

    # 1. Conversion to Radians: Necessary for NumPy's sin/cos functions.
    # The formula converts the value into a proportion of the cycle (0 to 2*pi).
    angle_rad = 2 * np.pi * df[column_name] / period

    # 2. Sine and Cosine Projection
    sin_col = f'{column_name}_sin'
    cos_col = f'{column_name}_cos'

    df[sin_col] = np.sin(angle_rad)
    df[cos_col] = np.cos(angle_rad)

    # Optional: Drop the original column if you don't need it.
    # df = df.drop(columns=[column_name])

    print(f"Created columns: '{sin_col}' and '{cos_col}'")
    return df

def decode_cyclical_feature(df: pd.DataFrame, sin_col: str, cos_col: str, period: float = 360.0) -> pd.DataFrame:
    """
    Reconstructs the original cyclical feature value (in degrees) from its
    sine and cosine components.

    Args:
        df (pd.DataFrame): The input DataFrame containing sine and cosine components.
        sin_col (str): The name of the sine component column.
        cos_col (str): The name of the cosine component column.
        period (float): The length of the cycle (e.g., 360 for degrees, 24 for hours).
                        Defaults to 360.0.

    Returns:
        pd.DataFrame: The DataFrame with a new reconstructed column.
    """
    print(f"\n--- Decoding from '{sin_col}' and '{cos_col}' ---")
    reconstructed_col = f'{sin_col.split("_sin")[0]}_reconstructed'

    # 1. Use arctan2 to get the angle in radians (range: [-pi, pi]).
    # arctan2 is crucial as it correctly handles all four quadrants.
    angle_rad = np.arctan2(df[sin_col], df[cos_col])

    # 2. Scale back to the original period (e.g., [0, 360]).
    # Formula: (angle_rad / (2 * pi)) * period
    angle_normalized = angle_rad * (period / (2 * np.pi))

    # 3. Handle the cyclical nature to ensure the result is non-negative (e.g., [0, 360)).
    # The decoded value might be negative (e.g., -90 degrees). We shift it to the
    # equivalent positive value (e.g., 270 degrees).
    df[reconstructed_col] = np.where(angle_normalized < 0,
                                     angle_normalized + period,
                                     angle_normalized)

    print(f"Reconstructed column: '{reconstructed_col}'")
    return df

In [33]:
def encode_cyclical_feature(df: pd.DataFrame, column_name: str, period: float = 360.0) -> pd.DataFrame:
    """
    Encodes a cyclical feature (like degrees, hours, or months) using
    sine and cosine projection.

    Args:
        df (pd.DataFrame): The input DataFrame.
        column_name (str): The name of the cyclical feature column (in degrees).
        period (float): The length of the cycle (e.g., 360 for degrees, 24 for hours).
                        Defaults to 360.0.

    Returns:
        pd.DataFrame: A new DataFrame with two added columns:
                      <column_name>_sin and <column_name>_cos.
    """
    print(f"--- Encoding '{column_name}' with period {period} ---")

    # 1. Conversion to Radians: Necessary for NumPy's sin/cos functions.
    # The formula converts the value into a proportion of the cycle (0 to 2*pi).
    angle_rad = 2 * np.pi * df[column_name] / period

    # 2. Sine and Cosine Projection
    sin_col = f'{column_name}_sin'
    cos_col = f'{column_name}_cos'

    df[sin_col] = np.sin(angle_rad)
    df[cos_col] = np.cos(angle_rad)

    # Optional: Drop the original column if you don't need it.
    # df = df.drop(columns=[column_name])

    print(f"Created columns: '{sin_col}' and '{cos_col}'")
    return df

def decode_cyclical_feature(df: pd.DataFrame, sin_col: str, cos_col: str, period: float = 360.0) -> pd.DataFrame:
    """
    Reconstructs the original cyclical feature value (in degrees) from its
    sine and cosine components.

    Args:
        df (pd.DataFrame): The input DataFrame containing sine and cosine components.
        sin_col (str): The name of the sine component column.
        cos_col (str): The name of the cosine component column.
        period (float): The length of the cycle (e.g., 360 for degrees, 24 for hours).
                        Defaults to 360.0.

    Returns:
        pd.DataFrame: The DataFrame with a new reconstructed column.
    """
    print(f"\n--- Decoding from '{sin_col}' and '{cos_col}' ---")
    reconstructed_col = f'{sin_col.split("_sin")[0]}_reconstructed'

    # 1. Use arctan2 to get the angle in radians (range: [-pi, pi]).
    # arctan2 is crucial as it correctly handles all four quadrants.
    angle_rad = np.arctan2(df[sin_col], df[cos_col])

    # 2. Scale back to the original period (e.g., [0, 360]).
    # Formula: (angle_rad / (2 * pi)) * period
    angle_normalized = angle_rad * (period / (2 * np.pi))

    # 3. Handle the cyclical nature to ensure the result is non-negative (e.g., [0, 360)).
    # The decoded value might be negative (e.g., -90 degrees). We shift it to the
    # equivalent positive value (e.g., 270 degrees).
    df[reconstructed_col] = np.where(angle_normalized < 0,
                                     angle_normalized + period,
                                     angle_normalized)

    print(f"Reconstructed column: '{reconstructed_col}'")
    return df

In [34]:
# ['Right Ascension of the Ascending Node (degrees)','Argument of Perigee (degrees)', 'Mean Anomaly (degrees)']
CYCLE_PERIOD = 360.0
FEATURE_COL = 'Right Ascension of the Ascending Node (degrees)'
data_for_training = encode_cyclical_feature(data_for_training, FEATURE_COL, CYCLE_PERIOD)

FEATURE_COL = 'Argument of Perigee (degrees)'
data_for_training = encode_cyclical_feature(data_for_training, FEATURE_COL, CYCLE_PERIOD)

FEATURE_COL = 'Mean Anomaly (degrees)'
data_for_training = encode_cyclical_feature(data_for_training, FEATURE_COL, CYCLE_PERIOD)


data_for_training.head()

--- Encoding 'Right Ascension of the Ascending Node (degrees)' with period 360.0 ---
Created columns: 'Right Ascension of the Ascending Node (degrees)_sin' and 'Right Ascension of the Ascending Node (degrees)_cos'
--- Encoding 'Argument of Perigee (degrees)' with period 360.0 ---
Created columns: 'Argument of Perigee (degrees)_sin' and 'Argument of Perigee (degrees)_cos'
--- Encoding 'Mean Anomaly (degrees)' with period 360.0 ---
Created columns: 'Mean Anomaly (degrees)_sin' and 'Mean Anomaly (degrees)_cos'


Unnamed: 0_level_0,First Derivative Mean Motion,Inclination (degrees),Right Ascension of the Ascending Node (degrees),Argument of Perigee (degrees),Mean Anomaly (degrees),Eccentricity,Mean Motion (revolutions per day),Revolution Number at Epoch,Revolution Number at Epoch_Diff,Right Ascension of the Ascending Node (degrees)_sin,Right Ascension of the Ascending Node (degrees)_cos,Argument of Perigee (degrees)_sin,Argument of Perigee (degrees)_cos,Mean Anomaly (degrees)_sin,Mean Anomaly (degrees)_cos
Epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-01-01,0.000166,51.644567,74.151133,211.103567,277.293067,0.000519,15.498221,37593.666667,9.166667,0.961985,0.273101,-0.516587,-0.856235,-0.99191,0.126945
2023-01-02,0.000175,51.64468,69.55484,215.69364,221.64474,0.000511,15.498584,37608.0,14.333333,0.937007,0.349311,-0.583451,-0.812148,-0.66451,-0.747279
2023-01-03,0.000162,51.64486,64.71342,219.86414,202.58388,0.000499,15.498874,37623.2,15.2,0.904183,0.427146,-0.640969,-0.767566,-0.384036,-0.923318
2023-01-04,0.000142,51.64474,59.07226,225.67,219.52978,0.000497,15.499123,37640.8,17.6,0.857816,0.513957,-0.715327,-0.69879,-0.636479,-0.771294
2023-01-05,0.000139,51.64482,54.69776,228.97012,250.77084,0.00049,15.499368,37654.6,13.8,0.816115,0.57789,-0.754367,-0.656453,-0.944209,-0.329347


In [35]:
data_for_training.dropna(inplace=True)

# Column selection for model training

In [36]:
gt_features = ['First Derivative Mean Motion', 'Inclination (degrees)',
       'Right Ascension of the Ascending Node (degrees)',
       'Argument of Perigee (degrees)', 'Mean Anomaly (degrees)',
       'Eccentricity', 'Mean Motion (revolutions per day)',
       'Revolution Number at Epoch']

In [37]:
original_features = ['First Derivative Mean Motion', 'Inclination (degrees)','Eccentricity','Mean Motion (revolutions per day)']

diff_features = ['Revolution Number at Epoch_Diff']

cyclical_features = ['Right Ascension of the Ascending Node (degrees)_sin',
        'Right Ascension of the Ascending Node (degrees)_cos',
        'Argument of Perigee (degrees)_sin',
        'Argument of Perigee (degrees)_cos', 
        'Mean Anomaly (degrees)_sin',
        'Mean Anomaly (degrees)_cos']

In [38]:
all_features = original_features+diff_features+cyclical_features

# Generating lag features

In [28]:
data = data_for_training[all_features].copy()
data.head()

Unnamed: 0_level_0,First Derivative Mean Motion,Inclination (degrees),Eccentricity,Mean Motion (revolutions per day),Revolution Number at Epoch_Diff,Right Ascension of the Ascending Node (degrees)_sin,Right Ascension of the Ascending Node (degrees)_cos,Argument of Perigee (degrees)_sin,Argument of Perigee (degrees)_cos,Mean Anomaly (degrees)_sin,Mean Anomaly (degrees)_cos
Epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-01,0.000166,51.644567,0.000519,15.498221,9.166667,0.961985,0.273101,-0.516587,-0.856235,-0.99191,0.126945
2023-01-02,0.000175,51.64468,0.000511,15.498584,14.333333,0.937007,0.349311,-0.583451,-0.812148,-0.66451,-0.747279
2023-01-03,0.000162,51.64486,0.000499,15.498874,15.2,0.904183,0.427146,-0.640969,-0.767566,-0.384036,-0.923318
2023-01-04,0.000142,51.64474,0.000497,15.499123,17.6,0.857816,0.513957,-0.715327,-0.69879,-0.636479,-0.771294
2023-01-05,0.000139,51.64482,0.00049,15.499368,13.8,0.816115,0.57789,-0.754367,-0.656453,-0.944209,-0.329347


In [29]:
look_back = 2 # numbers of lags
forecast_horizon = 1
features = []
targets = []
num_samples = data_for_training.shape[0]

In [30]:
for i in range(num_samples - look_back - forecast_horizon + 1):
    # The input features are the last `look_back` values of all series
    X_window = data.iloc[i : i + look_back].values
    
    # The target variables are the next `forecast_horizon` values of all series
    y_window = data.iloc[i + look_back : i + look_back + forecast_horizon].values
    
    # Flatten the windows to create a single row for the features and targets
    features.append(X_window.flatten())
    targets.append(y_window.flatten())

X = np.array(features)
y = np.array(targets)

print(f"Shape of X (input features): {X.shape}")
print(f"Shape of y (target variables): {y.shape}")

Shape of X (input features): (728, 22)
Shape of y (target variables): (728, 11)


In [31]:
# --- 3. Scaling the Data ---
# It's good practice to scale the data, especially for models that are
# sensitive to feature magnitudes. We use a MinMaxScaler.
# We need to scale X and y separately, and use a different scaler for each
# to allow for inverse transformation later.

# X_scaler = MinMaxScaler()
# y_scaler = MinMaxScaler()

X_scaler = StandardScaler()
y_scaler = StandardScaler()

X_scaled = X_scaler.fit_transform(X)
y_scaled = y_scaler.fit_transform(y)

# --- 4. Splitting the Data ---
# For time series, we split chronologically. The last 20% of the data
# will be used for testing.
split_index = int(len(X_scaled) * 0.8)
X_train, X_test = X_scaled[:split_index], X_scaled[split_index:]
y_train, y_test = y_scaled[:split_index], y_scaled[split_index:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((582, 22), (582, 11), (146, 22), (146, 11))

# Traing classical ML

In [44]:
# --- Configuration ---
SAVE_DIR = "saved_model_state"
os.makedirs(SAVE_DIR, exist_ok=True)

SAVE_model_DIR = "saved_model_state/classifical_ML"
os.makedirs(SAVE_model_DIR, exist_ok=True)

models_and_params = [

    {
        'name': 'RandomForestRegressor',
        'estimator': RandomForestRegressor(random_state=42, n_jobs=1), # Use n_jobs=1 for inner estimator
        'param_grid': {
            'estimator__n_estimators': [50, 100,],
            'estimator__max_depth': [5, 10, 100]
        }
    },
    # 3. Linear/Robust Models
    {
        'name': 'RidgeRegressor',
        'estimator': Ridge(random_state=42),
        'param_grid': {
            'estimator__alpha': [0.1, 1.0, 10.0]
        }
    },
    {
        'name': 'HuberRegressor',
        'estimator': HuberRegressor(max_iter=1000), # Huber is robust to outliers
        'param_grid': {
            'estimator__epsilon': [1.1, 1.35]
        }
    },
    # 4. Neighbors/SVM
    {
        'name': 'KNeighborsRegressor',
        'estimator': KNeighborsRegressor(n_jobs=1),
        'param_grid': {
            'estimator__n_neighbors': [5, 10],
            'estimator__weights': ['uniform', 'distance']
        }
    },
    # NOTE: SVR (Support Vector Regression) 
    {
        'name': 'SVR',
        'estimator': SVR(),
        'param_grid': {
            'estimator__C': [1.0, 10.0, 0.1, 100.0],
            'estimator__kernel': ['rbf']
        }
    },
    # 5. Simple Neural Network (MLP)
    {
        'name': 'MLPRegressor',
        'estimator': MLPRegressor(random_state=42, max_iter=1000, early_stopping=True),
        'param_grid': {
            'estimator__hidden_layer_sizes': [(50,), (100,),],
            'estimator__activation': ['relu', 'sigmoid']
        }
    }
]


In [41]:
# --- Training and Saving Loop ---
best_models = {}

print(f"Starting Grid Search CV for {len(models_and_params)} Multi-Output Regressors...")
print(f"Models will be saved to: {SAVE_DIR}")

for item in models_and_params:
    model_name = item['name']
    base_estimator = item['estimator']
    param_grid = item['param_grid']
    
    # 1. Wrap the estimator with MultiOutputRegressor
    # n_jobs=-1 here parallelizes the training across the multiple output targets.
    multi_output_model = MultiOutputRegressor(base_estimator, n_jobs=1)
    
    # 2. Configure GridSearchCV
    # The 'estimator__' prefix targets the inner regressor's parameters.
    grid_search = GridSearchCV(
        estimator=multi_output_model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=3,
        verbose=1, # Reduced verbosity for a cleaner output with many models
        n_jobs=1 # Parallelize the folds/hyperparameters across 1 core to avoid conflicts with MultiOutput's n_jobs=-1
    )
    
    print(f"\n--- Training {model_name} ---")
    
    try:
        # 3. Fit the model
        # X_train and y_train are assumed to be defined and available
        grid_search.fit(X_train, y_train) 
        
        # 4. Save the best estimator
        best_model = grid_search.best_estimator_
        file_path = os.path.join(SAVE_DIR, f'best_{model_name}.joblib')
        joblib.dump(best_model, file_path)
        
        best_models[model_name] = {
            'best_score': grid_search.best_score_,
            'best_params': grid_search.best_params_,
            'file_path': file_path
        }
        
        print(f"Finished {model_name}. Best score: {grid_search.best_score_:.4f}. Model saved to: {file_path}")
        
    except Exception as e:
        print(f"Error training {model_name}: {e}")



Starting Grid Search CV for 6 Multi-Output Regressors...
Models will be saved to: saved_model_state

--- Training RandomForestRegressor ---
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Finished RandomForestRegressor. Best score: -0.4159. Model saved to: saved_model_state/best_RandomForestRegressor.joblib

--- Training RidgeRegressor ---
Fitting 3 folds for each of 3 candidates, totalling 9 fits
Finished RidgeRegressor. Best score: -0.3154. Model saved to: saved_model_state/best_RidgeRegressor.joblib

--- Training HuberRegressor ---
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Finished HuberRegressor. Best score: -0.3148. Model saved to: saved_model_state/best_HuberRegressor.joblib

--- Training KNeighborsRegressor ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits
Finished KNeighborsRegressor. Best score: -0.5797. Model saved to: saved_model_state/best_KNeighborsRegressor.joblib

--- Training SVR ---
Fitting 3 folds for each of 4 candidates, to

6 fits failed out of a total of 12.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
6 fits failed with the following error:
Traceback (most recent call last):
  File "/home/noppachanin/.local/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/noppachanin/.local/lib/python3.9/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/noppachanin/.local/lib/python3.9/site-packages/sklearn/multioutput.py", line 274, in fit
    self.estimators_ = Parallel(n_jobs=self.n_jobs)(
  File "/home/noppachanin/.local/lib/python3.9/site-packages/sklearn/utils/parallel.py", line 77, in 

Finished MLPRegressor. Best score: -0.4570. Model saved to: saved_model_state/best_MLPRegressor.joblib


In [43]:
print("\n--- Summary of Best Models ---")
for name, data in best_models.items():
    print(f"Model: {name}")
    print(f"  Best Score (Neg MSE): {data['best_score']:.4f}")
    print(f"  Best Params: {data['best_params']}")


--- Summary of Best Models ---
Model: RandomForestRegressor
  Best Score (Neg MSE): -0.4159
  Best Params: {'estimator__max_depth': 5, 'estimator__n_estimators': 100}
Model: RidgeRegressor
  Best Score (Neg MSE): -0.3154
  Best Params: {'estimator__alpha': 10.0}
Model: HuberRegressor
  Best Score (Neg MSE): -0.3148
  Best Params: {'estimator__epsilon': 1.35}
Model: KNeighborsRegressor
  Best Score (Neg MSE): -0.5797
  Best Params: {'estimator__n_neighbors': 10, 'estimator__weights': 'distance'}
Model: SVR
  Best Score (Neg MSE): -0.4353
  Best Params: {'estimator__C': 1.0, 'estimator__kernel': 'rbf'}
Model: MLPRegressor
  Best Score (Neg MSE): -0.4570
  Best Params: {'estimator__activation': 'relu', 'estimator__hidden_layer_sizes': (100,)}


# Selecting models to make predictions

In [49]:
all_model_paths = sorted(glob.glob(os.path.join(SAVE_model_DIR,'*.joblib')))
all_model_paths

['saved_model_state/classifical_ML/best_HuberRegressor.joblib',
 'saved_model_state/classifical_ML/best_KNeighborsRegressor.joblib',
 'saved_model_state/classifical_ML/best_MLPRegressor.joblib',
 'saved_model_state/classifical_ML/best_RandomForestRegressor.joblib',
 'saved_model_state/classifical_ML/best_RidgeRegressor.joblib',
 'saved_model_state/classifical_ML/best_SVR.joblib']

In [51]:
model_path = os.path.join(SAVE_model_DIR,'best_RidgeRegressor.joblib')
print(model_path)


saved_model_state/classifical_ML/best_RidgeRegressor.joblib


# Revert cyclical features and a differencing feature

In [50]:

model = joblib.load(model_path)
y_pred_scaled = model.predict(X_test)
y_pred_original = y_scaler.inverse_transform(y_pred_scaled)
y_test_original = y_scaler.inverse_transform(y_test)

y_pred_original.shape, y_test_original.shape

saved_model_state/classifical_ML/best_RidgeRegressor.joblib


((146, 11), (146, 11))

In [52]:
df_pred = pd.DataFrame(data=y_pred_original, columns=all_features)
# df_pred

In [66]:
df_test_data = data_for_training[split_index+look_back:][gt_features]
initial_revs = data_for_training.iloc[split_index+2,7]
# df_test_data.shape, gt_features, initial_revs

## Cyclical features

In [67]:
df_pred_decoded = decode_cyclical_feature(df_pred.copy(),
                                         sin_col='Right Ascension of the Ascending Node (degrees)_sin',
                                         cos_col='Right Ascension of the Ascending Node (degrees)_cos',period=CYCLE_PERIOD)
df_pred_decoded = decode_cyclical_feature(df_pred_decoded,
                                         sin_col='Argument of Perigee (degrees)_sin',
                                         cos_col='Argument of Perigee (degrees)_cos',period=CYCLE_PERIOD)
df_pred_decoded = decode_cyclical_feature(df_pred_decoded,
                                         sin_col='Mean Anomaly (degrees)_sin',
                                         cos_col='Mean Anomaly (degrees)_cos',period=CYCLE_PERIOD)

reconstructed_cols = [ col for col in df_pred_decoded.columns if '_reconstructed' in col]
encoded_cols = [col for col in df_pred_decoded.columns if '_sin' in col or '_cos' in col]

df_pred_decoded.drop(encoded_cols, axis=1, inplace=True)




--- Decoding from 'Right Ascension of the Ascending Node (degrees)_sin' and 'Right Ascension of the Ascending Node (degrees)_cos' ---
Reconstructed column: 'Right Ascension of the Ascending Node (degrees)_reconstructed'

--- Decoding from 'Argument of Perigee (degrees)_sin' and 'Argument of Perigee (degrees)_cos' ---
Reconstructed column: 'Argument of Perigee (degrees)_reconstructed'

--- Decoding from 'Mean Anomaly (degrees)_sin' and 'Mean Anomaly (degrees)_cos' ---
Reconstructed column: 'Mean Anomaly (degrees)_reconstructed'


In [68]:
df_pred_decoded.columns

Index(['First Derivative Mean Motion', 'Inclination (degrees)', 'Eccentricity',
       'Mean Motion (revolutions per day)', 'Revolution Number at Epoch_Diff',
       'Right Ascension of the Ascending Node (degrees)_reconstructed',
       'Argument of Perigee (degrees)_reconstructed',
       'Mean Anomaly (degrees)_reconstructed'],
      dtype='object')

## Differencing features

In [69]:
df_pred_decoded['Revolution Number at Epoch'] = df_pred_decoded['Revolution Number at Epoch_Diff'].cumsum() + initial_revs
# df_pred_decoded.shape, df_pred_decoded.head()

## Rearranging columns

In [70]:
# Rearranging columns
df_pred_data = df_pred_decoded[['First Derivative Mean Motion', 'Inclination (degrees)', 
        'Right Ascension of the Ascending Node (degrees)_reconstructed',
       'Argument of Perigee (degrees)_reconstructed',
       'Mean Anomaly (degrees)_reconstructed', 'Eccentricity', 'Mean Motion (revolutions per day)', 'Revolution Number at Epoch']].copy()
df_pred_data.head()

Unnamed: 0,First Derivative Mean Motion,Inclination (degrees),Right Ascension of the Ascending Node (degrees)_reconstructed,Argument of Perigee (degrees)_reconstructed,Mean Anomaly (degrees)_reconstructed,Eccentricity,Mean Motion (revolutions per day),Revolution Number at Epoch
0,0.000205,51.639753,58.185889,174.92725,229.558326,0.000581,15.497571,46681.704198
1,0.000185,51.639817,53.095386,178.401682,240.750957,0.000574,15.498055,46697.250127
2,0.000221,51.640185,47.032975,182.555433,242.442302,0.000574,15.498466,46712.542188
3,0.000243,51.640883,41.578216,187.376322,234.505039,0.000572,15.498874,46727.933431
4,0.00018,51.641113,36.527567,190.630177,239.388646,0.000559,15.499237,46743.685301


# Making prediction

In [71]:
y_actual = df_test_data.values
y_pred = df_pred_data.values

if y_actual.shape != y_pred.shape:
    raise ValueError(f"Actual and prediction arrays must have the same shape. Got {y_actual.shape} and {y_pred.shape}")

# Ensure arrays are at least 2D (even for a single feature)
if y_actual.ndim == 1:
    y_actual = y_actual.reshape(-1, 1)
    y_pred = y_pred.reshape(-1, 1)



In [72]:
num_features = y_actual.shape[1]

if gt_features is None:
    gt_features = [f'Feature_{i+1}' for i in range(num_features)]
elif len(gt_features) != num_features:
    raise ValueError(f"The number of feature names ({len(gt_features)}) must match the number of columns ({num_features}).")


metrics_data = {}

for i in range(num_features):
    actual_flat = y_actual[:, i].flatten()
    pred_flat = y_pred[:, i].flatten()

    # Calculate Error
    error = pred_flat - actual_flat

    # --- 1. MSE (Mean Squared Error) ---
    mse = np.mean(error ** 2)

    # --- 2. RMSE (Root Mean Squared Error) ---
    rmse = np.sqrt(mse)

    # --- 3. MAE (Mean Absolute Error) ---
    mae = np.mean(np.abs(error))

    metrics_data[gt_features[i]] = {
        'RMSE': rmse,
        'MSE': mse,
        'MAE': mae,
    }

# Convert the dictionary of metrics into a DataFrame
results_df = pd.DataFrame.from_dict(metrics_data, orient='index')
results_df.index.name = 'Feature'

In [73]:
results_df

Unnamed: 0_level_0,RMSE,MSE,MAE
Feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First Derivative Mean Motion,0.000928,8.604353e-07,0.000273
Inclination (degrees),0.000922,8.500376e-07,0.000588
Right Ascension of the Ascending Node (degrees),37.222957,1385.549,6.176348
Argument of Perigee (degrees),68.551392,4699.293,18.130611
Mean Anomaly (degrees),51.99462,2703.44,37.528584
Eccentricity,6.3e-05,3.98754e-09,2.8e-05
Mean Motion (revolutions per day),0.001641,2.693518e-06,0.000943
Revolution Number at Epoch,13.099375,171.5936,12.634157
