# Imports

In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

# Loaded the dataset

In [22]:
df = pd.read_csv('/home/eric_baldwin/ddiMain/capstone/ddi_capstone_2/data/space_decay.csv')
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3               2  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... RCS_SIZE  COUNTRY_CODE  LAUNCH_DATE

# Dropped unnecessary columns

In [23]:
df = df.drop(labels=['DECAY_DATE'], axis=1)
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3               2  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... OBJECT_TYPE  RCS_SIZE  COUNTRY_CODE

# Renamed object type for better presentation

In [24]:
df['OBJECT_TYPE'] = df['OBJECT_TYPE'].replace({'DEBRIS': 'Debris', 'PAYLOAD': 'Payload', 'TBA': 'Unknown', 'ROCKET BODY': 'Rocket'})
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3               2  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... OBJECT_TYPE  RCS_SIZE  COUNTRY_CODE

# Filled missing values for categorical columns with 'Unknown'

In [25]:
df['COUNTRY_CODE'] = df['COUNTRY_CODE'].replace(to_replace={'TBD': 'Unknown', np.nan: 'Unknown'})
df['RCS_SIZE'] = df['RCS_SIZE'].replace(to_replace={np.nan: 'Unknown'})
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3               2  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... OBJECT_TYPE  RCS_SIZE  COUNTRY_CODE

# Created PERIOD_HOURS and ALTITUDE_MI columns

In [26]:
df['PERIOD_HOURS'] = df['PERIOD'] / 60
df['ALTITUDE_MI'] = (df['SEMIMAJOR_AXIS'] - 6371) * 0.6213
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3               2  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... COUNTRY_CODE  LAUNCH_DATE   SITE   

# Converted OBJECT_TYPE to binary classification

In [27]:
df['OBJECT_TYPE'] = df['OBJECT_TYPE'].apply(lambda x: 1 if x == 'Payload' else 0)
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3               2  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... COUNTRY_CODE  LAUNCH_DATE   SITE   

# Encoded categorical variables

In [28]:
le_country_code = LabelEncoder()
df['COUNTRY_CODE'] = le_country_code.fit_transform(df['COUNTRY_CODE'])
le_rcs_size = LabelEncoder()
df['RCS_SIZE'] = le_rcs_size.fit_transform(df['RCS_SIZE'])
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3               2  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4               2  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... COUNTRY_CODE  LAUNCH_DATE   SITE   

# Filled missing values for numerical columns

In [29]:
imputer = SimpleImputer(strategy='mean')
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_cols] = imputer.fit_transform(df[numerical_cols])
print(df.head())

   CCSDS_OMM_VERS                            COMMENT        CREATION_DATE  \
0             2.0  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:46:11   
1             2.0  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   
2             2.0  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T06:26:11   
3             2.0  GENERATED VIA SPACE-TRACK.ORG API  2021-10-31T18:07:15   
4             2.0  GENERATED VIA SPACE-TRACK.ORG API  2021-11-01T04:58:37   

  ORIGINATOR      OBJECT_NAME   OBJECT_ID CENTER_NAME REF_FRAME TIME_SYSTEM  \
0    18 SPCS  ARIANE 42P+ DEB   1992-072J       EARTH      TEME         UTC   
1    18 SPCS         SL-8 DEB   1979-028C       EARTH      TEME         UTC   
2    18 SPCS           GSAT 1   2001-015A       EARTH      TEME         UTC   
3    18 SPCS         CZ-4 DEB  1999-057MB       EARTH      TEME         UTC   
4    18 SPCS         CZ-4 DEB  1999-057MC       EARTH      TEME         UTC   

  MEAN_ELEMENT_THEORY  ... COUNTRY_CODE  LAUNCH_DATE   SITE   

# Feature selection

In [30]:
features = ['INCLINATION', 'PERIOD_HOURS', 'ALTITUDE_MI', 'ECCENTRICITY', 'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY', 'SEMIMAJOR_AXIS', 'APOAPSIS', 'PERIAPSIS']
X = df[features]
y = df['OBJECT_TYPE']

# Added polynomial features

In [31]:
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# Standardized features

In [32]:
scaler = StandardScaler()
X_poly = scaler.fit_transform(X_poly)

# Handled class imbalance using SMOTE

In [33]:
smote = SMOTE(random_state=42)
X_poly_resampled, y_resampled = smote.fit_resample(X_poly, y)

# Split data into training and test sets

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X_poly_resampled, y_resampled, test_size=0.3, random_state=42)

# Defined the Logistic Regression model with class weights to handle imbalance

In [35]:
logreg = LogisticRegression(max_iter=1000, class_weight='balanced')

# Hyperparameter tuning using GridSearchCV

In [36]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'solver': ['liblinear', 'saga']
}
grid_search = GridSearchCV(logreg, param_grid, cv=5, scoring='f1')
print("Starting grid search for hyperparameter tuning...")
grid_search.fit(X_train, y_train)
print("Completed grid search for hyperparameter tuning.")

Starting grid search for hyperparameter tuning...




Completed grid search for hyperparameter tuning.


# Best model from grid search

In [37]:
best_logreg = grid_search.best_estimator_

# Evaluated the best Logistic Regression model

In [38]:
y_pred_logreg = best_logreg.predict(X_test)
print(classification_report(y_test, y_pred_logreg))

              precision    recall  f1-score   support

         0.0       0.84      0.83      0.84      2824
         1.0       0.83      0.84      0.84      2830

    accuracy                           0.84      5654
   macro avg       0.84      0.84      0.84      5654
weighted avg       0.84      0.84      0.84      5654



# Printed the best hyperparameters

In [39]:
print("Best Hyperparameters:", grid_search.best_params_)

Best Hyperparameters: {'C': 100, 'solver': 'liblinear'}
