# Import libraries and functions

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from tqdm.notebook import tqdm
from math import pi as PI

## Initialize Orekit and import Orekit libraries

In [2]:
import orekit
vm = orekit.initVM()
print ('Java version:',vm.java_version)
print ('Orekit version:', orekit.VERSION)

Java version: 1.8.0_152-release
Orekit version: 12.0.1


In [3]:
from orekit.pyhelpers import setup_orekit_curdir, download_orekit_data_curdir
setup_orekit_curdir('../../orekit-data.zip')

In [4]:
from java.util import Arrays
from orekit import JArray_double

In [5]:
from org.orekit.propagation.analytical.tle import TLE, TLEPropagator
from org.orekit.utils import Constants

In [12]:
# Define list of dates
today = datetime(2024, 1, 28).date()
days_prior = 32
one_month_ago = today - timedelta(days=days_prior)
dates = [d.strftime("%Y-%m-%d") for d in (today - timedelta(n) for n in range((today - one_month_ago).days))]
dates.sort()
print(f'Period dates: {dates}')

Period dates: ['2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31', '2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08', '2024-01-09', '2024-01-10', '2024-01-11', '2024-01-12', '2024-01-13', '2024-01-14', '2024-01-15', '2024-01-16', '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20', '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-24', '2024-01-25', '2024-01-26', '2024-01-27', '2024-01-28']


# Load Space-Track TLE data from 2024-01-28

In [6]:
df = pd.read_csv('../datasets/space-track_2024-01-28.csv', memory_map=True)
df.head()

  df = pd.read_csv('../datasets/space-track_2024-01-28.csv', memory_map=True)


Unnamed: 0,CCSDS_OMM_VERS,COMMENT,CREATION_DATE,ORIGINATOR,OBJECT_NAME,OBJECT_ID,CENTER_NAME,REF_FRAME,TIME_SYSTEM,MEAN_ELEMENT_THEORY,...,RCS_SIZE,COUNTRY_CODE,LAUNCH_DATE,SITE,DECAY_DATE,FILE,GP_ID,TLE_LINE0,TLE_LINE1,TLE_LINE2
0,2.0,GENERATED VIA SPACE-TRACK.ORG API,2024-01-28T06:26:18,18 SPCS,STARLINK-1550,2020-062AS,EARTH,TEME,UTC,SGP4,...,LARGE,US,2020-09-03,AFETR,,4173044,247965759,0 STARLINK-1550,1 46365U 20062AS 24028.00001157 .00109337 0...,2 46365 53.0463 156.6338 0002343 254.9883 169...
1,2.0,GENERATED VIA SPACE-TRACK.ORG API,2024-01-28T06:16:18,18 SPCS,STARLINK-30875,2023-170U,EARTH,TEME,UTC,SGP4,...,LARGE,US,2023-11-03,AFETR,,4173001,247965588,0 STARLINK-30875,1 58224U 23170U 24028.00001157 .00051085 0...,2 58224 43.0021 68.9109 0000996 258.4165 289...
2,2.0,GENERATED VIA SPACE-TRACK.ORG API,2024-01-28T07:06:19,18 SPCS,STARLINK-30852,2023-177Y,EARTH,TEME,UTC,SGP4,...,LARGE,US,2023-11-18,AFETR,,4173264,247971200,0 STARLINK-30852,1 58373U 23177Y 24028.00001157 .00070107 0...,2 58373 42.9990 225.0487 0001687 266.2201 308...
3,2.0,GENERATED VIA SPACE-TRACK.ORG API,2024-01-28T06:56:19,18 SPCS,STARLINK-31104,2023-211F,EARTH,TEME,UTC,SGP4,...,LARGE,US,2023-12-29,AFETR,,4173188,247970554,0 STARLINK-31104,1 58672U 23211F 24028.00001157 -.00023704 0...,2 58672 43.0002 116.5404 0001384 268.8440 113...
4,2.0,GENERATED VIA SPACE-TRACK.ORG API,2024-01-28T17:53:48,18 SPCS,COSMOS 1857,1987-051F,EARTH,TEME,UTC,SGP4,...,MEDIUM,CIS,1987-06-16,PKMTR,,4173554,247983793,0 COSMOS 1857,1 18118U 87051F 24028.00001326 .00000000 0...,2 18118 74.0014 240.7666 0037903 43.2836 317...


## Drop unnecessary columns

In [7]:
df.columns

Index(['CCSDS_OMM_VERS', 'COMMENT', 'CREATION_DATE', 'ORIGINATOR',
       'OBJECT_NAME', 'OBJECT_ID', 'CENTER_NAME', 'REF_FRAME', 'TIME_SYSTEM',
       'MEAN_ELEMENT_THEORY', 'EPOCH', 'MEAN_MOTION', 'ECCENTRICITY',
       'INCLINATION', 'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
       'EPHEMERIS_TYPE', 'CLASSIFICATION_TYPE', 'NORAD_CAT_ID',
       'ELEMENT_SET_NO', 'REV_AT_EPOCH', 'BSTAR', 'MEAN_MOTION_DOT',
       'MEAN_MOTION_DDOT', 'SEMIMAJOR_AXIS', 'PERIOD', 'APOAPSIS', 'PERIAPSIS',
       'OBJECT_TYPE', 'RCS_SIZE', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE',
       'DECAY_DATE', 'FILE', 'GP_ID', 'TLE_LINE0', 'TLE_LINE1', 'TLE_LINE2'],
      dtype='object')

In [8]:
df = df.drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)
df.head()

Unnamed: 0,CREATION_DATE,OBJECT_NAME,OBJECT_ID,CENTER_NAME,REF_FRAME,TIME_SYSTEM,MEAN_ELEMENT_THEORY,EPOCH,MEAN_MOTION,ECCENTRICITY,...,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,DECAY_DATE,TLE_LINE1,TLE_LINE2
0,2024-01-28T06:26:18,STARLINK-1550,2020-062AS,EARTH,TEME,UTC,SGP4,2024-01-28T00:00:00.999648,15.700039,0.000234,...,0.0,6737.023,91.72,360.466,357.309,PAYLOAD,LARGE,,1 46365U 20062AS 24028.00001157 .00109337 0...,2 46365 53.0463 156.6338 0002343 254.9883 169...
1,2024-01-28T06:16:18,STARLINK-30875,2023-170U,EARTH,TEME,UTC,SGP4,2024-01-28T00:00:00.999648,15.025113,0.0001,...,0.0,6937.292,95.84,559.848,558.466,PAYLOAD,LARGE,,1 58224U 23170U 24028.00001157 .00051085 0...,2 58224 43.0021 68.9109 0000996 258.4165 289...
2,2024-01-28T07:06:19,STARLINK-30852,2023-177Y,EARTH,TEME,UTC,SGP4,2024-01-28T00:00:00.999648,15.044861,0.000169,...,0.0,6931.221,95.714,554.255,551.916,PAYLOAD,LARGE,,1 58373U 23177Y 24028.00001157 .00070107 0...,2 58373 42.9990 225.0487 0001687 266.2201 308...
3,2024-01-28T06:56:19,STARLINK-31104,2023-211F,EARTH,TEME,UTC,SGP4,2024-01-28T00:00:00.999648,15.259172,0.000138,...,0.0,6866.17,94.369,488.985,487.085,PAYLOAD,LARGE,,1 58672U 23211F 24028.00001157 -.00023704 0...,2 58672 43.0002 116.5404 0001384 268.8440 113...
4,2024-01-28T17:53:48,COSMOS 1857,1987-051F,EARTH,TEME,UTC,SGP4,2024-01-28T00:00:01.145664,12.542506,0.00379,...,0.0,7824.903,114.81,1476.427,1417.109,PAYLOAD,MEDIUM,,1 18118U 87051F 24028.00001326 .00000000 0...,2 18118 74.0014 240.7666 0037903 43.2836 317...


## Split EPOCH columns into EPOCH_DATE and EPOCH_TIME columns

In [9]:
df[['EPOCH_DATE', 'EPOCH_TIME']] = df['EPOCH'].str.split('T', n=1, expand=True)
df = df.drop(['EPOCH'], axis=1)
print(len(df.columns))
df.columns

31


Index(['CREATION_DATE', 'OBJECT_NAME', 'OBJECT_ID', 'CENTER_NAME', 'REF_FRAME',
       'TIME_SYSTEM', 'MEAN_ELEMENT_THEORY', 'MEAN_MOTION', 'ECCENTRICITY',
       'INCLINATION', 'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
       'EPHEMERIS_TYPE', 'CLASSIFICATION_TYPE', 'NORAD_CAT_ID', 'REV_AT_EPOCH',
       'BSTAR', 'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT', 'SEMIMAJOR_AXIS',
       'PERIOD', 'APOAPSIS', 'PERIAPSIS', 'OBJECT_TYPE', 'RCS_SIZE',
       'DECAY_DATE', 'TLE_LINE1', 'TLE_LINE2', 'EPOCH_DATE', 'EPOCH_TIME'],
      dtype='object')

## Change column order

In [10]:
columns = list(df.columns)

first_col_idx = columns.index('NORAD_CAT_ID')
second_col_idx = columns.index('OBJECT_NAME')
third_col_idx = columns.index('OBJECT_ID')
forth_col_idx = columns.index('DECAY_DATE')
fifth_col_idx = columns.index('EPOCH_DATE')
sixth_col_idx = columns.index('EPOCH_TIME')
indices = [first_col_idx, second_col_idx, third_col_idx, forth_col_idx, fifth_col_idx, sixth_col_idx]
indices.sort()
col_order = [columns[first_col_idx]] + [columns[second_col_idx]] + [columns[third_col_idx]] + [columns[forth_col_idx]] + [columns[fifth_col_idx]] + [columns[sixth_col_idx]]
col_order += columns[:indices[0]] + columns[indices[1]+1:indices[2]] + columns[indices[2]+1:indices[3]] + columns[indices[3]+1:indices[4]] + columns[indices[4]+1:indices[5]] + columns[indices[5]+1:]
print(len(col_order))
col_order

31


['NORAD_CAT_ID',
 'OBJECT_NAME',
 'OBJECT_ID',
 'DECAY_DATE',
 'EPOCH_DATE',
 'EPOCH_TIME',
 'CREATION_DATE',
 'CENTER_NAME',
 'REF_FRAME',
 'TIME_SYSTEM',
 'MEAN_ELEMENT_THEORY',
 'MEAN_MOTION',
 'ECCENTRICITY',
 'INCLINATION',
 'RA_OF_ASC_NODE',
 'ARG_OF_PERICENTER',
 'MEAN_ANOMALY',
 'EPHEMERIS_TYPE',
 'CLASSIFICATION_TYPE',
 'REV_AT_EPOCH',
 'BSTAR',
 'MEAN_MOTION_DOT',
 'MEAN_MOTION_DDOT',
 'SEMIMAJOR_AXIS',
 'PERIOD',
 'APOAPSIS',
 'PERIAPSIS',
 'OBJECT_TYPE',
 'RCS_SIZE',
 'TLE_LINE1',
 'TLE_LINE2']

In [11]:
df = df[col_order]
print(len(df.columns))
df.columns

31


Index(['NORAD_CAT_ID', 'OBJECT_NAME', 'OBJECT_ID', 'DECAY_DATE', 'EPOCH_DATE',
       'EPOCH_TIME', 'CREATION_DATE', 'CENTER_NAME', 'REF_FRAME',
       'TIME_SYSTEM', 'MEAN_ELEMENT_THEORY', 'MEAN_MOTION', 'ECCENTRICITY',
       'INCLINATION', 'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
       'EPHEMERIS_TYPE', 'CLASSIFICATION_TYPE', 'REV_AT_EPOCH', 'BSTAR',
       'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT', 'SEMIMAJOR_AXIS', 'PERIOD',
       'APOAPSIS', 'PERIAPSIS', 'OBJECT_TYPE', 'RCS_SIZE', 'TLE_LINE1',
       'TLE_LINE2'],
      dtype='object')

# Load Space-Track TLEs from 2023-12-28 to 2024-01-28 and store them in a dataframe

In [6]:
def tle_to_sv(row):
    tle = TLE(row['TLE_LINE1'], row['TLE_LINE2'])
    propagator = TLEPropagator.selectExtrapolator(tle)
    state = propagator.getInitialState()
    pv = state.getPVCoordinates()
    return pv

def sv_df_from(spacetrack_df):
    sv_df = spacetrack_df[
        ['NORAD_CAT_ID', 'OBJECT_NAME', 'OBJECT_ID', 'DECAY_DATE', 'EPOCH_DATE', 'EPOCH_TIME', 'CENTER_NAME',
         'REF_FRAME', 'TIME_SYSTEM', 'OBJECT_TYPE', 'RCS_SIZE']].copy()

    sv_series = spacetrack_df[['TLE_LINE1', 'TLE_LINE2']].apply(tle_to_sv, axis=1)
    sv_df['PX'] = sv_series.apply(lambda sv: sv.getPosition().x / 1000)  # in km
    sv_df['PY'] = sv_series.apply(lambda sv: sv.getPosition().y / 1000)
    sv_df['PZ'] = sv_series.apply(lambda sv: sv.getPosition().z / 1000)
    sv_df['ALTITUDE'] = np.sqrt(sv_df['PX'] ** 2 + sv_df['PY'] ** 2 + sv_df['PZ'] ** 2) - (
                Constants.WGS84_EARTH_EQUATORIAL_RADIUS / 1000)
    
    sv_df['VX'] = sv_series.apply(lambda sv: sv.getVelocity().x / 1000)  # in km/s
    sv_df['VY'] = sv_series.apply(lambda sv: sv.getVelocity().y / 1000)
    sv_df['VZ'] = sv_series.apply(lambda sv: sv.getVelocity().z / 1000)
    sv_df['VELOCITY_NORM'] = np.sqrt(sv_df['VX'] ** 2 + sv_df['VY'] ** 2 + sv_df['VZ'] ** 2)
    return sv_df

def csv_to_dataframe(dates, ids, col_order, col_check_duplicates):
    spacetrack_df = pd.DataFrame()
    spacetrack_sv_df = pd.DataFrame()
    for date in tqdm(dates):
        # Load csv and drop unncessary columns
        tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)
        
        # Split EPOCH columns into EPOCH_DATE and EPOCH_TIME columns
        tmp[['EPOCH_DATE', 'EPOCH_TIME']] = tmp['EPOCH'].str.split('T', n=1, expand=True)
        tmp = tmp.drop(['EPOCH'], axis=1)
        
        # Change column order
        tmp = tmp[col_order]
        
        # Filter tmp to have only the space objects info available on 2024-01-28
        tmp = tmp[tmp['NORAD_CAT_ID'].isin(ids)]
        
        # Drop duplicate lines. Lines with equal NORAD_CAT_ID, EPOCH_DATE and EPOCH_TIME
        tmp.drop_duplicates(subset=col_check_duplicates, inplace=True, ignore_index=True)
        
        # Build complementary State Vector dataframe
        tmp_sv = sv_df_from(tmp)
        
        print(f'Concatenating {date} TLEs\nNumber of lines: {tmp.shape[0]}\n\n')
        
        # Concat the temporary dataframe to the Space-Track dataset
        spacetrack_df = pd.concat([spacetrack_df, tmp], ignore_index=True)
        spacetrack_sv_df = pd.concat([spacetrack_sv_df, tmp_sv], ignore_index=True)
        
        
    print(f'Space-Track dataset built.\nNumber of lines: {spacetrack_df.shape[0]}\n')
    return spacetrack_df, spacetrack_sv_df

In [9]:
spacetrack_df, spacetrack_sv_df = csv_to_dataframe(dates, df['NORAD_CAT_ID'].unique(), col_order, ['NORAD_CAT_ID', 'EPOCH_DATE', 'EPOCH_TIME'])
spacetrack_df.head()

Period dates: ['2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31', '2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08', '2024-01-09', '2024-01-10', '2024-01-11', '2024-01-12', '2024-01-13', '2024-01-14', '2024-01-15', '2024-01-16', '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20', '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-24', '2024-01-25', '2024-01-26', '2024-01-27', '2024-01-28']


  0%|          | 0/32 [00:00<?, ?it/s]

Concatenating 2023-12-28 TLEs
Number of lines: 39692


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2023-12-29 TLEs
Number of lines: 40111


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2023-12-30 TLEs
Number of lines: 42433


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2023-12-31 TLEs
Number of lines: 31133


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-01 TLEs
Number of lines: 42104


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-02 TLEs
Number of lines: 42583


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-03 TLEs
Number of lines: 40392

Concatenating 2024-01-04 TLEs
Number of lines: 42904


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-05 TLEs
Number of lines: 42278


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-06 TLEs
Number of lines: 42436


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-07 TLEs
Number of lines: 43031


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-08 TLEs
Number of lines: 39869


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-09 TLEs
Number of lines: 42321

Concatenating 2024-01-10 TLEs
Number of lines: 30994


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-11 TLEs
Number of lines: 43257

Concatenating 2024-01-12 TLEs
Number of lines: 43468


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-13 TLEs
Number of lines: 32282

Concatenating 2024-01-14 TLEs
Number of lines: 42874


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-15 TLEs
Number of lines: 42022


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-16 TLEs
Number of lines: 43664


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-17 TLEs
Number of lines: 39389


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-18 TLEs
Number of lines: 42351


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-19 TLEs
Number of lines: 43823


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-20 TLEs
Number of lines: 44083


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-21 TLEs
Number of lines: 43122

Concatenating 2024-01-22 TLEs
Number of lines: 44251


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-23 TLEs
Number of lines: 36996


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-24 TLEs
Number of lines: 37410


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-25 TLEs
Number of lines: 39403

Concatenating 2024-01-26 TLEs
Number of lines: 29635


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-27 TLEs
Number of lines: 41409


  tmp = pd.read_csv(f'../datasets/space-track_{date}.csv',  memory_map=True).drop(['CCSDS_OMM_VERS', 'COMMENT', 'ORIGINATOR', 'COUNTRY_CODE', 'LAUNCH_DATE', 'SITE', 'FILE', 'GP_ID', 'ELEMENT_SET_NO', 'TLE_LINE0'], axis=1)


Concatenating 2024-01-28 TLEs
Number of lines: 45056


Space-Track dataset built.
Number of lines: 1296776


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2
0,45102,STARLINK-1195,2020-006BL,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001659,0.0,6711.059,91.19,333.568,332.281,PAYLOAD,LARGE,1 45102U 20006BL 23362.00001157 .00165925 0...,2 45102 53.0456 223.3367 0000959 49.0274 96...
1,56899,STARLINK-6203,2023-083Z,,2023-12-28,00:00:00.999648,2023-12-28T06:26:17,EARTH,TEME,UTC,...,-0.00014,0.0,6937.217,95.838,560.049,558.115,PAYLOAD,LARGE,1 56899U 23083Z 23362.00001157 -.00013968 0...,2 56899 43.0034 123.0158 0001394 272.0222 283...
2,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,2023-12-28T06:16:17,EARTH,TEME,UTC,...,-0.012126,0.0,6934.593,95.784,557.373,555.544,PAYLOAD,LARGE,1 58063U 23158N 23362.00001157 -.01212631 0...,2 58063 43.0014 71.6139 0001319 262.3570 310...
3,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001205,0.0,6761.643,92.223,384.581,382.435,PAYLOAD,LARGE,1 58515U 23191H 23362.00001157 .00120477 0...,2 58515 43.0004 162.7285 0001587 278.9590 219...
4,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,2023-12-28T18:10:27,EARTH,TEME,UTC,...,0.001205,0.0,6761.643,92.223,384.581,382.435,PAYLOAD,LARGE,1 58515U 23191H 23362.00001157 .00120477 0...,2 58515 43.0004 162.7285 0001587 278.9590 219...


In [10]:
spacetrack_df.describe()

Unnamed: 0,NORAD_CAT_ID,MEAN_MOTION,ECCENTRICITY,INCLINATION,RA_OF_ASC_NODE,ARG_OF_PERICENTER,MEAN_ANOMALY,EPHEMERIS_TYPE,REV_AT_EPOCH,BSTAR,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS
count,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0,1296776.0
mean,39451.61,14.46251,0.005839052,74.81927,162.9612,166.4653,193.6323,0.0,29250.69,0.0005521824,2.594924e-05,6.058023e-07,7131.33,99.95608,796.8572,709.5331
std,17035.15,0.8689821,0.01579416,21.85046,104.8953,97.8414,98.465,0.0,29022.21,0.01456273,0.001541225,0.0001192431,304.2178,6.478958,373.4517,275.3659
min,11.0,11.25018,4e-07,6.7313,0.0001,0.0002,0.0006,0.0,1.0,-1.005,-0.02960601,-1.2786e-05,6520.091,87.325,147.723,130.781
25%,26847.0,14.01164,0.0001533,53.1606,73.2591,88.2978,97.8934,0.0,6307.0,8.1289e-05,1.46e-06,0.0,6917.949,95.439,547.765,538.774
50%,45785.0,14.87345,0.0008318,74.0965,148.1988,135.587,225.0643,0.0,15929.0,0.00027979,1.318e-05,0.0,6984.373,96.817,626.3595,570.213
75%,53878.0,15.08818,0.0043929,97.6542,255.2681,262.1465,272.0713,0.0,49416.0,0.00087414,7.068e-05,0.0,7267.905,102.772,956.7795,819.5412
max,58848.0,16.49006,0.2235296,144.6443,359.9996,359.9999,359.9992,0.0,99999.0,0.91568,0.668681,0.068213,8413.219,127.998,3907.909,2020.608


In [11]:
spacetrack_df.to_csv("../datasets/space-track-dataset.csv", index=False)

In [None]:
spacetrack_sv_df.head()

In [None]:
spacetrack_sv_df.describe()

In [None]:
spacetrack_sv_df.to_csv("../datasets/space-track-dataset-sv.csv", index=False)

# Build complementary Space-Track dataset with TLEs converted to State Vectors

In [18]:
spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)
spacetrack_df.head()

  spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2
0,45102,STARLINK-1195,2020-006BL,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001659,0.0,6711.059,91.19,333.568,332.281,PAYLOAD,LARGE,1 45102U 20006BL 23362.00001157 .00165925 0...,2 45102 53.0456 223.3367 0000959 49.0274 96...
1,56899,STARLINK-6203,2023-083Z,,2023-12-28,00:00:00.999648,2023-12-28T06:26:17,EARTH,TEME,UTC,...,-0.00014,0.0,6937.217,95.838,560.049,558.115,PAYLOAD,LARGE,1 56899U 23083Z 23362.00001157 -.00013968 0...,2 56899 43.0034 123.0158 0001394 272.0222 283...
2,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,2023-12-28T06:16:17,EARTH,TEME,UTC,...,-0.012126,0.0,6934.593,95.784,557.373,555.544,PAYLOAD,LARGE,1 58063U 23158N 23362.00001157 -.01212631 0...,2 58063 43.0014 71.6139 0001319 262.3570 310...
3,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001205,0.0,6761.643,92.223,384.581,382.435,PAYLOAD,LARGE,1 58515U 23191H 23362.00001157 .00120477 0...,2 58515 43.0004 162.7285 0001587 278.9590 219...
4,17725,COSMOS 374 DEB *,1970-089DC,,2023-12-28,00:00:05.080320,2023-12-28T18:10:27,EARTH,TEME,UTC,...,1.2e-05,0.0,7521.696,108.202,1736.43,550.691,DEBRIS,SMALL,1 17725U 70089DC 23362.00005880 .00001185 0...,2 17725 62.7445 204.8063 0788212 345.5877 80...


In [19]:
sv_df = spacetrack_df[['NORAD_CAT_ID', 'OBJECT_NAME', 'OBJECT_ID', 'DECAY_DATE', 'EPOCH_DATE', 'EPOCH_TIME', 'CENTER_NAME', 'REF_FRAME', 'TIME_SYSTEM', 'OBJECT_TYPE', 'RCS_SIZE']].copy()

In [22]:
sv_series = spacetrack_df[['TLE_LINE1', 'TLE_LINE2']].apply(tle_to_sv, axis=1)
sv_df['PX'] =  sv_series.apply(lambda sv: sv.getPosition().x / 1000) # in km
sv_df['PY'] =  sv_series.apply(lambda sv: sv.getPosition().y / 1000)
sv_df['PZ'] =  sv_series.apply(lambda sv: sv.getPosition().z / 1000)
sv_df['ALTITUDE'] = np.sqrt(sv_df['PX']**2 + sv_df['PY']**2 + sv_df['PZ']**2) - (Constants.WGS84_EARTH_EQUATORIAL_RADIUS / 1000)

sv_df['VX'] =  sv_series.apply(lambda sv: sv.getVelocity().x / 1000) # in km/s
sv_df['VY'] =  sv_series.apply(lambda sv: sv.getVelocity().y / 1000)
sv_df['VZ'] =  sv_series.apply(lambda sv: sv.getVelocity().z / 1000)
sv_df['VELOCITY_NORM'] = np.sqrt(sv_df['VX']**2 + sv_df['VY']**2 + sv_df['VZ']**2)

In [23]:
sv_df.columns

Index(['NORAD_CAT_ID', 'OBJECT_NAME', 'OBJECT_ID', 'DECAY_DATE', 'EPOCH_DATE',
       'EPOCH_TIME', 'CENTER_NAME', 'REF_FRAME', 'TIME_SYSTEM', 'OBJECT_TYPE',
       'RCS_SIZE', 'PX', 'PY', 'PZ', 'ALTITUDE', 'VX', 'VY', 'VZ',
       'VELOCITY_NORM'],
      dtype='object')

In [24]:
print(f'Number of lines: {sv_df.shape[0]}')
sv_df.head()

Number of lines: 1068861


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CENTER_NAME,REF_FRAME,TIME_SYSTEM,OBJECT_TYPE,RCS_SIZE,PX,PY,PZ,ALTITUDE,VX,VY,VZ,VELOCITY_NORM
0,45102,STARLINK-1195,2020-006BL,,2023-12-28,00:00:00.999648,EARTH,TEME,UTC,PAYLOAD,LARGE,5593.08996,2174.212241,2998.003175,329.908706,0.509016,5.75724,-5.105527,7.711765
1,56899,STARLINK-6203,2023-083Z,,2023-12-28,00:00:00.999648,EARTH,TEME,UTC,PAYLOAD,LARGE,4783.321895,-4862.754078,-1268.752433,559.894299,3.372156,4.612576,-4.984458,7.582355
2,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,EARTH,TEME,UTC,PAYLOAD,LARGE,792.022048,-6390.032459,-2578.743598,557.980956,5.713646,2.455746,-4.336949,7.581924
3,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,EARTH,TEME,UTC,PAYLOAD,LARGE,3816.860969,-4648.969267,3080.819677,380.027898,6.149002,2.459398,-3.895316,7.683251
4,17725,COSMOS 374 DEB *,1970-089DC,,2023-12-28,00:00:05.080320,EARTH,TEME,UTC,DEBRIS,SMALL,-345.566266,-3801.94701,6414.006665,1086.023945,6.747001,1.896311,2.15417,7.332017


In [25]:
sv_df.to_csv("../datasets/space-track-dataset-sv.csv", index=False)

# Sample reduced Space-Track dataset following no criteria

In [8]:
spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)
spacetrack_df.head()

  spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2
0,45102,STARLINK-1195,2020-006BL,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001659,0.0,6711.059,91.19,333.568,332.281,PAYLOAD,LARGE,1 45102U 20006BL 23362.00001157 .00165925 0...,2 45102 53.0456 223.3367 0000959 49.0274 96...
1,56899,STARLINK-6203,2023-083Z,,2023-12-28,00:00:00.999648,2023-12-28T06:26:17,EARTH,TEME,UTC,...,-0.00014,0.0,6937.217,95.838,560.049,558.115,PAYLOAD,LARGE,1 56899U 23083Z 23362.00001157 -.00013968 0...,2 56899 43.0034 123.0158 0001394 272.0222 283...
2,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,2023-12-28T06:16:17,EARTH,TEME,UTC,...,-0.012126,0.0,6934.593,95.784,557.373,555.544,PAYLOAD,LARGE,1 58063U 23158N 23362.00001157 -.01212631 0...,2 58063 43.0014 71.6139 0001319 262.3570 310...
3,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001205,0.0,6761.643,92.223,384.581,382.435,PAYLOAD,LARGE,1 58515U 23191H 23362.00001157 .00120477 0...,2 58515 43.0004 162.7285 0001587 278.9590 219...
4,17725,COSMOS 374 DEB *,1970-089DC,,2023-12-28,00:00:05.080320,2023-12-28T18:10:27,EARTH,TEME,UTC,...,1.2e-05,0.0,7521.696,108.202,1736.43,550.691,DEBRIS,SMALL,1 17725U 70089DC 23362.00005880 .00001185 0...,2 17725 62.7445 204.8063 0788212 345.5877 80...


In [9]:
def reduced_dataset(dates, dataset, frac):
    reduced_df = pd.DataFrame()
    for date in tqdm(dates):
        df_date = dataset[dataset['EPOCH_DATE'] == date].sample(frac=frac, random_state=7, ignore_index=True)
        df_date.sort_values(by=['EPOCH_DATE', 'EPOCH_TIME', 'NORAD_CAT_ID'], ignore_index=True, inplace=True)
        reduced_df = pd.concat([reduced_df, df_date], ignore_index=True)
    return reduced_df

In [10]:
frac = 0.25
reduced_df = reduced_dataset(dates, spacetrack_df, frac)
print(f'Number of lines: {reduced_df.shape[0]}')
reduced_df.head()

  0%|          | 0/32 [00:00<?, ?it/s]

Number of lines: 267216


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2
0,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,2023-12-28T06:16:17,EARTH,TEME,UTC,...,-0.01212631,0.0,6934.593,95.784,557.373,555.544,PAYLOAD,LARGE,1 58063U 23158N 23362.00001157 -.01212631 0...,2 58063 43.0014 71.6139 0001319 262.3570 310...
1,45739,STARLINK-1475,2020-038K,,2023-12-28,00:01:55.626528,2023-12-28T18:10:27,EARTH,TEME,UTC,...,8.73e-06,0.0,6925.347,95.592,548.089,546.334,PAYLOAD,LARGE,1 45739U 20038K 23362.00133827 .00000873 0...,2 45739 53.0535 200.0197 0001267 97.3550 262...
2,45227,STARLINK-1221,2020-012BB,,2023-12-28,00:01:58.804320,2023-12-28T06:26:17,EARTH,TEME,UTC,...,1.595e-05,0.0,6925.4,95.593,548.114,546.416,PAYLOAD,LARGE,1 45227U 20012BB 23362.00137505 .00001595 0...,2 45227 53.0556 10.0261 0001226 101.0635 259...
3,27609,TRAILBLAZER 2,2002-058E,,2023-12-28,00:02:24.040032,2023-12-28T06:16:17,EARTH,TEME,UTC,...,6.3e-06,0.0,7009.008,97.33,638.15,623.597,PAYLOAD,LARGE,1 27609U 02058E 23362.00166713 .00000630 0...,2 27609 64.5552 236.5835 0010382 181.6066 178...
4,14879,THORAD DELTA 1 DEB,1974-089ES,,2023-12-28,00:02:43.493856,2023-12-28T18:10:27,EARTH,TEME,UTC,...,5.3e-07,0.0,7990.496,118.473,1783.498,1441.225,DEBRIS,SMALL,1 14879U 74089ES 23362.00189229 .00000053 0...,2 14879 101.2066 12.9018 0214175 298.1690 85...


In [11]:
print(reduced_df['EPOCH_DATE'].unique())
np.all((reduced_df['EPOCH_DATE'].unique() == dates))

['2023-12-28' '2023-12-29' '2023-12-30' '2023-12-31' '2024-01-01'
 '2024-01-02' '2024-01-03' '2024-01-04' '2024-01-05' '2024-01-06'
 '2024-01-07' '2024-01-08' '2024-01-09' '2024-01-10' '2024-01-11'
 '2024-01-12' '2024-01-13' '2024-01-14' '2024-01-15' '2024-01-16'
 '2024-01-17' '2024-01-18' '2024-01-19' '2024-01-20' '2024-01-21'
 '2024-01-22' '2024-01-23' '2024-01-24' '2024-01-25' '2024-01-26'
 '2024-01-27' '2024-01-28']


True

In [12]:
reduced_df.to_csv(f"../datasets/space-track-dataset-reduced-{int(frac*100)}.csv", index=False)

# Sample reduced Space-Track dataset based on periapsis (perigee) or altitude and eccentricity

In [7]:
spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)
spacetrack_df.head()

  spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2
0,45102,STARLINK-1195,2020-006BL,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001659,0.0,6711.059,91.19,333.568,332.281,PAYLOAD,LARGE,1 45102U 20006BL 23362.00001157 .00165925 0...,2 45102 53.0456 223.3367 0000959 49.0274 96...
1,56899,STARLINK-6203,2023-083Z,,2023-12-28,00:00:00.999648,2023-12-28T06:26:17,EARTH,TEME,UTC,...,-0.00014,0.0,6937.217,95.838,560.049,558.115,PAYLOAD,LARGE,1 56899U 23083Z 23362.00001157 -.00013968 0...,2 56899 43.0034 123.0158 0001394 272.0222 283...
2,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,2023-12-28T06:16:17,EARTH,TEME,UTC,...,-0.012126,0.0,6934.593,95.784,557.373,555.544,PAYLOAD,LARGE,1 58063U 23158N 23362.00001157 -.01212631 0...,2 58063 43.0014 71.6139 0001319 262.3570 310...
3,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001205,0.0,6761.643,92.223,384.581,382.435,PAYLOAD,LARGE,1 58515U 23191H 23362.00001157 .00120477 0...,2 58515 43.0004 162.7285 0001587 278.9590 219...
4,17725,COSMOS 374 DEB *,1970-089DC,,2023-12-28,00:00:05.080320,2023-12-28T18:10:27,EARTH,TEME,UTC,...,1.2e-05,0.0,7521.696,108.202,1736.43,550.691,DEBRIS,SMALL,1 17725U 70089DC 23362.00005880 .00001185 0...,2 17725 62.7445 204.8063 0788212 345.5877 80...


In [8]:
use_altitude = False
col = ''
if use_altitude:
    col = 'ALTITUDE'
else:
    col = 'PERIAPSIS'
spacetrack_alt_df = pd.read_csv('../datasets/space-track-dataset-sv.csv', usecols=['NORAD_CAT_ID','EPOCH_DATE', 'EPOCH_TIME', 'ALTITUDE'], memory_map=True)
spacetrack_alt_df.head()

Unnamed: 0,NORAD_CAT_ID,EPOCH_DATE,EPOCH_TIME,ALTITUDE
0,45102,2023-12-28,00:00:00.999648,329.908706
1,56899,2023-12-28,00:00:00.999648,559.894299
2,58063,2023-12-28,00:00:00.999648,557.980956
3,58515,2023-12-28,00:00:00.999648,380.027898
4,17725,2023-12-28,00:00:05.080320,1086.023945


In [25]:
if use_altitude:
    spacetrack_alt_df['ALTITUDE'].describe()

count    1.068861e+06
mean     7.547569e+02
std      3.128136e+02
min      1.335982e+02
25%      5.414181e+02
50%      6.043675e+02
75%      8.809051e+02
max      3.768265e+03
Name: ALTITUDE, dtype: float64

In [9]:
df = spacetrack_df.merge(spacetrack_alt_df, on=['NORAD_CAT_ID', 'EPOCH_DATE', 'EPOCH_TIME'])
print(spacetrack_df.shape[0], spacetrack_alt_df.shape[0], df.shape[0])
print(df.columns)
df.head()

1068861 1068861 1068861
Index(['NORAD_CAT_ID', 'OBJECT_NAME', 'OBJECT_ID', 'DECAY_DATE', 'EPOCH_DATE',
       'EPOCH_TIME', 'CREATION_DATE', 'CENTER_NAME', 'REF_FRAME',
       'TIME_SYSTEM', 'MEAN_ELEMENT_THEORY', 'MEAN_MOTION', 'ECCENTRICITY',
       'INCLINATION', 'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
       'EPHEMERIS_TYPE', 'CLASSIFICATION_TYPE', 'REV_AT_EPOCH', 'BSTAR',
       'MEAN_MOTION_DOT', 'MEAN_MOTION_DDOT', 'SEMIMAJOR_AXIS', 'PERIOD',
       'APOAPSIS', 'PERIAPSIS', 'OBJECT_TYPE', 'RCS_SIZE', 'TLE_LINE1',
       'TLE_LINE2', 'ALTITUDE'],
      dtype='object')


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2,ALTITUDE
0,45102,STARLINK-1195,2020-006BL,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.0,6711.059,91.19,333.568,332.281,PAYLOAD,LARGE,1 45102U 20006BL 23362.00001157 .00165925 0...,2 45102 53.0456 223.3367 0000959 49.0274 96...,329.908706
1,56899,STARLINK-6203,2023-083Z,,2023-12-28,00:00:00.999648,2023-12-28T06:26:17,EARTH,TEME,UTC,...,0.0,6937.217,95.838,560.049,558.115,PAYLOAD,LARGE,1 56899U 23083Z 23362.00001157 -.00013968 0...,2 56899 43.0034 123.0158 0001394 272.0222 283...,559.894299
2,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,2023-12-28T06:16:17,EARTH,TEME,UTC,...,0.0,6934.593,95.784,557.373,555.544,PAYLOAD,LARGE,1 58063U 23158N 23362.00001157 -.01212631 0...,2 58063 43.0014 71.6139 0001319 262.3570 310...,557.980956
3,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.0,6761.643,92.223,384.581,382.435,PAYLOAD,LARGE,1 58515U 23191H 23362.00001157 .00120477 0...,2 58515 43.0004 162.7285 0001587 278.9590 219...,380.027898
4,17725,COSMOS 374 DEB *,1970-089DC,,2023-12-28,00:00:05.080320,2023-12-28T18:10:27,EARTH,TEME,UTC,...,0.0,7521.696,108.202,1736.43,550.691,DEBRIS,SMALL,1 17725U 70089DC 23362.00005880 .00001185 0...,2 17725 62.7445 204.8063 0788212 345.5877 80...,1086.023945


In [32]:
reduced_df[reduced_df.EPOCH_DATE == '2024-01-28'].shape[0]

1595

In [25]:
def reduced_filtered_dataset(col, dates, dataset, altitude_limits, e_limit, sample, frac):
    min_lim, max_lim = altitude_limits
    altitude_cond = (dataset[col] > min_lim) & (dataset[col] <= max_lim)
    
    eccentricity_cond = dataset['ECCENTRICITY'] < e_limit
    
    filtered_dataset = dataset[(altitude_cond) & (eccentricity_cond)]
    if sample:
        reduced_df = pd.DataFrame()
        for date in tqdm(dates):
            df_date = filtered_dataset[filtered_dataset['EPOCH_DATE'] == date].sample(frac=frac, random_state=7, ignore_index=True)
            df_date.sort_values(by=['EPOCH_DATE', 'EPOCH_TIME', 'NORAD_CAT_ID'], ignore_index=True, inplace=True)
            reduced_df = pd.concat([reduced_df, df_date], ignore_index=True)
        return reduced_df
    else:
        return filtered_dataset

In [26]:
sample = False
frac = 0.25
altitude_limits = (500, 520) # in km
e_limit = 0.25

reduced_df = reduced_filtered_dataset(col, dates, df, altitude_limits, e_limit, sample, frac)
print(f'Number of lines: {reduced_df.shape[0]}')
reduced_df.head()

Number of lines: 51680


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2,ALTITUDE
39,38248,RISAT 1,2012-017A,,2023-12-28,00:03:00.734112,2023-12-28T06:56:18,EARTH,TEME,UTC,...,0.0,6884.714,94.752,509.926,503.232,PAYLOAD,LARGE,1 38248U 12017A 23362.00209183 .00009596 0...,2 38248 97.5335 14.2732 0004861 113.2771 246...,510.978052
122,58048,STARLINK-30554,2023-156W,,2023-12-28,00:13:51.285504,2023-12-28T18:10:27,EARTH,TEME,UTC,...,0.0,6887.84,94.817,510.479,508.93,PAYLOAD,LARGE,1 58048U 23156W 23362.00962136 .00003038 0...,2 58048 53.0520 205.6284 0001124 115.1558 244...,510.913987
196,58047,STARLINK-30529,2023-156V,,2023-12-28,00:21:34.607232,2023-12-28T14:26:18,EARTH,TEME,UTC,...,0.0,6887.814,94.816,510.613,508.745,PAYLOAD,LARGE,1 58047U 23156V 23362.01498388 .00006323 0...,2 58047 53.0519 204.9470 0001356 137.9996 222...,511.253386
205,39770,SPROUT,2014-029E,,2023-12-28,00:22:03.111456,2023-12-28T06:36:18,EARTH,TEME,UTC,...,0.0,6898.018,95.027,523.367,516.4,PAYLOAD,MEDIUM,1 39770U 14029E 23362.01531379 .00042171 0...,2 39770 98.0019 136.9274 0005050 16.5476 343...,519.605531
232,58239,STARLINK-30847,2023-171K,,2023-12-28,00:24:21.929472,2023-12-28T06:26:17,EARTH,TEME,UTC,...,0.0,6896.208,94.989,518.578,517.567,PAYLOAD,LARGE,1 58239U 23171K 23362.01692048 .00055547 0...,2 58239 43.0004 332.5049 0000733 271.1214 88...,517.838117


In [27]:
reduced_df.NORAD_CAT_ID.unique().shape[0]

777

In [23]:
reduced_df.drop('ALTITUDE', axis=1, inplace=True)

KeyError: "['ALTITUDE'] not found in axis"

In [28]:
if sample:
    savepath = f"../datasets/space-track-dataset-reduced-{int(frac*100)}-h-{altitude_limits[0]}-{altitude_limits[1]}-e-{int(e_limit*100)}.csv"
else:
    savepath = f"../datasets/space-track-dataset-reduced-h-{altitude_limits[0]}-{altitude_limits[1]}-e-{int(e_limit*100)}.csv"

reduced_df.to_csv(savepath, index=False)

# Sample reduced Space-Track dataset based on LEO 1, LEO 2, LEO 4 and LEO 4 definitions from Spaceflight Safety Handbook for Satellite Operators

In [2]:
leo1_limits = ('leo1', 0, 500) # in km
leo2_limits = ('leo2', 500, 750)
leo3_limits = ('leo3', 750, 1200)
leo4_limits = ('leo4', 1200, 2000)

In [3]:
spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)
spacetrack_df.head()

  spacetrack_df = pd.read_csv('../datasets/space-track-dataset.csv', memory_map=True)


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2
0,45102,STARLINK-1195,2020-006BL,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001659,0.0,6711.059,91.19,333.568,332.281,PAYLOAD,LARGE,1 45102U 20006BL 23362.00001157 .00165925 0...,2 45102 53.0456 223.3367 0000959 49.0274 96...
1,56899,STARLINK-6203,2023-083Z,,2023-12-28,00:00:00.999648,2023-12-28T06:26:17,EARTH,TEME,UTC,...,-0.00014,0.0,6937.217,95.838,560.049,558.115,PAYLOAD,LARGE,1 56899U 23083Z 23362.00001157 -.00013968 0...,2 56899 43.0034 123.0158 0001394 272.0222 283...
2,58063,STARLINK-30585,2023-158N,,2023-12-28,00:00:00.999648,2023-12-28T06:16:17,EARTH,TEME,UTC,...,-0.012126,0.0,6934.593,95.784,557.373,555.544,PAYLOAD,LARGE,1 58063U 23158N 23362.00001157 -.01212631 0...,2 58063 43.0014 71.6139 0001319 262.3570 310...
3,58515,STARLINK-31017,2023-191H,,2023-12-28,00:00:00.999648,2023-12-28T06:46:17,EARTH,TEME,UTC,...,0.001205,0.0,6761.643,92.223,384.581,382.435,PAYLOAD,LARGE,1 58515U 23191H 23362.00001157 .00120477 0...,2 58515 43.0004 162.7285 0001587 278.9590 219...
4,17725,COSMOS 374 DEB *,1970-089DC,,2023-12-28,00:00:05.080320,2023-12-28T18:10:27,EARTH,TEME,UTC,...,1.2e-05,0.0,7521.696,108.202,1736.43,550.691,DEBRIS,SMALL,1 17725U 70089DC 23362.00005880 .00001185 0...,2 17725 62.7445 204.8063 0788212 345.5877 80...


In [4]:
def reduced_filtered_dataset(dates, dataset, perigee_limits, e_limit, sample, frac):
    _, min_lim, max_lim = perigee_limits
    altitude_cond = (dataset['PERIAPSIS'] > min_lim) & (dataset['PERIAPSIS'] <= max_lim)
    
    eccentricity_cond = dataset['ECCENTRICITY'] < e_limit
    
    filtered_dataset = dataset[(altitude_cond) & (eccentricity_cond)]
    
    if sample:
        reduced_df = pd.DataFrame()
        for date in tqdm(dates):
            df_date = filtered_dataset[filtered_dataset['EPOCH_DATE'] == date].sample(frac=frac, random_state=7, ignore_index=True)
            df_date.sort_values(by=['EPOCH_DATE', 'EPOCH_TIME', 'NORAD_CAT_ID'], ignore_index=True, inplace=True)
            reduced_df = pd.concat([reduced_df, df_date], ignore_index=True)
        return reduced_df
    else:
        return filtered_dataset.copy()

In [7]:
sample = True
frac = 0.25
perigee_limits = leo2_limits # in km
e_limit = 0.25

reduced_df = reduced_filtered_dataset(dates, spacetrack_df, perigee_limits, e_limit, sample, frac)
print(f'Number of lines: {reduced_df.shape[0]}')
reduced_df.head()

  0%|          | 0/32 [00:00<?, ?it/s]

Number of lines: 142548


Unnamed: 0,NORAD_CAT_ID,OBJECT_NAME,OBJECT_ID,DECAY_DATE,EPOCH_DATE,EPOCH_TIME,CREATION_DATE,CENTER_NAME,REF_FRAME,TIME_SYSTEM,...,MEAN_MOTION_DOT,MEAN_MOTION_DDOT,SEMIMAJOR_AXIS,PERIOD,APOAPSIS,PERIAPSIS,OBJECT_TYPE,RCS_SIZE,TLE_LINE1,TLE_LINE2
0,17725,COSMOS 374 DEB *,1970-089DC,,2023-12-28,00:00:05.080320,2023-12-28T18:10:27,EARTH,TEME,UTC,...,1.2e-05,0.0,7521.696,108.202,1736.43,550.691,DEBRIS,SMALL,1 17725U 70089DC 23362.00005880 .00001185 0...,2 17725 62.7445 204.8063 0788212 345.5877 80...
1,53732,STARLINK-4661,2022-107AJ,,2023-12-28,00:00:45.481824,2023-12-28T18:10:27,EARTH,TEME,UTC,...,1.8e-05,0.0,6917.854,95.437,540.599,538.839,PAYLOAD,LARGE,1 53732U 22107AJ 23362.00052641 .00001756 0...,2 53732 53.2156 257.6184 0001272 90.2252 269...
2,21346,DELTA 1 DEB,1975-052BQ,,2023-12-28,00:01:06.678336,2023-12-28T14:16:16,EARTH,TEME,UTC,...,0.000763,0.0,7148.379,100.247,845.575,694.912,DEBRIS,SMALL,1 21346U 75052BQ 23362.00077174 .00076350 0...,2 21346 98.9098 25.4086 0105383 70.9796 290...
3,55885,CZ-6A DEB,2022-151AAH,,2023-12-28,00:01:52.138560,2023-12-28T06:26:17,EARTH,TEME,UTC,...,0.000152,0.0,7108.927,99.418,792.693,668.891,DEBRIS,SMALL,1 55885U 22151AAH 23362.00129790 .00015238 0...,2 55885 98.6315 15.9526 0087075 150.7342 209...
4,57776,STARLINK-30360,2023-134F,,2023-12-28,00:02:01.118976,2023-12-28T18:10:27,EARTH,TEME,UTC,...,4.1e-05,0.0,6937.193,95.837,560.035,558.08,PAYLOAD,LARGE,1 57776U 23134F 23362.00140184 .00004125 0...,2 57776 42.9996 239.2955 0001409 248.9670 111...


In [8]:
print(reduced_df['EPOCH_DATE'].unique())
np.all((reduced_df['EPOCH_DATE'].unique() == dates))

['2023-12-28' '2023-12-29' '2023-12-30' '2023-12-31' '2024-01-01'
 '2024-01-02' '2024-01-03' '2024-01-04' '2024-01-05' '2024-01-06'
 '2024-01-07' '2024-01-08' '2024-01-09' '2024-01-10' '2024-01-11'
 '2024-01-12' '2024-01-13' '2024-01-14' '2024-01-15' '2024-01-16'
 '2024-01-17' '2024-01-18' '2024-01-19' '2024-01-20' '2024-01-21'
 '2024-01-22' '2024-01-23' '2024-01-24' '2024-01-25' '2024-01-26'
 '2024-01-27' '2024-01-28']


True

In [9]:
if sample:
    savepath = f"../datasets/space-track-dataset-{perigee_limits[0]}-reduced-{int(frac*100)}.csv"
else:
    savepath = f"../datasets/space-track-dataset-{perigee_limits[0]}.csv"
reduced_df.to_csv(savepath, index=False)