In [67]:
import pandas
import seaborn as sns

# To analyze orbital dynamics effectively, we need these orbital and physical parameters:
identifier_params = [
    'full_name',  # Full designation
    'spkid',     # SPK ID
    'neo',        # Near-Earth Object flag
    'pha',        # Potentially Hazardous Asteroid flag
]

crucial_orbital_params = [
    'a',        # Semi-major axis (AU)
    'e',        # Eccentricity
    'i',        # Inclination (deg)
    'q',        # Perihelion distance (AU)
    'ad',       # Aphelion distance (AU)
    'per',      # Orbital period (years)
    'n',        # Mean motion (deg/day)
    'ma',       # Mean anomaly (deg)
]

physical_properties = [
    'H',        # Absolute magnitude (size proxy)
    'diameter', # Diameter (km) - when available
    'albedo',   # Geometric albedo
    'rot_per',  # Rotation period (hours)
    'GM',       # Mass parameter (rare but valuable)
    'spec_B',   # Spectral type (Bus/Tholen)
    'spec_T',   # Taxonomic class
]


# Non-essential data but useful for assessing data quality later down the line
quality_metrics = [
    'condition_code',  # Orbit uncertainty (0-9, 0=best)
    'n_obs_used',      # Number of observations
    'data_arc',        # Observation span (days)
    'first_obs',       # First observation date
    'last_obs',        # Last observation date
]


In [68]:
import requests

def query_jpl_sbdb(advanced=False):
    base_url = "https://ssd-api.jpl.nasa.gov/sbdb_query.api"
    results = []
    fields_str = identifier_params + crucial_orbital_params + physical_properties + (quality_metrics if advanced else [])
    print(','.join(fields_str))

    params = {
        'fields': ','.join(fields_str),
        'limit': 100  # Limit the number of results
    }
    response = requests.get(base_url, params=params)

    if response.status_code == 200:
        results.append(response.json())
    else:
        print(f"Error: {response.status_code} - {response.text}")
    
    return results


In [69]:
jpl_data = query_jpl_sbdb()


full_name,spkid,neo,pha,a,e,i,q,ad,per,n,ma,H,diameter,albedo,rot_per,GM,spec_B,spec_T


In [70]:
# pandas.DataFrame(jpl_data)
df = pandas.json_normalize(jpl_data, record_path=['data'])

# set column names from jpl_data[0]['fields'] if present
fields = jpl_data[0].get('fields') if isinstance(jpl_data, list) and jpl_data else None
if fields:
    if len(fields) == len(df.columns):
        df.columns = fields
    else:
        # align/truncate if lengths differ
        df.columns = fields[:len(df.columns)]
df.head()

Unnamed: 0,full_name,spkid,neo,pha,a,e,i,q,ad,per,n,ma,H,diameter,albedo,rot_per,GM,spec_B,spec_T
0,1 Ceres (A801 AA),20000001,N,N,2.766,0.0796,10.59,2.546,2.99,1680.0,0.2143,231.54,3.35,939.4,0.09,9.07417,62.6284,C,G
1,2 Pallas (A802 FA),20000002,N,N,2.77,0.2306,34.93,2.131,3.41,1680.0,0.2138,211.53,4.11,513.0,0.155,7.8132214,13.63,B,B
2,3 Juno (A804 RA),20000003,N,N,2.671,0.2558,12.99,1.988,3.35,1590.0,0.2258,217.59,5.19,246.596,0.214,7.21,,Sk,S
3,4 Vesta (A807 FA),20000004,N,N,2.362,0.0902,7.14,2.149,2.57,1330.0,0.2716,26.81,3.25,522.77,0.4228,5.3421276322,17.2882844,V,V
4,5 Astraea (A845 XA),20000005,N,N,2.577,0.1875,5.36,2.094,3.06,1510.0,0.2383,133.87,6.97,106.699,0.274,16.806,,S,S


In [71]:
# Now let's clean that data from Gaia (`tableb1.dat`)
# CSV headers converted from the fixed-width spec for tableb1.dat

csv_headers = [
    'Asteroid',   # Name of the asteroid
    'H',          # Absolute magnitude (mag)
    'RMS',        # RMS of normalized residuals, 7D OD
    'RMS6D',      # RMS of normalized residuals, 6D OD
    'A2',         # Transversal acceleration component (au/d2)
    'e_A2',       # Error in A2 (au/d2)
    'da_dt',      # Semi-major axis drift (au/Myr)
    'e_da_dt',    # Error in da/dt (au/Myr)
    'max_da_dt',  # Maximum da/dt from Monte Carlo model (au/Myr)
    'SNR',        # Signal-to-noise of A2 detection
    'FAccept',    # [1 Rej.] Flag for acceptance of the detection
    'NOptObs',    # Number of optical observations
    'NRejOpt',    # Number of rejected optical observations in 7D OD
    'NRej6D',     # Number of rejected optical observations in 6D OD
    'NRadObs',    # Number of radar observations
    'NRejRad',    # Number of rejected radar obs in 7D OD
    'NRejRad6D',  # Number of rejected radar obs in 6D OD
    'NOptOld',    # Number of old observations
    'Dlow',       # 15-th percentile of diameter (m)
    'Dmed',       # 50-th percentile of diameter (m)
    'Dhigh',      # 85-th percentile of diameter (m)
    'ModFlag',    # [0/1] Flag for model used in Monte Carlo
    'Prot',       # Rotation period of the asteroid (h), ?=-1 if unknown
    'Tax',        # Taxonomic complex of the asteroid
    'deltat'      # Length of observational arc (yr)
]

# Helpful mapping (header -> short explanation) for reference in the notebook
headers_comments = {
    'Asteroid': 'Name of the asteroid',
    'H': 'Absolute magnitude (mag)',
    'RMS': 'RMS of normalized residuals, 7D orbit determination',
    'RMS6D': 'RMS of normalized residuals, 6D orbit determination',
    'A2': 'Transversal acceleration component (au/d^2)',
    'e_A2': 'Uncertainty on A2 (au/d^2)',
    'da_dt': 'Semi-major axis drift (au/Myr)',
    'e_da_dt': 'Uncertainty on da/dt (au/Myr)',
    'max_da_dt': 'Maximum da/dt from Monte Carlo modelling (au/Myr)',
    'SNR': 'Signal-to-noise ratio of A2 detection',
    'FAccept': 'Acceptance flag for detection (1 = Rejected?)',
    'NOptObs': 'Number of optical observations used',
    'NRejOpt': 'Number of optical observations rejected in 7D OD',
    'NRej6D': 'Number of optical observations rejected in 6D OD',
    'NRadObs': 'Number of radar observations',
    'NRejRad': 'Number of radar observations rejected in 7D OD',
    'NRejRad6D': 'Number of radar observations rejected in 6D OD',
    'NOptOld': 'Number of old optical observations',
    'Dlow': '15th percentile of diameter (m)',
    'Dmed': '50th percentile (median) diameter (m)',
    'Dhigh': '85th percentile of diameter (m)',
    'ModFlag': 'Model flag used in Monte Carlo (0/1)',
    'Prot': 'Rotation period (h), -1 if unknown',
    'Tax': 'Taxonomic complex / class',
    'deltat': 'Length of observational arc (yr)'
}

# CSV header line (use when writing a new CSV file)
csv_header_line = ','.join(csv_headers)
print(csv_header_line)


Asteroid,H,RMS,RMS6D,A2,e_A2,da_dt,e_da_dt,max_da_dt,SNR,FAccept,NOptObs,NRejOpt,NRej6D,NRadObs,NRejRad,NRejRad6D,NOptOld,Dlow,Dmed,Dhigh,ModFlag,Prot,Tax,deltat


In [72]:
# Now let's load that data from Gaia (`tableb1.dat`)
gaia_data = pandas.read_csv('./confirmed_yark_dataset/tableb1.dat', sep='\s+', names=csv_header_line, comment='#')
gaia_data.head()

ValueError: Duplicate names are not allowed.