In [1]:
import pandas
import seaborn as sns

# To analyze orbital dynamics effectively, we need these orbital and physical parameters:
identifier_params = [
    'full_name',  # Full designation
    'spkid',     # SPK ID
    'neo',        # Near-Earth Object flag
    'pha',        # Potentially Hazardous Asteroid flag
]

crucial_orbital_params = [
    'a',        # Semi-major axis (AU)
    'e',        # Eccentricity
    'i',        # Inclination (deg)
    'q',        # Perihelion distance (AU)
    'ad',       # Aphelion distance (AU)
    'per',      # Orbital period (years)
    'n',        # Mean motion (deg/day)
    'ma',       # Mean anomaly (deg)
]

physical_properties = [
    'H',        # Absolute magnitude (size proxy)
    'diameter', # Diameter (km) - when available
    'albedo',   # Geometric albedo
    'rot_per',  # Rotation period (hours)
    'GM',       # Mass parameter (rare but valuable)
    'spec_B',   # Spectral type (Bus/Tholen)
    'spec_T',   # Taxonomic class
]


# Non-essential data but useful for assessing data quality later down the line
quality_metrics = [
    'condition_code',  # Orbit uncertainty (0-9, 0=best)
    'n_obs_used',      # Number of observations
    'data_arc',        # Observation span (days)
    'first_obs',       # First observation date
    'last_obs',        # Last observation date
]


In [None]:
import requests

def query_jpl_sbdb(include_quality_metrics=False):
    base_url = "https://ssd-api.jpl.nasa.gov/sbdb_query.api"
    results = []
    fields_str = identifier_params + crucial_orbital_params + physical_properties + (quality_metrics if include_quality_metrics else [])
    print(','.join(fields_str))

    params = {
        'fields': ','.join(fields_str),
        'limit': 100  # Limit the number of results
    }
    response = requests.get(base_url, params=params)

    if response.status_code == 200:
        results.append(response.json())
    else:
        print(f"Error: {response.status_code} - {response.text}")
    
    return results


In [44]:
jpl_data = query_jpl_sbdb()


full_name,spkid,neo,pha,a,e,i,q,ad,per,n,ma,H,diameter,albedo,rot_per,GM,spec_B,spec_T


In [4]:
# pandas.DataFrame(jpl_data)
df = pandas.json_normalize(jpl_data, record_path=['data'])

# set column names from jpl_data[0]['fields'] if present
fields = jpl_data[0].get('fields') if isinstance(jpl_data, list) and jpl_data else None
if fields:
    if len(fields) == len(df.columns):
        df.columns = fields
    else:
        # align/truncate if lengths differ
        df.columns = fields[:len(df.columns)]
df.head()

Unnamed: 0,full_name,spkid,neo,pha,a,e,i,q,ad,per,n,ma,H,diameter,albedo,rot_per,GM,spec_B,spec_T
0,1 Ceres (A801 AA),20000001,N,N,2.766,0.0796,10.59,2.546,2.99,1680.0,0.2143,231.54,3.35,939.4,0.09,9.07417,62.6284,C,G
1,2 Pallas (A802 FA),20000002,N,N,2.77,0.2306,34.93,2.131,3.41,1680.0,0.2138,211.53,4.11,513.0,0.155,7.8132214,13.63,B,B
2,3 Juno (A804 RA),20000003,N,N,2.671,0.2558,12.99,1.988,3.35,1590.0,0.2258,217.59,5.19,246.596,0.214,7.21,,Sk,S
3,4 Vesta (A807 FA),20000004,N,N,2.362,0.0902,7.14,2.149,2.57,1330.0,0.2716,26.81,3.25,522.77,0.4228,5.3421276322,17.2882844,V,V
4,5 Astraea (A845 XA),20000005,N,N,2.577,0.1875,5.36,2.094,3.06,1510.0,0.2383,133.87,6.97,106.699,0.274,16.806,,S,S


In [None]:
# Now let's clean that data from Gaia (`tableb1.dat`)
# CSV headers converted from the fixed-width spec for tableb1.dat

csv_headers = [
    'Asteroid',   # Name of the asteroid
    'H',          # Absolute magnitude (mag)
    'RMS',        # RMS of normalized residuals, 7D OD
    'RMS6D',      # RMS of normalized residuals, 6D OD
    'A2',         # Transversal acceleration component (au/d2)
    'e_A2',       # Error in A2 (au/d2)
    'da_dt',      # Semi-major axis drift (au/Myr)
    'e_da_dt',    # Error in da/dt (au/Myr)
    'max_da_dt',  # Maximum da/dt from Monte Carlo model (au/Myr)
    'SNR',        # Signal-to-noise of A2 detection
    'FAccept',    # [1 Rej.] Flag for acceptance of the detection
    'NOptObs',    # Number of optical observations
    'NRejOpt',    # Number of rejected optical observations in 7D OD
    'NRej6D',     # Number of rejected optical observations in 6D OD
    'NRadObs',    # Number of radar observations
    'NRejRad',    # Number of rejected radar obs in 7D OD
    'NRejRad6D',  # Number of rejected radar obs in 6D OD
    'NOptOld',    # Number of old observations
    'Dlow',       # 15-th percentile of diameter (m)
    'Dmed',       # 50-th percentile of diameter (m)
    'Dhigh',      # 85-th percentile of diameter (m)
    'ModFlag',    # [0/1] Flag for model used in Monte Carlo
    'Prot',       # Rotation period of the asteroid (h), ?=-1 if unknown
    'Tax',        # Taxonomic complex of the asteroid
    'deltat'      # Length of observational arc (yr)
]


Asteroid,H,RMS,RMS6D,A2,e_A2,da_dt,e_da_dt,max_da_dt,SNR,FAccept,NOptObs,NRejOpt,NRej6D,NRadObs,NRejRad,NRejRad6D,NOptOld,Dlow,Dmed,Dhigh,ModFlag,Prot,Tax,deltat


In [23]:
# Now let's load that data from Gaia (`tableb1.dat`)

# `names` must be a list of column names (use csv_headers), and explicitly set header=None

gaia_data = pandas.read_fwf(
	'./confirmed_yark_dataset/tableb1.dat',
	colspecs='infer',
)
gaia_data.columns = csv_headers
gaia_data.head()

gaia_data

Unnamed: 0,Asteroid,H,RMS,RMS6D,A2,e_A2,da_dt,e_da_dt,max_da_dt,SNR,...,NRejRad,NRejRad6D,NOptOld,Dlow,Dmed,Dhigh,ModFlag,Prot,Tax,deltat
0,1998 SD9,23.918,0.788754,6.841540,-2.898460e-13,1.564160e-15,-0.019706,0.000106,0.031790,185.30500,...,0,0,27,25.295,35.41,75.886,1,-1.00,N,19.992
1,99942,18.945,0.307869,0.336636,-2.888940e-14,2.350760e-16,-0.001326,0.000011,0.002811,122.89400,...,0,5,0,300.000,340.00,380.000,0,27.38,N,17.180
2,480883,20.900,0.472885,2.486020,-6.918980e-14,7.312950e-16,-0.005045,0.000053,0.011612,94.61270,...,0,2,0,103.673,172.78,287.979,1,-1.00,V,20.085
3,2340,20.437,0.728716,0.759563,-2.998710e-14,5.250600e-16,-0.001738,0.000030,0.008028,57.11180,...,0,0,92,134.014,193.57,312.700,1,3.35,Q,45.120
4,524522,20.530,0.463988,1.099570,-5.983760e-14,1.051020e-15,-0.003592,0.000063,0.004257,56.93290,...,0,7,0,112.383,176.60,337.150,1,13.43,N,16.060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
457,2005 NE21,21.268,0.587481,0.615949,5.852140e-14,1.938950e-14,0.003712,0.001230,0.008357,3.01820,...,0,0,0,102.221,147.65,261.232,1,-1.00,S,14.007
458,152664,19.669,0.478537,0.486609,1.623280e-14,5.381410e-15,0.000907,0.000301,0.004420,3.01645,...,0,0,40,225.018,353.59,739.343,1,17.38,Q,27.910
459,7336,18.789,0.464234,0.469641,3.145140e-14,1.043840e-14,0.001143,0.000379,0.002218,3.01305,...,0,0,203,273.707,430.11,742.918,1,3.36,Q,35.239
460,162416,21.330,0.585074,0.489558,1.559140e-11,5.182290e-12,0.630594,0.209598,0.006902,3.00859,...,0,0,0,116.000,141.00,166.000,0,-1.00,N,5.322


In [33]:
# Let's get clean names or IAU designations from the Gaia dataset
gaia_names = gaia_data['Asteroid'].tolist()

In [68]:
f = open("names", "w")
for name in gaia_names:
    f.write(name + "\n")
f.close()

In [83]:
import requests
import time
import json

def fetch_asteroid_data(designation):
    """Fetches complete data for an asteroid, including physical parameters."""
    url = "https://ssd-api.jpl.nasa.gov/sbdb.api"
    params = {
        'sstr': designation,
        'phys-par': 1  # Essential to get physical properties
    }
    
    try:
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raises an exception for bad status codes
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

f = open("names", "r")
read_list = f.readlines()
f.close()

for designation in read_list[329:]:
    data = fetch_asteroid_data(designation)
    print(json.dumps(data, indent=2))
    time.sleep(1)  # Be polite to the API
    if not data:
        continue

    # write the raw "object" JSON to file (one JSON object per line)
    with open('asteroid_mapping.txt', 'a') as f:
        f.write(json.dumps(data) + '\n')

    print(f"Wrote raw object for {designation}: spkid={data.get('object').get('spkid')}, des={data.get('object').get('des')}")


{
  "orbit": {
    "pe_used": "DE441",
    "soln_date": "2022-12-20 14:11:11",
    "not_valid_before": null,
    "elements": [
      {
        "sigma": "9.8e-08",
        "units": null,
        "value": "0.313",
        "label": "e",
        "title": "eccentricity",
        "name": "e"
      },
      {
        "value": "0.93",
        "units": "au",
        "sigma": "1.3e-08",
        "title": "semi-major axis",
        "name": "a",
        "label": "a"
      },
      {
        "label": "q",
        "name": "q",
        "title": "perihelion distance",
        "sigma": "9.7e-08",
        "value": "0.639",
        "units": "au"
      },
      {
        "units": "deg",
        "value": "6.53",
        "sigma": "4.5e-06",
        "name": "i",
        "title": "inclination; angle with respect to x-y ecliptic plane",
        "label": "i"
      },
      {
        "label": "node",
        "title": "longitude of the ascending node",
        "name": "om",
        "sigma": "1.6e-05",
        "uni

In [59]:
import requests
import time
from typing import List, Dict, Any

# I'm going to beat up the person in charge of this API
def batch_query_asteroids(designations: List[str], batch_size: int = 10, delay: float = 1.0) -> List[Dict[str, Any]]:
    """
    Batch query asteroids from JPL SBDB API using IAU designations.
    
    Args:
        designations: List of asteroid IAU designations (e.g., ['Ceres', 'Pallas', 'Vesta'])
        batch_size: Number of asteroids to query in each API call (JPL recommends small batches)
        delay: Delay between requests in seconds to be respectful to the API
    
    Returns:
        List of asteroid data dictionaries
    """
    base_url = "https://ssd-api.jpl.nasa.gov/sbdb_query.api"
    results = []
    
    # Process in batches to avoid overwhelming the API
    for i in range(0, len(designations), batch_size):
        batch = designations[i:i + batch_size]
        
        # Create custom field constraint for IAU names
        names_constraint = '{"OR":' + '[' + ",".join(f'"sstr|RE|{name}"' for name in batch) + ']}'
        print(names_constraint)
        
        # Define the fields we want to retrieve
        params = {
            'fields': ','.join(identifier_params + crucial_orbital_params + physical_properties),
            'sb-cdata': names_constraint,
        }
        
        try:
            print(f"Querying batch {i//batch_size + 1}: {batch}")
            response = requests.get(base_url, params=params, timeout=30)
            response.raise_for_status()
            
            data = response.json()
            
            if 'data' in data and data['data']:
                # Map fields to data values
                for asteroid_data in data['data']:
                    asteroid_dict = dict(zip(data['fields'], asteroid_data))
                    results.append(asteroid_dict)
                    print(f"  âœ“ Retrieved: {asteroid_dict.get('full_name', 'Unknown')}")
            else:
                print(f"  âš  No data returned for batch")
                
        except requests.exceptions.RequestException as e:
            print(f"  âœ— Error querying batch: {e}")
        except ValueError as e:
            print(f"  âœ— JSON parsing error: {e}")
        
        # Be respectful to the API - add delay between requests
        if i + batch_size < len(designations):
            print(f"Waiting {delay} seconds before next batch...")
            time.sleep(delay)
    
    return results


print("Starting batch query of asteroids...")
asteroids_data = batch_query_asteroids(
    designations=gaia_names[:10],
    batch_size=5,  # Small batches to be API-friendly
    delay=2.0      # 2 second delay between requests
)

print(f"\nQuery complete! Retrieved data for {len(asteroids_data)} asteroids.")
asteroids_data

Starting batch query of asteroids...
{"OR":["sstr|RE|1998 SD9","sstr|RE|99942","sstr|RE|480883","sstr|RE|2340","sstr|RE|524522"]}
Querying batch 1: ['1998 SD9', '99942', '480883', '2340', '524522']
  âœ— Error querying batch: 400 Client Error: Bad Request for url: https://ssd-api.jpl.nasa.gov/sbdb_query.api?fields=full_name%2Cspkid%2Cneo%2Cpha%2Ca%2Ce%2Ci%2Cq%2Cad%2Cper%2Cn%2Cma%2CH%2Cdiameter%2Calbedo%2Crot_per%2CGM%2Cspec_B%2Cspec_T&sb-cdata=%7B%22OR%22%3A%5B%22sstr%7CRE%7C1998+SD9%22%2C%22sstr%7CRE%7C99942%22%2C%22sstr%7CRE%7C480883%22%2C%22sstr%7CRE%7C2340%22%2C%22sstr%7CRE%7C524522%22%5D%7D
Waiting 2.0 seconds before next batch...
{"OR":["sstr|RE|2002 BF25","sstr|RE|2012 UR158","sstr|RE|523599","sstr|RE|2000 EB14","sstr|RE|483656"]}
Querying batch 2: ['2002 BF25', '2012 UR158', '523599', '2000 EB14', '483656']
  âœ— Error querying batch: 400 Client Error: Bad Request for url: https://ssd-api.jpl.nasa.gov/sbdb_query.api?fields=full_name%2Cspkid%2Cneo%2Cpha%2Ca%2Ce%2Ci%2Cq%2Cad%2Cpe

[]

In [None]:
# test bulk query of 5
jpl_results = query_jpl_sbdb(gaia_names[:5], include_quality_metrics=False)

full_name,spkid,neo,pha,a,e,i,q,ad,per,n,ma,H,diameter,albedo,rot_per,GM,spec_B,spec_T
Error: 400 - {"message":"bad character(s) in sb-cdata EQ argument for field 'name': check API document at https://ssd-api.jpl.nasa.gov/doc/sbdb_filter.html","code":"400"}



In [29]:
jpl_results_df = pandas.json_normalize(jpl_results)
jpl_results_df.head()

Unnamed: 0,orbit.source,orbit.equinox,orbit.n_del_obs_used,orbit.cov_epoch,orbit.model_pars,orbit.last_obs,orbit.data_arc,orbit.moid,orbit.soln_date,orbit.pe_used,...,object.des,object.prefix,object.orbit_class.code,object.orbit_class.name,object.neo,object.spkid,object.shortname,object.orbit_id,signature.source,signature.version
0,JPL,J2000,58,2455562.5,[{'desc': 'solar radiation pressure area-to-ma...,2020-10-03,7693,0.00322,2021-01-07 11:18:06,DE424,...,101955,,APO,Apollo,True,20101955,101955 Bennu,118,NASA/JPL Small-Body Database (SBDB) API,1.3


In [16]:
# Get drift column candidates for analysis from Gaia dataset
drift_candidates = gaia_data[['Asteroid', 'da_dt', 'e_da_dt', 'SNR', 'FAccept']] 
drift_candidates.head()

Unnamed: 0,Asteroid,da_dt,e_da_dt,SNR,FAccept
0,1998 SD9,-0.019706,0.000106,185.305,1
1,99942,-0.001326,1.1e-05,122.894,1
2,480883,-0.005045,5.3e-05,94.6127,1
3,2340,-0.001738,3e-05,57.1118,1
4,524522,-0.003592,6.3e-05,56.9329,1


In [74]:
import numpy as np

class SimpleImpactProbability:
    def __init__(self):
        self.earth_radius_au = 6371 / 149597870.7  # Earth radius in AU
        self.earth_cross_section = np.pi * (self.earth_radius_au ** 2)
        
    def calculate_impact_probability(self, asteroid_data, yarkovsky_drift):
        """
        Simple impact probability based on Yarkovsky drift and orbital geometry
        
        Args:
            asteroid_data: dict with 'a' (semi-major axis), 'e' (eccentricity), 
                          'i' (inclination), 'moid' (minimum orbit intersection distance)
            yarkovsky_drift: da/dt from your ML model (AU/year)
            
        Returns:
            impact_probability: 0 to 1 probability
        """
        
        # 1. Base probability from current orbital geometry
        base_prob = self._calculate_geometric_probability(asteroid_data)
        
        # 2. Yarkovsky amplification factor
        yarkovsky_factor = self._calculate_yarkovsky_amplification(yarkovsky_drift, asteroid_data)
        
        # 3. Combined probability
        impact_prob = base_prob * yarkovsky_factor
        
        return min(impact_prob, 1.0)  # Cap at 100%
    
    def _calculate_geometric_probability(self, asteroid):
        """Probability based on orbital intersection with Earth"""
        moid = asteroid.get('moid', 1.0)  # Minimum Orbit Intersection Distance (AU)
        inclination = asteroid.get('i', 0.0)  # Degrees
        
        # Convert MOID to "effective miss distance" considering inclination
        inclination_rad = np.radians(inclination)
        effective_moid = moid / np.cos(inclination_rad) if inclination_rad > 0 else moid
        
        # Simple geometric probability: smaller MOID = higher probability
        if effective_moid <= 0.001:  # Very close approaches
            geometric_prob = 0.1
        elif effective_moid <= 0.01:  # Close approaches
            geometric_prob = 0.01
        elif effective_moid <= 0.05:  # PHA territory
            geometric_prob = 0.001
        else:
            geometric_prob = 1e-6
            
        return geometric_prob
    
    def _calculate_yarkovsky_amplification(self, yarkovsky_drift, asteroid):
        """Amplify probability based on Yarkovsky drift rate"""
        a = asteroid.get('a', 1.0)  # Semi-major axis (AU)
        
        # Normalize drift by orbital size (bigger effect for inner solar system)
        normalized_drift = abs(yarkovsky_drift) / a
        
        # Amplification factor: more drift = higher probability of orbit changes
        if normalized_drift > 1e-4:  # Very strong drift
            amplification = 100.0
        elif normalized_drift > 1e-5:  # Strong drift
            amplification = 10.0
        elif normalized_drift > 1e-6:  # Moderate drift
            amplification = 3.0
        elif normalized_drift > 1e-7:  # Weak drift
            amplification = 1.5
        else:  # Negligible drift
            amplification = 1.0
            
        return amplification

    def risk_category(self, probability, diameter_km):
        """Simple risk categorization"""
        if probability > 0.01:  # 1% chance
            return "HIGH RISK" if diameter_km > 0.14 else "MEDIUM RISK"
        elif probability > 0.001:  # 0.1% chance
            return "LOW RISK"
        else:
            return "MINIMAL RISK"

In [76]:
# Your ML model output (example)
def predict_yarkovsky_drift(diameter, albedo, rotation_period, semi_major_axis):
    """
    Your trained ML model goes here!
    Returns da/dt in AU/year
    """
    # Placeholder - replace with your actual model
    drift = (1 - albedo) * diameter / (rotation_period * semi_major_axis) * 1e-5
    return -18.954e-4  # Example fixed drift of Bennu

# Calculate impact probability for an asteroid
calculator = SimpleImpactProbability()

# Example asteroid data (from JPL)
asteroid = {
    'a': 1.2,           # AU
    'e': 0.3,           # eccentricity  
    'i': 5.0,           # degrees
    'moid': 0.02,       # AU (close approach!)
    'diameter': 0.3     # km
}

# Get Yarkovsky drift from your ML model
yarkovsky_drift = predict_yarkovsky_drift(
    diameter=asteroid['diameter'],
    albedo=0.15,
    rotation_period=12.0,
    semi_major_axis=asteroid['a']
)

# Calculate impact probability
impact_prob = calculator.calculate_impact_probability(asteroid, yarkovsky_drift)
risk_level = calculator.risk_category(impact_prob, asteroid['diameter'])

print(f"Impact Probability: {impact_prob:.2e}")
print(f"Risk Level: {risk_level}")
print(f"Yarkovsky Drift: {yarkovsky_drift:.2e} AU/year")

Impact Probability: 1.00e-01
Risk Level: HIGH RISK
Yarkovsky Drift: -1.90e-03 AU/year


In [88]:
f = open("asteroid_mapping.txt", "r")
asteroid_mapping_lines = f.readlines()
f.close()

In [89]:
import pandas as pd
import json

def extract_asteroid_data(json_data):
    """
    Extract specified parameters from JPL SBDB JSON data
    """
    extracted = {}
    
    # Identifier Parameters
    extracted['full_name'] = json_data.get('object', {}).get('fullname')
    extracted['spkid'] = json_data.get('object', {}).get('spkid')
    extracted['neo'] = json_data.get('object', {}).get('neo', False)
    extracted['pha'] = json_data.get('object', {}).get('pha', False)
    
    # Extract orbital elements from the elements list
    orbit_elements = {}
    if 'orbit' in json_data and 'elements' in json_data['orbit']:
        for element in json_data['orbit']['elements']:
            label = element.get('label')
            value = element.get('value')
            if label and value is not None:
                orbit_elements[label] = value
    
    # Crucial Orbital Parameters
    extracted['a'] = orbit_elements.get('a')  # semi-major axis
    extracted['e'] = orbit_elements.get('e')  # eccentricity
    extracted['i'] = orbit_elements.get('i')  # inclination
    extracted['q'] = orbit_elements.get('q')  # perihelion distance
    extracted['ad'] = orbit_elements.get('Q')  # aphelion distance (note: 'Q' not 'ad')
    extracted['per'] = orbit_elements.get('period')  # orbital period
    extracted['n'] = orbit_elements.get('n')  # mean motion
    extracted['ma'] = orbit_elements.get('M')  # mean anomaly (note: 'M' not 'ma')
    
    # Extract physical parameters from phys_par list
    phys_params = {}
    if 'phys_par' in json_data:
        for param in json_data['phys_par']:
            name = param.get('name')
            value = param.get('value')
            if name and value is not None:
                phys_params[name] = value
    
    # Physical Properties
    extracted['H'] = phys_params.get('H')  # absolute magnitude
    extracted['diameter'] = phys_params.get('diameter')  # diameter in km
    extracted['albedo'] = phys_params.get('albedo')  # geometric albedo
    extracted['rot_per'] = phys_params.get('rot_per')  # rotation period
    extracted['GM'] = phys_params.get('GM')  # mass parameter
    
    # Spectral types (these are typically in the object section)
    extracted['spec_B'] = json_data.get('object', {}).get('spec_B')
    extracted['spec_T'] = json_data.get('object', {}).get('spec_T')
    
    return extracted

def clean_asteroid_data(json_list):
    """
    Clean and convert a list of asteroid JSON objects to a DataFrame
    """
    cleaned_data = []
    
    for json_obj in json_list:
        try:
            extracted = extract_asteroid_data(json_obj)
            cleaned_data.append(extracted)
        except Exception as e:
            print(f"Error processing asteroid data: {e}")
            continue
    
    # Create DataFrame
    df = pd.DataFrame(cleaned_data)
    
    # Convert data types
    numeric_columns = ['a', 'e', 'i', 'q', 'ad', 'per', 'n', 'ma', 'H', 'diameter', 'albedo', 'rot_per', 'GM']
    
    for col in numeric_columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    
    # Convert boolean columns
    bool_columns = ['neo', 'pha']
    for col in bool_columns:
        if col in df.columns:
            df[col] = df[col].astype(bool)
    
    return df

# Alternative function for processing from a file
def process_asteroid_file(filename):
    """
    Process asteroid data from a JSON file
    """
    try:
        with open(filename, 'r') as f:
            if filename.endswith('.json'):
                # Single JSON object or array
                data = json.load(f)
            else:
                # JSONL format (one JSON per line)
                data = [json.loads(line) for line in f]
        
        return clean_asteroid_data(data)
    except Exception as e:
        print(f"Error reading file: {e}")
        return pd.DataFrame()

    
# Clean the data
cleaned_asteroid_df = process_asteroid_file('asteroid_mapping.txt')

print("Cleaned DataFrame:")
print(cleaned_asteroid_df.head())
print(f"\nDataFrame shape: {cleaned_asteroid_df.shape}")
print(f"\nColumns: {list(cleaned_asteroid_df.columns)}")
print(f"\nData types:\n{cleaned_asteroid_df.dtypes}")

# Optional: Save to CSV for your ML model
cleaned_asteroid_df.to_csv('cleaned_asteroid_data.csv', index=False)
print("\nData saved to 'cleaned_asteroid_data.csv'")

Cleaned DataFrame:
                   full_name     spkid   neo    pha      a      e     i  \
0                 (1998 SD9)   3014184  True  False  0.702  0.504  2.99   
1   99942 Apophis (2004 MN4)  20099942  True   True  0.922  0.191  3.34   
2          480883 (2001 YE4)  20480883  True   True  0.677  0.540  4.85   
3      2340 Hathor (1976 UA)  20002340  True   True  0.844  0.450  5.86   
4  524522 Zoozve (2002 VE68)  20524522  True   True  0.724  0.410  9.04   

       q    ad    per     n     ma      H  diameter  albedo  rot_per  GM  \
0  0.348  1.06  215.0  1.67  233.0  24.09       NaN     NaN      NaN NaN   
1  0.746  1.10  324.0  1.11  313.0  19.09      0.34    0.35    30.56 NaN   
2  0.311  1.04  203.0  1.77  193.0  20.92       NaN     NaN      NaN NaN   
3  0.464  1.22  283.0  1.27  202.0  20.43      0.30     NaN     3.35 NaN   
4  0.427  1.02  225.0  1.60  329.0  20.54       NaN     NaN    13.50 NaN   

  spec_B spec_T  
0   None   None  
1   None   None  
2   None   None  
3