# Generate Asteroid CSV for Calibration Level 3 Data

This pipeline searches a specified set of JWST observations to identify which known asteroids fall within each image at the time it was taken. The names of any known asteroids inside the image bounds, together with useful information about the observation parameters, are saved to a CSV.

## Import Libraries

In [1]:
from astroquery.esa.jwst import Jwst
import numpy as np
from datetime import datetime

from tqdm.notebook import tqdm
tqdm.pandas()

import pandas as pd
from astropy.io import fits
import astropy.time as at
from astroquery.jplhorizons import Horizons
import re
import os
import sys
import time
import logging
import shapely.wkt
from shapely.geometry import Polygon, Point
from astropy.wcs import WCS
from astropy.coordinates import SkyCoord
from astropy.time import Time
import astropy.units as u
from sbident import SBIdent

import warnings
warnings.filterwarnings('ignore')

## Utility Functions

In [2]:
def formatPolygon(polyString):
    """Rewrite an archive footprint string as a 'POLYGON ((...))' string for shapely.

    The first 8 and the last 2 characters of ``polyString`` are discarded and
    the remainder is split on single spaces into alternating coordinate values.
    NOTE(review): the slice offsets presumably match the wrapper text the
    archive puts around the coordinate list -- confirm against a real
    ``position_bounds_spoly`` value.

    The first coordinate pair is repeated at the end so the ring is closed.
    An odd number of values raises ``IndexError``.
    """
    tokens = polyString[8:-2].split(' ')

    # Repeat the opening vertex at the end to close the loop.
    opening_x = tokens[0]
    tokens.append(opening_x)
    tokens.append(tokens[1])

    # Walk the flat list two values at a time, gluing each pair with a space.
    vertices = []
    for start in range(0, len(tokens), 2):
        vertices.append(tokens[start] + ' ' + tokens[start + 1])

    return "POLYGON ((" + ', '.join(vertices) + "))"

In [3]:
def replace_values(val):
    """Convert a binary flag given as a string into a 'yes' / 'no' label.

    Returns 'yes' only when ``val`` equals the string '1'; every other value
    maps to 'no'.
    """
    # The comparison must use the parameter itself; the previous body referred
    # to an undefined name and raised NameError on every call.
    return 'yes' if val == '1' else 'no'

In [4]:
class HiddenPrints:
    """Context manager that silences ``print`` output inside a ``with`` block.

    On entry ``sys.stdout`` is swapped for a handle on ``os.devnull``; on exit
    that handle is closed and the previous ``sys.stdout`` is put back.
    """

    def __enter__(self):
        # Right-hand side is evaluated first, so the current stream is captured
        # before the null sink replaces it.
        self._original_stdout, sys.stdout = sys.stdout, open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close whatever is currently installed as stdout, then restore.
        active_stream = sys.stdout
        active_stream.close()
        sys.stdout = self._original_stdout

In [5]:
def MJDconversion(modifiedJulianDate):
    """Return the ISO date-time representation of a Modified Julian Date.

    The value is interpreted by ``astropy.time.Time`` with ``format='mjd'`` and
    its ``.iso`` attribute is returned.
    """
    epoch = Time(modifiedJulianDate, format='mjd')
    return epoch.iso

In [6]:
def isLocatedInImage(targetRA, targetDEC, imagePOLYGON):
    """Return whether the (RA, DEC) position lies inside the image footprint.

    A point is built from ``targetRA`` and ``targetDEC`` and passed to
    ``imagePOLYGON.contains``, whose result is returned.
    """
    target_point = Point(targetRA, targetDEC)
    return imagePOLYGON.contains(target_point)

In [7]:
def definePolyEdges(polyString):
    """Return two corner coordinates that bound the image footprint.

    ``polyString`` is a 'POLYGON ((ra dec, ra dec, ...))' string. The wrapper
    text and comma separators are removed, leaving a flat sequence of numbers
    in which even positions are RA and odd positions are DEC (degrees, as
    passed to ``SkyCoord`` with ``unit='deg'``).

    Returns a two-element list of ICRS ``SkyCoord`` objects:
    [(min RA, min DEC), (max RA, max DEC)].

    NOTE(review): a plain min/max presumably mishandles footprints straddling
    RA 0/360 -- confirm whether that case occurs in the data.
    """
    # Reduce the string to whitespace-separated numbers.
    flattened = polyString.replace("POLYGON ((", "")
    flattened = flattened.replace("))", "")
    flattened = flattened.replace(", ", " ")
    values = [float(token) for token in flattened.split()]

    # Even indices are RA, odd indices are DEC.
    ra_values, dec_values = values[::2], values[1::2]

    # Corner at the minimum RA and minimum DEC of the footprint.
    minimum_corner = SkyCoord(min(ra_values), min(dec_values), frame='icrs', unit='deg')

    # Corner at the maximum RA and maximum DEC of the footprint.
    maximum_corner = SkyCoord(max(ra_values), max(dec_values), frame='icrs', unit='deg')

    return [minimum_corner, maximum_corner]

In [8]:
def JWSTposition(obsTime):
    """Return JWST's geocentric state vector at ``obsTime`` as an SBIdent location.

    Follows example 3 of
    https://github.com/bengebre/sbident/blob/main/examples/sbident-examples.ipynb

    Parameters
    ----------
    obsTime : astropy.time.Time
        Observation epoch; its ``.jd`` value is sent to JPL Horizons.

    Returns
    -------
    dict
        ``{'xobs': 'x,y,z,vx,vy,vz'}`` with the six components of the first
        returned epoch formatted in scientific notation, position in km and
        velocity in km/s.
    """
    # AU -> km conversion factor
    au_to_km = (1 * u.au).to(u.km).value
    seconds_per_day = 86400

    # Query JPL Horizons for the JWST state vector as seen from the geocenter
    jwst_output = Horizons(id='JWST', location='Geocentric', epochs=obsTime.jd, id_type='id').vectors(refplane='earth')

    # Horizons reports positions in AU and velocities in AU/day. Scale ALL six
    # components to km first; previously only the positions were scaled, which
    # left the velocities in AU/s instead of km/s.
    state_au = jwst_output[['x', 'y', 'z', 'vx', 'vy', 'vz']].to_pandas().to_numpy()
    state_km = state_au * au_to_km
    state_km[:, 3:] /= seconds_per_day  # km/day -> km/s

    # Form the xobs dictionary that is the input for the SBIdent location argument
    xobs = ','.join([f"{component:.12e}" for component in state_km[0]])
    return {'xobs': xobs}

## Generate ADQL Search

In [9]:
def queryArchive(volume, readouts, query_filters):
    """Run an ADQL query against ``jwst.<volume>`` and return the rows as a DataFrame.

    ``readouts`` are the column names placed in the SELECT clause (joined with
    commas) and ``query_filters`` are the conditions placed in the WHERE clause
    (joined with ' AND '). The job is launched asynchronously and its results
    are converted to pandas.

    The returned frame is sorted by 'observationid' with a fresh 0..n-1 index.
    """
    # Assemble the query text from its parts
    selected_columns = ','.join(readouts)
    conditions = ' AND '.join(query_filters)
    query_string = f"SELECT {selected_columns} FROM jwst.{volume} WHERE {conditions}"

    # Submit the job and pull the result table into pandas
    result_table = Jwst.launch_job(query_string, async_job=True).get_results()
    frame = result_table.to_pandas()

    # Order by observation ID and renumber the rows
    ordered = frame.sort_values(by=['observationid'])
    return ordered.reset_index(drop=True)

In [10]:
def query4proposals(volume, readouts, query_filters, proposals):
    """Query ``jwst.<volume>`` for the given proposals and add derived columns.

    Parameters
    ----------
    volume : str
        Archive table name, used as ``jwst.<volume>``.
    readouts : list of str
        Column names for the SELECT clause. The result must include
        'instrument_name', 'time_bounds_lower', 'time_bounds_upper' and
        'observationid', which are read below.
    query_filters : list of str
        Conditions for the WHERE clause (joined with ' AND '). The list is
        not modified.
    proposals : iterable
        Proposal IDs; each is quoted and placed in a ``proposal_id IN (...)``
        condition.

    Returns
    -------
    pandas.DataFrame
        Query result sorted by 'observationid' with a reset index, plus the
        columns 'Instrument' ('instrument_name' with '/IMAGE' removed) and
        'Lower_Time', 'Middle_Time', 'Upper_Time' (the time bounds and their
        midpoint passed through ``MJDconversion``).
    """
    propIDs = ', '.join(f"'{prop}'" for prop in proposals)

    # Build a new filter list rather than appending to the caller's list, so
    # repeated calls with the same list do not accumulate proposal conditions.
    all_filters = [*query_filters, f"jwst.{volume}.proposal_id IN ({propIDs})"]

    # Assemble the query text
    readout_string = ','.join(readouts)
    filter_string = ' AND '.join(all_filters)
    query_string = f"SELECT {readout_string} FROM jwst.{volume} WHERE ({filter_string})"

    # Launch the job asynchronously and convert the result table to pandas
    job = Jwst.launch_job(query_string, async_job=True)
    panda_results = job.get_results().to_pandas()

    # Add the short instrument name and the exposure time range (converted from MJD)
    panda_results['Instrument'] = panda_results['instrument_name'].str.replace('/IMAGE', '', regex=False)
    panda_results['Lower_Time'] = panda_results['time_bounds_lower'].apply(MJDconversion)
    panda_results['Middle_Time'] = [
        MJDconversion(lower + (upper - lower) / 2)
        for lower, upper in zip(panda_results['time_bounds_lower'], panda_results['time_bounds_upper'])
    ]
    panda_results['Upper_Time'] = panda_results['time_bounds_upper'].apply(MJDconversion)

    # Sort and reset index
    panda_results.sort_values(by='observationid', inplace=True)
    panda_results.reset_index(drop=True, inplace=True)

    return panda_results

## Check Asteroids Flagged by the JPL Horizons Database (From JWST Reference Frame)

In [11]:
def coneSearch(Exptime, Edge1, Edge2):
    """Run the sbident small-body search for one epoch and return its results.

    ``Exptime`` is converted to an ``astropy.time.Time``; JWST's position at
    that epoch is obtained from ``JWSTposition``; both are handed to
    ``SBIdent`` (https://github.com/bengebre/sbident) together with the two
    bounding corners ``[Edge1, Edge2]``. The ``.results`` attribute of the
    search object is returned.
    """
    # Epoch at which the search is evaluated
    observation_epoch = Time(Exptime)

    # Observer location: JWST at that epoch
    observer_location = JWSTposition(observation_epoch)

    # Small-body identification bounded by the two corners
    search = SBIdent(observer_location, observation_epoch, [Edge1, Edge2])
    return search.results

In [12]:
def jplHorizonsSearch(targetID, startTime, stopTime, polyString, probeMinutes=1):
    """Check whether a target's JPL Horizons ephemeris enters the image footprint.

    Used as a second, per-object check on the names returned by the cone
    search: the ephemeris is sampled between the exposure start and end, and
    each sampled position is tested against the image polygon.

    Parameters
    ----------
    targetID : str
        Identifier passed to ``Horizons(id=...)``.
    startTime, stopTime :
        Exposure start and end; converted with ``str()`` for the Horizons
        'start' / 'stop' epochs.
    polyString : str
        WKT polygon string of the image footprint (loaded with shapely).
    probeMinutes : int, optional
        Ephemeris sampling step in minutes. Defaults to 1, the finest step
        used so far; larger values reduce the number of samples for long
        exposures at the cost of possibly missing a brief crossing.

    Returns
    -------
    bool
        True if any sampled (RA, DEC) position lies inside the polygon.
    """
    # Build the polygon geometry from its WKT string
    poly = shapely.wkt.loads(polyString)

    # Request the target's ephemeris over the exposure window
    jpl_output = Horizons(
        id=targetID,
        location='Geocentric@JWST',
        epochs={'start': str(startTime), 'stop': str(stopTime), 'step': f"{probeMinutes}m"}
    )

    # Get ephemerides data
    jpl_pandas = jpl_output.ephemerides().to_pandas()

    # Pair up the sampled RA and DEC values
    asteroid_positions = zip(jpl_pandas['RA'], jpl_pandas['DEC'])

    # True as soon as one sampled position falls inside the imaging window
    return any(isLocatedInImage(ra, dec, poly) for ra, dec in asteroid_positions)

In [13]:
def asteroidSearch(polyString, ExpStart, ExpMid, ExpEnd):
    """Return the asteroids found inside one observation's footprint.

    Two stages:
    1. ``coneSearch`` at the mid-exposure time, bounded by the footprint's
       min/max corners, gives candidate object names. Unlike the Horizons
       check, this search uses a single fixed time.
    2. Each candidate is re-checked with ``jplHorizonsSearch`` over the full
       exposure window against the actual footprint polygon.

    Parameters
    ----------
    polyString :
        Archive footprint value; converted with ``str()`` and ``formatPolygon``.
    ExpStart, ExpMid, ExpEnd :
        Exposure start, midpoint and end times.

    Returns
    -------
    list of str or str
        The identifiers that passed the Horizons check, or the empty string
        when there are none (either stage).
    """
    # Convert the format of the polygon string
    poly_string_formatted = formatPolygon(str(polyString))

    # Define the bounding corners from the polygon
    poly_corners = definePolyEdges(poly_string_formatted)

    # Stage 1: candidate objects at the midway exposure time
    sbid_middle_results = coneSearch(ExpMid, *poly_corners)

    # Explicit None/length test instead of truthiness, which is not defined
    # for every table-like type.
    if sbid_middle_results is None or len(sbid_middle_results) == 0:
        return ""  # No asteroids found in the cone search

    # Reduce each object name to the text after its last '(' with ')' removed,
    # de-duplicating through the set.
    unique_asteroids = {
        name.split('(')[-1].replace(')', '')
        for name in sbid_middle_results['Object name']
    }

    # Stage 2: keep only candidates whose Horizons ephemeris enters the image
    # polygon rather than just the much larger cone search region.
    contained_asteroids = [
        asteroid for asteroid in unique_asteroids
        if jplHorizonsSearch(asteroid, ExpStart, ExpEnd, poly_string_formatted)
    ]

    return contained_asteroids if contained_asteroids else ""

## Main Function

In [14]:
def level3asteroidDetection(username, password, save_loc='LVL3_Asteroids', propRange=(1000, 2000), instrumentName='MIRI/IMAGE', dataType='image', calLVL=3, volume='archive', additionalFilters=None):
    """Find known asteroids in JWST archive images for a range of proposals.

    Queries the archive for observations matching the filters, then runs
    ``asteroidSearch`` on every returned observation and collects the results
    in a table.

    Parameters
    ----------
    username, password : str
        JWST archive login credentials.
    save_loc : str
        Folder for results; created if it does not exist.
    propRange : sequence of two values
        Lower and upper proposal ID (inclusive) to search.
    instrumentName : str
        Archive instrument name filter.
    dataType : str
        Archive data product type filter.
    calLVL : int
        Archive calibration level filter.
    volume : str
        Archive table to query, used as ``jwst.<volume>``.
    additionalFilters : list of str, optional
        Extra conditions for the initial proposal search, in the form
        ``"jwst.{volume}.{column} {operator} {value}"``.

    Returns
    -------
    pandas.DataFrame or None
        One row per observation with columns Proposal, Observation,
        Instrument, Filter, Moving, Polygon, Exp Start, Exp End, Asteroids.
        ``None`` when the initial query fails or matches no proposals.
    """
    # None instead of a shared mutable default; copy so the caller's list is untouched
    extra_filters = list(additionalFilters) if additionalFilters is not None else []

    ### Login to the JWST archive ###
    with HiddenPrints():
        Jwst.login(user=f"{username}", password=f"{password}")

    ### Generate Save Location ###
    if not os.path.exists(save_loc):
        # makedirs also handles nested paths, which os.mkdir rejects
        os.makedirs(save_loc, exist_ok=True)
        print(f'Generating New Save Location {save_loc}')

    ### Apply Initial Query Search ###
    lowerbound_proposal, upperbound_proposal = propRange

    initQueryFilters = [
        f'jwst.{volume}.calibrationlevel = {calLVL}',
        f"jwst.{volume}.dataproducttype = '{dataType}'",
        f"jwst.{volume}.instrument_name = '{instrumentName}'",
        f"jwst.{volume}.proposal_id >= '{lowerbound_proposal}'",
        f"jwst.{volume}.proposal_id <= '{upperbound_proposal}'"
    ] + extra_filters

    # Query the archive for the proposals present in the requested range
    try:
        with HiddenPrints():
            initArchiveDF = queryArchive(volume, ['proposal_id', 'observationid'], initQueryFilters)
            uniquePropList = sorted(set(initArchiveDF['proposal_id']))
    except Exception as e:
        print("Filter did not comply with Archive query expectations")
        print(e)
        print("Ensure the following format ... 'jwst.{volume}.{filterTitle} {(in)equlity} {filterAmount}'")
        # Stop here: previously execution fell through and failed on the
        # unbound uniquePropList below.
        return None

    if not uniquePropList:
        return None

    ### Query the observations of the proposals found ###
    print(f"Processing proposals: {uniquePropList}")

    # Columns to return from the archive
    queryTopics = ['proposal_id', 'observationid', 'instrument_name', 'energy_bandpassname',
                   'target_moving', 'position_bounds_spoly', 'time_bounds_lower', 'time_bounds_upper']

    queryFilters = [
        f'jwst.{volume}.calibrationlevel = {calLVL}',
        f"jwst.{volume}.dataproducttype = '{dataType}'",
        f"jwst.{volume}.instrument_name = '{instrumentName}'"
    ]

    with HiddenPrints():
        archiveDF = query4proposals(volume, queryTopics, queryFilters, uniquePropList)

    ### Run the asteroid search on every observation (with a progress bar) ###
    archiveDF['Known_Asteroids'] = archiveDF.progress_apply(
        lambda row: asteroidSearch(
            polyString=row['position_bounds_spoly'],
            ExpStart=row['Lower_Time'],
            ExpMid=row['Middle_Time'],
            ExpEnd=row['Upper_Time']
        ), axis=1
    )

    ### Select, rename and format the output columns ###
    asteroidCSV = (
        archiveDF[['proposal_id', 'observationid', 'Instrument', 'energy_bandpassname',
                   'target_moving', 'position_bounds_spoly', 'Lower_Time', 'Upper_Time', 'Known_Asteroids']]
        .rename(columns={
            'proposal_id': 'Proposal',
            'observationid': 'Observation',
            'Instrument': 'Instrument',
            'energy_bandpassname': 'Filter',
            'target_moving': 'Moving',
            'position_bounds_spoly': 'Polygon',
            'Lower_Time': 'Exp Start',
            'Upper_Time': 'Exp End',
            'Known_Asteroids': 'Asteroids'
        })
        .assign(Moving=lambda df: df['Moving'].replace({0: 'No', 1: 'Yes'}))
        .reset_index(drop=True)
    )

    return asteroidCSV

## Run

In [None]:
%%time

# Constants and parameters
import getpass

# Credentials are never stored in the notebook: they come from the
# JWST_ARCHIVE_USER / JWST_ARCHIVE_PASSWORD environment variables, with an
# interactive prompt as a fallback.
username = os.environ.get('JWST_ARCHIVE_USER') or input('JWST archive username: ')
password = os.environ.get('JWST_ARCHIVE_PASSWORD') or getpass.getpass('JWST archive password: ')
propStart = 3000
propEnd = 4000
instrumentName = 'MIRI/IMAGE'
save_loc = 'LVL3_Asteroids'
propStepSize = 10

# List to hold the non-empty CSV segments
csv_lists = []

# Walk the proposal range in windows of propStepSize proposals
for current in range(propStart, propEnd + 1, propStepSize):
    segment_end = min(current + propStepSize - 1, propEnd)

    csv_segment = level3asteroidDetection(
        username, password,
        save_loc=save_loc,
        propRange=[int(current), int(segment_end)],
        instrumentName=instrumentName,
    )

    # Windows with no matching proposals return None; skip them so the list
    # only ever holds real data and the "no data" branch below is reachable.
    if csv_segment is None or csv_segment.empty:
        continue

    csvName = f'{save_loc}/LVL3_Asteroids_Seg_{current}.csv'
    csv_segment.to_csv(csvName, index=False)
    csv_lists.append(csv_segment)

if csv_lists:
    combined_csv = pd.concat(csv_lists, ignore_index=True)

    # Construct file name and save the combined CSV
    file_name = f'{save_loc}/LVL3_Asteroids_{instrumentName.split("/")[0]}_{propStart}_{propEnd}.csv'

    print(f'Saving to {file_name}')
    combined_csv.to_csv(file_name, index=False)

else:
    print('No data in that range')

Processing proposals: ['3034']


  0%|          | 0/30 [00:00<?, ?it/s]

Processing proposals: ['3050']


  0%|          | 0/5 [00:00<?, ?it/s]

Processing proposals: ['3153']


  0%|          | 0/27 [00:00<?, ?it/s]

Processing proposals: ['3177']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['3195']


  0%|          | 0/8 [00:00<?, ?it/s]

Processing proposals: ['3226', '3228']


  0%|          | 0/33 [00:00<?, ?it/s]

Processing proposals: ['3271']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['3295']


  0%|          | 0/81 [00:00<?, ?it/s]

Processing proposals: ['3368']


  0%|          | 0/153 [00:00<?, ?it/s]

Processing proposals: ['3384']


  0%|          | 0/5 [00:00<?, ?it/s]

Processing proposals: ['3429']


  0%|          | 0/28 [00:00<?, ?it/s]

Processing proposals: ['3435', '3436']


  0%|          | 0/38 [00:00<?, ?it/s]

Processing proposals: ['3445', '3449']


  0%|          | 0/27 [00:00<?, ?it/s]

Processing proposals: ['3477']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['3523']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['3533', '3535']


  0%|          | 0/27 [00:00<?, ?it/s]

Processing proposals: ['3547']


  0%|          | 0/1 [00:00<?, ?it/s]

Processing proposals: ['3558']


  0%|          | 0/3 [00:00<?, ?it/s]

Processing proposals: ['3571']


  0%|          | 0/20 [00:00<?, ?it/s]

Processing proposals: ['3621', '3629']


  0%|          | 0/9 [00:00<?, ?it/s]

Processing proposals: ['3671']


  0%|          | 0/18 [00:00<?, ?it/s]

Processing proposals: ['3696']


  0%|          | 0/24 [00:00<?, ?it/s]

Processing proposals: ['3730', '3738']


  0%|          | 0/8 [00:00<?, ?it/s]

Processing proposals: ['3743']


  0%|          | 0/15 [00:00<?, ?it/s]

Processing proposals: ['3760']


  0%|          | 0/5 [00:00<?, ?it/s]

Processing proposals: ['3772']


  0%|          | 0/4 [00:00<?, ?it/s]

Processing proposals: ['3786']


  0%|          | 0/18 [00:00<?, ?it/s]

Processing proposals: ['3794']


  0%|          | 0/97 [00:00<?, ?it/s]

In [23]:
%%time

# Constants and parameters
import getpass

# Credentials are never stored in the notebook: they come from the
# JWST_ARCHIVE_USER / JWST_ARCHIVE_PASSWORD environment variables, with an
# interactive prompt as a fallback.
username = os.environ.get('JWST_ARCHIVE_USER') or input('JWST archive username: ')
password = os.environ.get('JWST_ARCHIVE_PASSWORD') or getpass.getpass('JWST archive password: ')
propStart = 5000
propEnd = 6000
instrumentName = 'MIRI/IMAGE'
save_loc = 'LVL3_Asteroids'
propStepSize = 10

# List to hold the non-empty CSV segments
csv_lists = []

# Walk the proposal range in windows of propStepSize proposals
for current in range(propStart, propEnd + 1, propStepSize):
    segment_end = min(current + propStepSize - 1, propEnd)

    csv_segment = level3asteroidDetection(
        username, password,
        save_loc=save_loc,
        propRange=[int(current), int(segment_end)],
        instrumentName=instrumentName,
    )

    # Windows with no matching proposals return None; skip them so the list
    # only ever holds real data and the "no data" branch below is reachable.
    if csv_segment is None or csv_segment.empty:
        continue

    csvName = f'{save_loc}/LVL3_Asteroids_Seg_{current}.csv'
    csv_segment.to_csv(csvName, index=False)
    csv_lists.append(csv_segment)

if csv_lists:
    combined_csv = pd.concat(csv_lists, ignore_index=True)

    # Construct file name and save the combined CSV
    file_name = f'{save_loc}/LVL3_Asteroids_{instrumentName.split("/")[0]}_{propStart}_{propEnd}.csv'

    print(f'Saving to {file_name}')
    combined_csv.to_csv(file_name, index=False)

else:
    print('No data in that range')

Processing proposals: ['5014']


  0%|          | 0/9 [00:00<?, ?it/s]

Processing proposals: ['5105']


  0%|          | 0/3 [00:00<?, ?it/s]

Processing proposals: ['5114']


  0%|          | 0/15 [00:00<?, ?it/s]

Processing proposals: ['5204']


  0%|          | 0/7 [00:00<?, ?it/s]

Processing proposals: ['5279']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['5299']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['5365']


  0%|          | 0/3 [00:00<?, ?it/s]

Processing proposals: ['5407']


  0%|          | 0/26 [00:00<?, ?it/s]

Processing proposals: ['5451']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['5578']


  0%|          | 0/2 [00:00<?, ?it/s]

Processing proposals: ['5627']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['5709']


  0%|          | 0/6 [00:00<?, ?it/s]

Processing proposals: ['5842']


  0%|          | 0/3 [00:00<?, ?it/s]

Saving to LVL3_Asteroids/LVL3_Asteroids_MIRI_5000_6000.csv
CPU times: user 14.8 s, sys: 697 ms, total: 15.5 s
Wall time: 1h 15min 52s


In [20]:
#print