# Generate Asteroid CSV for Calibration Level 2 Data

This pipeline iterates through observations within the MIRI database to identify serendipitously detected asteroids. To optimize computational efficiency, it first scans the Level 3 database using the SBIDENT cone search method. Next, the pipeline reviews the corresponding Level 2 member observations to avoid dithering effects that can reduce asteroid flux measurements. A final list of detected asteroids is generated, incorporating all relevant observations. Asteroids that do not meet the threshold criteria for accurate flux measurement are then excluded in 'Asteroid_Analysis.csv' to identify usable flux measurements.

## Import Libraries

In [2]:
import time
import requests
import json
import re
import os
import sys
import logging
import glob

import shapely.wkt
from shapely.geometry import Point

from PIL import Image
from sbident import SBIdent

import numpy as np
import math as mt
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Circle

from astropy.io import fits
from astropy.wcs import WCS
from astropy.coordinates import SkyCoord
from astropy.time import Time
import astropy.units as u
from astropy.visualization import simple_norm
from datetime import datetime, timedelta

from astroquery.jplhorizons import Horizons
from astroquery.esa.jwst import Jwst

from scipy.ndimage import label

from tqdm.notebook import tqdm
tqdm.pandas()

import warnings
warnings.filterwarnings('ignore')

## Useful Functions

In [5]:
def is_in_array(pixel, array):
    """Return True when ``pixel`` matches at least one entry of ``array``.

    Each entry is compared element-wise with ``pixel`` and only counts as a
    match when every element agrees.
    """
    for candidate in array:
        if (pixel == candidate).all():
            return True
    return False

In [6]:
def MJDconversion(modifiedJulianDate):
    """Convert a Modified Julian Date into astropy's ISO date/time string."""
    mjd_time = Time(modifiedJulianDate, format='mjd')
    return mjd_time.iso

In [8]:
def combine_strings(string_list):
    """Merge a list of strings into one string, separated by a comma and a space."""
    separator = ', '
    return separator.join(string_list)

In [3]:
def generateFolder(folderName):
    """Create the folder ``folderName`` if nothing exists at that path yet.

    Missing parent folders are created as well, so a nested output path such
    as ``results/1234`` works even when ``results`` has not been made first.
    Nothing happens when the path already exists.
    """
    if not os.path.exists(folderName):
        # exist_ok covers the case where something else creates the folder
        # between the existence check above and this call.
        os.makedirs(folderName, exist_ok=True)

In [11]:
def members_string(members_init_string):
    """Tidy the archive 'members' string for the level 3 CSV.

    The 'caom:JWST/' prefix is removed from every member and the single-space
    separators become ', '.
    """
    without_prefix = members_init_string.replace('caom:JWST/', '')
    pieces = without_prefix.split(' ')
    return ', '.join(pieces)

In [7]:
def checkDataExists(proposal, observation):
    """Report whether the observation's i2d image is present on Datalabs.

    The volume folder is named after the first non-zero character of
    ``proposal``, so ``proposal`` must be a string containing at least one
    character other than '0'.
    """
    leading_digit = next(char for char in proposal if char != '0')
    jwst_file = f"jw0{leading_digit}"
    image_location = f'/data/user/jwst_{jwst_file}/jw0{proposal}/{observation}_i2d.fits.gz'
    found = os.path.exists(image_location)
    return found

In [4]:
def pullWCS(imagePath):
    """Open a FITS image and return its pixel data, headers and WCS.

    Returns a tuple ``(data, header, wcs_info, main_header)``: the data and
    header come from extension 1, ``wcs_info`` is built from that header, and
    ``main_header`` is the header of extension 0.
    """
    with fits.open(imagePath) as hdu_list:
        primary_header = hdu_list[0].header
        image_hdu = hdu_list[1]
        image_header = image_hdu.header
        # Read the array while the file is still open
        image_array = image_hdu.data
        image_wcs = WCS(image_header)

    return image_array, image_header, image_wcs, primary_header

In [10]:
class HiddenPrints:
    """Context manager that silences ``print`` output inside its ``with`` block.

    Used around built-in functions with noisy print statements.  On entry
    ``sys.stdout`` is pointed at the null device; on exit the stream that was
    active at entry is restored and the null-device handle is closed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference so __exit__ closes exactly the handle opened
        # here, even if code inside the block rebinds sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable again even if closing fails.
        sys.stdout = self._original_stdout
        self._devnull.close()

In [15]:
def formatPolygon(polyString):
    """Rewrite the archive polygon string as a WKT ``POLYGON`` for shapely.

    The first 8 and last 2 characters of ``polyString`` are discarded and the
    remainder is split on single spaces into alternating coordinate values.
    The first coordinate pair is repeated at the end so the ring is closed.
    """
    flat_values = polyString[8:-2].split(' ')

    # Close the ring by repeating the first vertex
    flat_values.append(flat_values[0])
    flat_values.append(flat_values[1])

    vertices = []
    for start in range(0, len(flat_values), 2):
        vertices.append(flat_values[start] + ' ' + flat_values[start + 1])

    ring = ', '.join(vertices)
    return f"POLYGON (({ring}))"

## Archive Query Functions

In [17]:
def queryArchive(volume, readouts, query_filters):
    """Run an ADQL query against the JWST archive and return a pandas DataFrame.

    ``readouts`` are the columns to select from ``jwst.<volume>`` and
    ``query_filters`` are the conditions, combined with AND.  The result is
    sorted by ``observationid`` and re-indexed from zero.
    """
    select_clause = ','.join(readouts)
    where_clause = ' AND '.join(query_filters)
    query_string = f"SELECT {select_clause} FROM jwst.{volume} WHERE {where_clause}"

    # Launch the asynchronous job and pull the table into pandas
    archive_job = Jwst.launch_job(query_string, async_job=True)
    result_frame = archive_job.get_results().to_pandas()

    ordered_frame = result_frame.sort_values(by=['observationid'])
    return ordered_frame.reset_index(drop=True)

In [18]:
def level3_Archive_Query(propRange = [1000,9000], instrumentName = 'MIRI/IMAGE', dataType = 'image', volume = 'archive', additionalFilters = []):
    """Query the archive for calibration level 3 products in a proposal range.

    ``propRange`` is a ``[lower, upper]`` pair of proposal IDs (inclusive).
    ``additionalFilters`` are extra ADQL conditions appended to the standard
    ones; the list is only read, never modified.  Returns the DataFrame
    produced by ``queryArchive``.
    """
    first_proposal, last_proposal = propRange
    table = f'jwst.{volume}'
    level = 3

    standard_filters = [
        f'{table}.calibrationlevel = {level}',
        f"{table}.dataproducttype = '{dataType}'",
        f"{table}.instrument_name = '{instrumentName}'",
        f"{table}.proposal_id >= '{first_proposal}'",
        f"{table}.proposal_id <= '{last_proposal}'",
    ]
    query_filters = standard_filters + additionalFilters

    # Archive columns returned for each level 3 product
    query_topics = [
        'proposal_id', 'observationid', 'dataproducttype', 'intent',
        'instrument_name', 'energy_bandpassname', 'target_moving',
        'position_bounds_spoly', 'time_bounds_lower', 'time_bounds_upper',
        'members',
    ]

    # The archive client prints progress messages; keep them out of the output
    with HiddenPrints():
        return queryArchive(volume, query_topics, query_filters)

In [19]:
def level2_Archive_Query(observation_list, instrumentName = 'MIRI/IMAGE', dataType = 'image', volume = 'archive', additionalFilters = []):
    """Query the archive for calibration level 2 products of given observations.

    Only rows whose ``observationid`` is in ``observation_list`` are returned.
    ``additionalFilters`` are extra ADQL conditions appended to the standard
    ones; the list is only read, never modified.  Returns the DataFrame
    produced by ``queryArchive``.
    """
    quoted_ids = (f"'{obsid}'" for obsid in observation_list)
    observation_list_string = ', '.join(quoted_ids)

    table = f'jwst.{volume}'
    level = 2

    standard_filters = [
        f'{table}.observationid IN ({observation_list_string})',
        f'{table}.calibrationlevel = {level}',
        f"{table}.dataproducttype = '{dataType}'",
        f"{table}.instrument_name = '{instrumentName}'",
    ]
    query_filters = standard_filters + additionalFilters

    # Archive columns returned for each level 2 product
    query_topics = [
        'proposal_id', 'observationid', 'dataproducttype', 'intent',
        'instrument_name', 'energy_bandpassname', 'target_moving',
        'target_name', 'position_bounds_spoly', 'time_bounds_lower',
        'time_bounds_upper',
    ]

    # The archive client prints progress messages; keep them out of the output
    with HiddenPrints():
        return queryArchive(volume, query_topics, query_filters)

## Small Body Identification Cone Search

In [9]:
def definePolyEdges(polyString):
    """Return two opposite corners of the RA/DEC box enclosing the image.

    The cone search takes an axis-aligned box, so the footprint polygon is
    reduced to its minimum and maximum RA and DEC, widened by a small buffer.
    Returns ``[corner_at_minimum, corner_at_maximum]`` as ``SkyCoord`` objects.
    """
    # Strip the WKT wrapper so only alternating RA, DEC numbers remain
    stripped = polyString.replace("POLYGON ((", "").replace("))", "").replace(", ", " ")
    numbers = [float(token) for token in stripped.split()]

    ra_values = numbers[::2]
    dec_values = numbers[1::2]

    margin = 0.005  # Deg, small buffer around the image edges

    ra_low = min(ra_values) - margin
    dec_low = min(dec_values) - margin
    corner_at_minimum = SkyCoord(ra_low, dec_low, frame='icrs', unit='deg')

    ra_high = max(ra_values) + margin
    dec_high = max(dec_values) + margin
    corner_at_maximum = SkyCoord(ra_high, dec_high, frame='icrs', unit='deg')

    return [corner_at_minimum, corner_at_maximum]

In [16]:
def expProbeTime(expStartMJD, expEndMJD):
    """Return the times at which one exposure should be cone searched.

    Exposures longer than 20 minutes are sampled every 20 minutes, starting
    at the exposure start and stopping before the exposure end, so streaking
    asteroids are caught along their path.  Shorter exposures are probed once,
    at their midpoint.

    Parameters
    ----------
    expStartMJD, expEndMJD : float
        Exposure start and end as Modified Julian Dates.

    Returns
    -------
    list of str
        Probe times converted with ``MJDconversion``.
    """
    probe_time = (1/3)/24  # 20 minutes expressed in days

    exposure_length = expEndMJD - expStartMJD

    # Compare the duration with the 20 minute step (not with an absolute date)
    if exposure_length > probe_time:
        probe_list = []
        step = 0
        probe = expStartMJD

        while probe < expEndMJD:
            probe_list.append(MJDconversion(probe))
            step += 1
            # Multiply rather than accumulate to avoid floating point drift
            probe = expStartMJD + step * probe_time

    else:
        # Choose the center of the exposure time
        probe_list = [MJDconversion(expStartMJD + exposure_length / 2)]

    return probe_list

In [29]:
def JWSTposition(obsTime):
    """Return JWST's geocentric state vector at ``obsTime`` for SBIdent.

    Follows example 3 from
    https://github.com/bengebre/sbident/blob/main/examples/sbident-examples.ipynb

    The state vector is requested from JPL Horizons and converted from
    AU and AU/day to km and km/s.

    Parameters
    ----------
    obsTime : astropy.time.Time
        Observation time; its ``jd`` value is used as the Horizons epoch.

    Returns
    -------
    dict
        ``{'xobs': 'x,y,z,vx,vy,vz'}`` with each value in scientific notation.

    NOTE: it is likely that this value can be pulled from the image header in
    the future.
    """
    au_to_km = (1 * u.au).to(u.km).value
    seconds_per_day = 86400

    # Probe for the JWST output from JPL Horizons, state vector
    jwst_output = Horizons(id='JWST',location='Geocentric',epochs=obsTime.jd, id_type='id').vectors(refplane='earth')

    jwst_output_km = jwst_output[['x', 'y', 'z', 'vx', 'vy', 'vz']].to_pandas().to_numpy()
    jwst_output_km[:, :3] *= au_to_km                    # position: AU -> km
    # Velocity needs both factors: AU -> km and per day -> per second
    jwst_output_km[:, 3:] *= au_to_km / seconds_per_day  # velocity: AU/day -> km/s

    # Form the xobs dictionary that is the input for SBIdent location argument
    xobs = ','.join([f"{s:.12e}" for s in jwst_output_km[0]])
    return {'xobs': xobs}

In [30]:
def coneSearch(Exptime, Edge1, Edge2):
    """Run the SBIdent cone search for one time and one RA/DEC box.

    ``Exptime`` is converted to an astropy ``Time``, JWST's position at that
    moment is looked up, and SBIdent is queried for the box bounded by
    ``Edge1`` and ``Edge2``.  A failed query is retried once after 5 seconds.

    Returns the SBIdent results, or ``False`` when both attempts fail or the
    result is an empty list.

    NOTE: unlike JPL Horizons, the sbident cone search only uses 1 fixed time.
    """
    ObsTime = Time(Exptime)
    jwstLocation = JWSTposition(ObsTime)

    failure_notes = ("Failed First Try in SBIDENT", "Failed Second Try in SBIDENT")
    final_attempt = len(failure_notes) - 1

    sbid = False
    for attempt, note in enumerate(failure_notes):
        try:
            sbid = SBIdent(jwstLocation, ObsTime, [Edge1, Edge2]).results
        except Exception as err:
            # The connection is sometimes interrupted; log it and retry once
            logging.info(note)
            logging.info(err)
            if attempt < final_attempt:
                time.sleep(5)
        else:
            break

    # Treat an empty list the same as a failed search
    if isinstance(sbid, list) and not sbid:
        logging.info("SBIDENT Output was empty")
        sbid = False

    return sbid

## Level 3 Asteroid Detection

In [31]:
def level3_asteroid_names(expStartMJD, expEndMJD, polyStringFormatted):
    """Return the asteroids found by cone searching one level 3 image.

    The image footprint is reduced to a bounding box and searched at every
    probe time from ``expProbeTime``.  From each returned 'Object name' only
    the text after the last '(' is kept, with ')' removed, and duplicates are
    dropped.

    Returns the names joined with ', ', or '' when nothing was found.
    """
    corners = definePolyEdges(polyStringFormatted)
    probe_times = expProbeTime(expStartMJD, expEndMJD)

    # One 'Object name' column per probe time that returned results
    name_columns = []
    for probe in probe_times:
        hits = coneSearch(probe, *corners)
        if not hits:
            # No asteroids found in this cone search
            continue
        name_columns.append(hits['Object name'])

    designations = set()
    for column in name_columns:
        for full_name in column:
            designations.add(full_name.split('(')[-1].replace(')', ''))

    unique_asteroids = list(designations)
    return ', '.join(f"{ast}" for ast in unique_asteroids)

In [32]:
def level3_asteroid_search(row):
    """Run the level 3 cone search for one archive row.

    ``row`` must provide 'proposal_id', 'observationid',
    'position_bounds_spoly', 'time_bounds_lower' and 'time_bounds_upper'.
    Returns the comma separated asteroid names from ``level3_asteroid_names``.
    """
    # Read but not used by the cone search itself
    _proposal = row['proposal_id']
    _observation = row['observationid']

    footprint = row['position_bounds_spoly']
    start_mjd = row['time_bounds_lower']
    end_mjd = row['time_bounds_upper']

    # Convert the archive polygon into the WKT form the search expects
    wkt_footprint = formatPolygon(str(footprint))

    return level3_asteroid_names(start_mjd, end_mjd, wkt_footprint)

## JPL Horizons Comparative Search

In [33]:
def jplHorizonsSearch(targetID, startTime, stopTime, polyString):
    """Check with JPL Horizons whether a named target crosses the footprint.

    The ephemeris of ``targetID`` as seen from JWST is sampled every minute
    between ``startTime`` and ``stopTime``.  This double checks the names
    produced by the cone search.

    Returns ``(contained, ephemeris)``: ``contained`` is True when any sampled
    RA/DEC lies inside the WKT polygon ``polyString`` and ``ephemeris`` is the
    pandas DataFrame from Horizons.  Returns ``(False, False)`` when Horizons
    fails twice.
    """
    footprint = shapely.wkt.loads(polyString)

    # Finest sampling so that no crossing is missed.
    # Note: this could be adapted to the exposure length for very long exposures.
    probeMinutes = 1

    def fetch_ephemerides(target, **id_options):
        # Build and run one Horizons ephemeris request
        request = Horizons(
            id=target,
            location='Geocentric@JWST',
            epochs={'start': str(startTime), 'stop' : str(stopTime), 'step' : f"{probeMinutes}m"},
            **id_options)
        return request.ephemerides().to_pandas()

    try:
        jpl_pandas = fetch_ephemerides(targetID)

    except Exception:
        logging.info(f"JPL Horizons Failed to retrieve data for {targetID}, Trying again")
        time.sleep(30)

        # This comet is retried under a fixed numeric ID
        if targetID == '81P/Wild 2':
            targetID = '90000862'

        try:
            # Second attempt states explicitly that the ID is a small body
            jpl_pandas = fetch_ephemerides(targetID, id_type='smallbody')

        except Exception as e:
            logging.info(f"JPL Horizons Failed Second Try for {targetID}")
            logging.info(e)
            return (False, False)

    sampled_positions = zip(jpl_pandas['RA'], jpl_pandas['DEC'])
    contained = any(footprint.contains(Point(ra, dec)) for ra, dec in sampled_positions)
    return (contained, jpl_pandas)

## Level 2 Asteroid Detection

In [34]:
def find_asteroid_center(imageData, image_parameters, jpl_x, jpl_y, uncertainty, image_filter):
    """Locate the asteroid on the image, starting from its JPL pixel track.

    The image is cropped around the JPL-predicted pixel positions, a bright
    region is located inside the crop, and the midpoint between that region
    and the JPL position is refined with ``refine_center``.

    Parameters
    ----------
    imageData : 2-D array
        Full image; its shape is unpacked as (rows, cols).
    image_parameters : sequence of 3
        Unpacked as (pixel units, sr conversion, arcsec conversion).
    jpl_x, jpl_y : array-like
        Predicted pixel positions of the asteroid over the exposure.
        # assumes numpy arrays (negative indexing and arithmetic are used) - TODO confirm
    uncertainty : float
        Position uncertainty; values below 2 select the smaller search window.
        # presumably arcsec (the caller passes the mean Horizons RSS_3sigma) - TODO confirm
    image_filter : str
        Filter name; must be a key of ``filters_psf`` or a KeyError is raised.

    Returns
    -------
    tuple
        (centre in crop coordinates, centre in original image coordinates,
        streak length in pixels or the string 'Point', centre offset from the
        JPL position, streak flag '' or 'Yes').
    """
    # Find the center of the asteroid
    
    # Per-filter values, unpacked below as
    # [apertureRange, innerAnnulus, outerAnnulus, correctionFactor]
    filters_psf = { "F560W":  [3.87,  10.78, 18.39, 1.436],
                    "F770W":  [4.22,  8.92,  14.64, 1.442],
                    "F1000W": [4.60,  6.70,  11.58, 1.453],
                    "F1130W": [4.92,  7.06,  11.22, 1.465],
                    "F1280W": [5.18,  7.61,  10.99, 1.483],
                    "F1500W": [5.69,  8.63,  11.45, 1.497],
                    "F1800W": [6.29,  10.09, 12.52, 1.506],
                    "F2100W": [7.91,  11.90, 16.07, 1.492],
                    "F2550W": [9.18,  14.03, 17.71, 1.506],
                    "FND":    [5.27,  8.98,  13.74, 1.455],
                    "F1065C": [10.07, 17.77, 24.80, 1.464],
                    "F1140C": [10.68, 19.10, 27.06, 1.461],
                    "F1550C": [14.21, 25.52, 36.77, 1.459],
                    "F2300C": [13.27, 16.84, 27.25, 1.470]}

    # Only apertureRange is used in this function (for the streak flag)
    apertureRange, innerAnnulus, outerAnnulus, correctionFactor = filters_psf[image_filter]
    
    
    if uncertainty < 2:
        # Small position uncertainty: a small search window is enough
        extra_pixels = 14
        
    else:
        # Larger position uncertainty: search a bigger window
        extra_pixels = 25
    
    current_pixel_units, sr_to_pix_conversion, arcsec_to_pix_conversion = image_parameters
    
    # Crop the image near the asteroid
    rows, cols = imageData.shape

    # Calculate crop boundaries with bounds checking using numpy.clip and additional buffer pixels
    col_start, col_end = np.clip([int(min(jpl_x) - extra_pixels), int(max(jpl_x) + extra_pixels)], 0, cols - 1)
    row_start, row_end = np.clip([int(min(jpl_y) - extra_pixels), int(max(jpl_y) + extra_pixels)], 0, rows - 1)

    # Extract cropped region (the end index is exclusive)
    cropped_image = imageData[row_start:row_end, col_start:col_end]

    # Adjust X and Y to the cropped image coordinate system
    adjusted_asteroid_X = jpl_x - col_start
    adjusted_asteroid_Y = jpl_y - row_start

    if len(adjusted_asteroid_X) > 1:
        # Straight-line distance between the first and last predicted positions
        dx = abs(adjusted_asteroid_X[-1] - adjusted_asteroid_X[0])
        dy = abs(adjusted_asteroid_Y[-1] - adjusted_asteroid_Y[0])
        streak_distance_in_pix = mt.sqrt(dx**2 + dy**2)
        streak_distance_in_arcsec = streak_distance_in_pix * arcsec_to_pix_conversion  # computed but not used
        
        # Flag a streak when the motion exceeds 90% of twice apertureRange
        if streak_distance_in_pix <= 0.9*apertureRange*2:
            streakFlag = ''
        else:
            streakFlag = 'Yes'

    else:
        # A single predicted position: no motion to measure
        streak_distance_in_pix = 'Point'
        streakFlag = ''
            

    # Find the center of the asteroid       
    image_shape = cropped_image.shape    # not used below
        
    if len(adjusted_asteroid_X) > 1:
        path_difference_ra =  abs(adjusted_asteroid_X[0] - adjusted_asteroid_X[-1])   # not used below
        path_difference_dec = abs(adjusted_asteroid_Y[0] - adjusted_asteroid_Y[-1])   # not used below
            
        # Use the middle sample of the predicted track as the JPL centre
        mid_index = len(adjusted_asteroid_X) // 2
        jpl_center_location = [adjusted_asteroid_X[mid_index], adjusted_asteroid_Y[mid_index]]
        
    else:
        jpl_center_location = [adjusted_asteroid_X[0], adjusted_asteroid_Y[0]]

        
    # Locate the bright region of the crop (helper called with the 99th percentile)
    bright_center = locate_bright_region(cropped_image, desired_percentile = 99)

    #replace any nan values that are connected to the edge of the image to the average of the image as to not skew center location
    new_image = replace_nan_edges(cropped_image)
        
    # Both branches start from the midpoint between the JPL centre and the
    # bright region; they differ only in the radius passed to refine_center
    if uncertainty < 2:       
        #refine the center guess using the starting point from the brightness center
        midway_point = ((jpl_center_location[0] + bright_center[0]) / 2, (jpl_center_location[1] + bright_center[1]) / 2)
        refined_from_midpoint = refine_center(new_image, midway_point, radius = 12)
        
    else:    
        #refine the center guess using the starting point from the brightness center  
        midway_point = ((jpl_center_location[0] + bright_center[0]) / 2, (jpl_center_location[1] + bright_center[1]) / 2)
        refined_from_midpoint = refine_center(new_image, midway_point)
    
    # Calculate the location on the original image
    x_center_original = refined_from_midpoint[0] + col_start
    y_center_original = refined_from_midpoint[1] + row_start
    
    center_on_original = np.array([x_center_original,y_center_original])
    
    # Find difference between the JPL center and the one calculated    
    center_dx = abs(jpl_center_location[0] - list(refined_from_midpoint)[0])
    center_dy = abs(jpl_center_location[1] - list(refined_from_midpoint)[1])
    center_distance_in_pix = mt.sqrt(center_dx**2 + center_dy**2)
    # NOTE(review): a pixel distance is multiplied by the sr conversion taken
    # from image_parameters - confirm the intended units of this value
    center_distance_in_sr = center_distance_in_pix * sr_to_pix_conversion
    
    return(refined_from_midpoint, center_on_original, streak_distance_in_pix, center_distance_in_sr, streakFlag)

In [35]:
def level2_asteroid_names(expStart, expEnd, filter_name, poly_string_formatted, proposal, observation, asteroid_list, folder_path, produce_images):
    """Measure every candidate asteroid that falls on one level 2 image.

    Each name in ``asteroid_list`` is checked with JPL Horizons against the
    image footprint and then against the detector pixels.  Asteroids passing
    both checks are located on the image and their flux is measured; if
    ``produce_images`` is set, diagnostic images are written under
    ``folder_path``.

    Parameters
    ----------
    expStart, expEnd
        Exposure start and end times, passed to ``jplHorizonsSearch``.
    filter_name : str
        Filter name passed on to centre finding and flux calculation.
    poly_string_formatted : str
        Image footprint as a WKT polygon.
    proposal : str
        Proposal ID; used to build the image path on Datalabs.
    observation : str
        Observation ID; used in the image file name.
    asteroid_list : iterable of str
        Candidate asteroid names.
    folder_path : str
        Output folder for images.
    produce_images : bool
        Whether to write diagnostic images.

    Returns
    -------
    list
        32 values describing the image and its asteroids (per-asteroid values
        are joined with ', ') when at least one asteroid is usable; otherwise
        a list of 24 empty strings.
    """
    contained_asteroid_names = []
    contained_asteroid_ephem = []

    containedAsteroidX = []
    containedAsteroidY = []

    asteroids_missing_detector = []

    usefulAsteroids = False

    # Location of the image on Datalabs (same layout as checkDataExists)
    jwst_file = f"jw0{next(c for c in proposal if c != '0')}"
    dataPath = f'/data/user/jwst_{jwst_file}/jw0{proposal}/{observation}_i2d.fits.gz'

    image_on_datalabs = checkDataExists(proposal, observation)

    if image_on_datalabs:
        # Named ``wcs`` so the imported WCS class is not shadowed
        imageData, header, wcs, main_header = pullWCS(dataPath)

        for asteroid in asteroid_list:
            containedCheck, asteroid_ephem = jplHorizonsSearch(asteroid, expStart, expEnd, poly_string_formatted)

            if containedCheck:
                asteroid_ra_positions = asteroid_ephem['RA']
                asteroid_dec_positions = asteroid_ephem['DEC']

                # Convert RA and DEC to pixel locations
                asteroidPixelX, asteroidPixelY = wcs.all_world2pix(asteroid_ra_positions, asteroid_dec_positions, 0)

                # Ensure the asteroid is on the detector region and will return a useful value
                onDetector = onDetectorCheck(imageData, asteroidPixelX, asteroidPixelY)

                if onDetector:
                    contained_asteroid_names.append(asteroid)
                    containedAsteroidX.append(asteroidPixelX)
                    containedAsteroidY.append(asteroidPixelY)
                    contained_asteroid_ephem.append(asteroid_ephem)
                    usefulAsteroids = True

                else:
                    asteroids_missing_detector.append(asteroid)

    else:
        logging.info(f"ERROR: Image Data Not Found {dataPath}")

    if usefulAsteroids:
        if produce_images:
            generateFolder(f"{folder_path}")

        # Determine Image Information
        PIXAR_A2 = float(header['PIXAR_A2'])  #  Nominal pixel area in arcsec^2
        PHOTMJSR = float(header['PHOTMJSR'])  #  Flux density (MJy/steradian) producing 1 cps
        PIXAR_SR = float(header['PIXAR_SR'])  #  Nominal pixel area in steradians
        BUNIT    = header['BUNIT']            #  Pixel Units
        XPOSURE = float(header['XPOSURE'])    #  Effective exposure time [s]
        BITPIX = header['BITPIX']             #  Array data type

        READOUT = main_header.get('READPATT')                                      # Readout Pattern
        BRIGHTSKY = f"{main_header.get('SUBSIZE1')} {main_header.get('SUBSIZE2')}" # Pixel Array
        DITHERTYPE = main_header.get('PATTTYPE')                                   # Dithering Pattern
        SUBARRAY = main_header.get('SUBARRAY')                                     # Sub Array Type

        # Determine Asteroid Specific Values
        alpha_values = []
        sun_to_asteroid_values = []
        asteroid_to_jwst_values = []
        position_values = []
        position_uncertainty_values = []
        position_Rates = []
        Visual_Mag = []
        Surface_Brightnesses = []

        asteroid_class_list = []
        asteroid_radius_list = []
        asteroid_albedo_list = []
        asteroid_short_name_list = []

        distance_pix = []
        distance_arcsec_over_exptime = []

        aperture_flux_mJy_list = []
        flux_error_list = []
        annulus_median_ratio_list = []
        signal_to_noise_list = []
        center_offset_sr_list = []
        ap_nans = []
        ann_nans = []
        streak_flags = []

        for asteroid_ephem in contained_asteroid_ephem:
            alpha_values.append(f'{round(np.mean(asteroid_ephem["alpha"]),4)}')                       # Sun-Asteroid-JWST Angle (Deg)
            sun_to_asteroid_values.append(f'{round(np.mean(asteroid_ephem["r"]),4)}')                 # Heliocentric Distance of the Asteroid (AU)
            asteroid_to_jwst_values.append(f'{round(np.mean(asteroid_ephem["delta"]),4)}')            # Distance from the Asteroid to JWST (AU)
            position_uncertainty_values.append(f'{round(np.mean(asteroid_ephem["RSS_3sigma"]),4)}')   # Root-Sum-Square of 3-Sigma Uncertainty

            # Values quoted at the middle sample of the ephemeris
            midpoint = len(asteroid_ephem['RA']) // 2
            position_values.append([asteroid_ephem['RA'][midpoint], asteroid_ephem['DEC'][midpoint]])
            position_Rates.append([asteroid_ephem['RA_rate'][midpoint], asteroid_ephem['DEC_rate'][midpoint]])
            Visual_Mag.append(str(asteroid_ephem['V'][midpoint]))
            Surface_Brightnesses.append(str(asteroid_ephem['surfbright'][midpoint]) if '<NA>' not in str(asteroid_ephem['surfbright'][midpoint]) else '-')

            # Combined rate multiplied by the exposure time converted from
            # seconds to hours
            motion = np.sqrt(float(asteroid_ephem['RA_rate'][midpoint])**2 + float(asteroid_ephem['DEC_rate'][midpoint])**2) * (XPOSURE/60)/60
            distance_arcsec_over_exptime.append(f'{round(motion, 4)}')

        for indx, asteroid_name in enumerate(contained_asteroid_names):
            classification, radius, albedo, short_name = asteroid_physical(asteroid_name)
            asteroid_class_list.append(classification)
            asteroid_short_name_list.append(short_name)
            asteroid_radius_list.append(radius)
            asteroid_albedo_list.append(albedo)

            Image_parameters = [BUNIT, PIXAR_SR, PIXAR_A2]

            # Find the center of the asteroid
            asteroid_center_crop, asteroid_center_original, streak_distance_in_pix, center_offset_sr, streakFlag = find_asteroid_center(
                imageData, Image_parameters, containedAsteroidX[indx], containedAsteroidY[indx],
                float(position_uncertainty_values[indx]), filter_name)

            # Run Flux Calculation
            aperture_flux, flux_error, annulus_median_ratio, signal2noise, cropped_image, pixel_offsets, ap_NAN_percent, ann_NAN_percent = Flux_Calculation(
                filter_name, Image_parameters, imageData, asteroid_center_original,
                asteroid_name, observation, XPOSURE, folder_path, produce_images)

            # Predicted track expressed in the coordinates of the flux cut-out
            adjusted_asteroid_X = containedAsteroidX[indx] - pixel_offsets[0]
            adjusted_asteroid_Y = containedAsteroidY[indx] - pixel_offsets[1]

            ann_nans.append(str(round(ann_NAN_percent,2)))
            ap_nans.append(str(round(ap_NAN_percent,2)))

            # streak_distance_in_pix is the string 'Point' for a single position
            distance_pix.append(f'{round(streak_distance_in_pix, 4) if not isinstance(streak_distance_in_pix, str) else streak_distance_in_pix}')

            streak_flags.append(streakFlag)

            aperture_flux_mJy_list.append(aperture_flux)
            flux_error_list.append(flux_error)
            annulus_median_ratio_list.append(f'{round(annulus_median_ratio,4)}')
            signal_to_noise_list.append(f'{round(signal2noise,2)}')
            center_offset_sr_list.append(f'{round(center_offset_sr*1e13,4)}')

            if produce_images:
                highContrastImageWithOverlay(cropped_image, asteroid_name, adjusted_asteroid_X, adjusted_asteroid_Y, folder_path, observation)

        if produce_images:
            produceOverlayImage( imageData, wcs, observation, contained_asteroid_names, containedAsteroidX, containedAsteroidY, folder_path)

        results = [(round(XPOSURE/60,4)), combine_strings(contained_asteroid_names), combine_strings(asteroid_short_name_list), combine_strings(asteroid_class_list), combine_strings(asteroid_radius_list),
                    combine_strings(asteroid_albedo_list), combine_strings(alpha_values), combine_strings(sun_to_asteroid_values), combine_strings(asteroid_to_jwst_values),
                    ', '.join(f'({x}, {y})' for x, y in position_values), ', '.join(f'({x}, {y})' for x, y in position_Rates), combine_strings(position_uncertainty_values),READOUT, SUBARRAY, BITPIX, DITHERTYPE, BRIGHTSKY,
                    BUNIT, f'{round(PHOTMJSR,4)}', float(f"{PIXAR_A2:.4g}"), combine_strings(Visual_Mag), combine_strings(Surface_Brightnesses), combine_strings(aperture_flux_mJy_list), combine_strings(flux_error_list), combine_strings(annulus_median_ratio_list),
                   combine_strings(signal_to_noise_list), combine_strings(distance_pix), combine_strings(distance_arcsec_over_exptime), combine_strings(center_offset_sr_list), combine_strings(ap_nans), combine_strings(ann_nans),
                   combine_strings(streak_flags)]

        return(results)

    else:
        # NOTE(review): the populated result above has 32 entries but this
        # placeholder has 24 - confirm which length the caller expects
        return(['' for _ in range(24)])

In [36]:
def level2_asteroid_search(row, Proposal_Asteroid_Dict, produce_images, folderName):
    """Run the level 2 asteroid measurement for one archive row.

    ``row`` must provide 'proposal_id', 'observationid',
    'position_bounds_spoly', 'time_bounds_lower', 'time_bounds_upper' and
    'energy_bandpassname'.  The candidate asteroids are looked up in
    ``Proposal_Asteroid_Dict`` by proposal ID, and images (if requested) go
    to ``<folderName>/<proposal>``.

    Returns the result list of ``level2_asteroid_names``.
    """
    proposal = row['proposal_id']
    observation = row['observationid']
    footprint = row['position_bounds_spoly']
    start_mjd = row['time_bounds_lower']
    end_mjd = row['time_bounds_upper']
    filter_name = row['energy_bandpassname']

    candidates = Proposal_Asteroid_Dict[proposal]

    # Archive polygon -> WKT polygon
    wkt_footprint = formatPolygon(str(footprint))

    # Modified Julian Dates -> date strings
    exposure_start = MJDconversion(start_mjd)
    exposure_end = MJDconversion(end_mjd)

    proposal_folder = f'{folderName}/{proposal}'

    return level2_asteroid_names(
        exposure_start, exposure_end, filter_name, wkt_footprint,
        proposal, observation, candidates, proposal_folder, produce_images)

## On Detector Check

In [14]:
def onDetectorCheck(imageData, X, Y):
    """Report whether the image holds measurable data near the given pixels.

    The positions ``X``, ``Y`` are floored to integer pixels.  The image is
    sampled at those pixels and then at four diagonal shifts of 5 pixels;
    samples outside the image are ignored.  Returns True as soon as any
    sample is neither zero nor NaN, otherwise False.
    """
    n_rows = imageData.shape[0]
    n_cols = imageData.shape[1]

    base_cols = np.floor(X).astype(int)
    base_rows = np.floor(Y).astype(int)

    # The unshifted positions first, then +5/-5 in both X and Y
    shifts = [(0, 0), (5, 5), (5, -5), (-5, 5), (-5, -5)]

    for shift_x, shift_y in shifts:
        cols = base_cols + shift_x
        rows = base_rows + shift_y

        # Keep only the samples that land inside the image
        inside = (cols >= 0) & (cols < n_cols) & (rows >= 0) & (rows < n_rows)
        sampled = imageData[rows[inside], cols[inside]]

        usable = (sampled != 0) & ~np.isnan(sampled)
        if np.any(usable):
            return True

    # No usable pixel at the positions or at any of the shifted positions
    return False

## Retrieve Asteroid Parameters

In [24]:
def asteroid_physical(asteroid_name):
    """Look up an asteroid's orbit class, short name, radius and albedo from JPL services.

    Two independent, best-effort lookups are made:
      1. The JPL SBDB API is queried for the orbit-class name and (if present) the short name.
      2. JPL Horizons is queried through astroquery; the radius and albedo are scraped
         from the text of the exception that the query raises (see NOTE below).

    Parameters
    ----------
    asteroid_name : identifier sent as the SBDB `sstr` parameter and as the Horizons `id`.

    Returns
    -------
    tuple : (classification, radius, albedo, short_name)
        Values that could not be retrieved keep their defaults:
        classification = 'NA', radius = '-', albedo = '-', short_name = '-'.
        radius and albedo are returned as the matched strings, not as numbers.
    """
    # Default values for radius and albedo
    # NOTE: these are not always documented on JPL Horizons
    radius = '-'
    albedo = '-'
    classification = 'NA'
    short_name = '-'

    # Retrieve asteroid classification
    try:
        # Make the request to the API
        url = "https://ssd-api.jpl.nasa.gov/sbdb.api"
        response = requests.get(url, params={'sstr': asteroid_name})

        # Parse the JSON string into a Python dictionary
        parsed_data = json.loads(response.text)

        # Extract the classification term and, when available, the short name
        classification = parsed_data["object"]["orbit_class"]["name"]
        if "shortname" in parsed_data["object"]:
            short_name = parsed_data["object"]["shortname"]

    except Exception:
        # Best effort: keep the defaults, but do not swallow KeyboardInterrupt/SystemExit
        logging.info(f"Could not pull api information for {asteroid_name}")

    # Retrieve asteroid physical parameters (if they are known)
    # NOTE: at this time the method for obtaining this is not ideal as it uses an error readout;
    # it is not known how to retrieve this data from the horizons output otherwise.
    # The message starts empty so that a query which does NOT raise simply yields no matches
    # (previously the name was unbound in that case and the search below raised NameError).
    error_message = ''
    try:
        Horizons(id=asteroid_name, location='500').elements().items()
    except Exception as e:
        error_message = str(e)

    # Search for RAD and ALBEDO in the string
    radius_match = re.search(r'RAD=\s*([0-9.]+)', error_message)
    albedo_match = re.search(r'ALBEDO=\s*([0-9.]+)', error_message)

    # If found, assign the values to radius and albedo
    if radius_match:
        radius = radius_match.group(1)
    if albedo_match:
        albedo = albedo_match.group(1)

    return (classification, radius, albedo, short_name)

## Detecting The Asteroid Center

In [13]:
def get_pixel_value(array, pixels):
    """Summarise the image values found at a set of pixel coordinates.

    Parameters
    ----------
    array  : 2-D image (anything np.array() accepts).
    pixels : (N, 2) integer array; column 0 indexes the first image axis and
             column 1 the second.

    Returns
    -------
    tuple : (total, amount, average, nan_percentage)
        total          - sum of the non-NaN values
        amount         - number of non-NaN values
        average        - total / amount, or NaN when there are no non-NaN values
        nan_percentage - share of sampled pixels that are NaN, in percent
                         (100 when no pixels were sampled at all)
    """
    sampled = np.array(array)[pixels[:, 0], pixels[:, 1]]
    is_nan = np.isnan(sampled)
    usable = sampled[~is_nan]

    # Share of NaN pixels among everything that was sampled
    n_sampled = len(sampled)
    if n_sampled > 0:
        nan_percentage = (is_nan.sum() / n_sampled) * 100
    else:
        nan_percentage = 100

    # Statistics over the non-NaN pixels only
    total = np.sum(usable)
    amount = len(usable)
    if amount > 0:
        average = total / amount
    else:
        average = float('nan')

    return total, amount, average, nan_percentage

In [12]:
def get_pixels_within_radius(array_size, center, radius):
    # Determine which pixels are within the provided distance from some central target
    
    # Adjust radius to remove partially enclosed pixels (looking for pixels that are fully contained
    # NOTE: these measurments are from the center of the pixel 
    adjusted_radius = radius - 0.4 #pixels
    
    x, y = np.meshgrid(np.arange(array_size[1]), np.arange(array_size[0]))
    distances = np.sqrt((x - center[0])**2 + (y - center[1])**2)
    pixels_within_radius = np.column_stack(np.where(distances <= adjusted_radius))
    
    return pixels_within_radius

In [25]:
def locate_bright_region(array, desired_percentile=98):
    """Return the mean position of the brightest pixels in an image.

    The brightness cutoff is the `desired_percentile` percentile of the image
    (NaNs ignored); every pixel at or above it contributes equally to the mean.

    Parameters
    ----------
    array              : 2-D image array.
    desired_percentile : percentile used as the brightness cutoff (default 98).

    Returns
    -------
    list : [x_mean, y_mean], i.e. [mean column, mean row] of the selected pixels.
    """
    # Brightness cutoff, computed over all pixels while skipping NaNs
    cutoff = np.nanpercentile(array.ravel(), desired_percentile)

    # NaN pixels never satisfy the comparison, so they are left out automatically
    bright_rows, bright_cols = np.nonzero(array >= cutoff)

    return [bright_cols.mean(), bright_rows.mean()]

In [26]:
def replace_nan_edges(image):
    """Fill NaN regions that touch the image border with the image's NaN-ignoring mean.

    NaN pixels are grouped into connected regions with scipy.ndimage.label (default
    connectivity). Every region containing at least one pixel on the outermost
    row or column is filled; NaN regions fully inside the image are left as NaN.
    The input image is not modified.

    NOTE: this is only used for determining the center of the asteroid.

    Parameters
    ----------
    image : 2-D float array.

    Returns
    -------
    A copy of `image` with the border-connected NaNs replaced by np.nanmean(image).
    """
    nan_pixels = np.isnan(image)

    # Seeds: NaN pixels sitting on the outermost rows/columns
    on_border = np.zeros_like(image, dtype=bool)
    on_border[0, :] = on_border[-1, :] = True
    on_border[:, 0] = on_border[:, -1] = True
    seeds = nan_pixels & on_border

    # Label the connected NaN regions and pick the ones that contain a seed
    regions, _ = label(nan_pixels)
    touching_border = np.unique(regions[seeds])

    patched = image.copy()
    patched[np.isin(regions, touching_border)] = np.nanmean(image)

    return patched

In [27]:
def refine_center(array, initial_center, max_iter=50, radius=20, min_radius=4, convergence_threshold=1e-3, thresh_perc=80):
    """Iteratively move a centre estimate onto the intensity-weighted centroid of nearby bright pixels.

    On every iteration:
      * a circular search region is placed on the current centre; its radius shrinks
        linearly from `radius` with the iteration number but never below `min_radius`;
      * the `thresh_perc` percentile of the values inside the region (NaNs counted as 0)
        is used as a brightness cutoff;
      * the new centre is the intensity-weighted centroid of the in-region pixels at or
        above that cutoff.
    The loop ends when the selected intensity sums to zero, when the new centre agrees
    with the current one (tolerance grows with the iteration number), or after
    `max_iter` iterations.

    Parameters
    ----------
    array                 : 2-D image array.
    initial_center        : starting (x, y) estimate, i.e. (column, row).
    max_iter              : maximum number of iterations.
    radius, min_radius    : initial and minimum search radius in pixels.
    convergence_threshold : base absolute tolerance for the convergence test.
    thresh_perc           : percentile used as the brightness cutoff.

    Returns
    -------
    np.ndarray : the last accepted (x, y) centre. When the loop stops on convergence the
                 centre from the previous iteration is returned, not the newest candidate.
    """
    center = np.array(initial_center)

    # Row/column index of every pixel; independent of the iteration
    rows, cols = np.indices((array.shape[0], array.shape[1]))

    for step in range(max_iter):
        # Distance of every pixel from the current centre
        separation = np.sqrt((cols - center[0])**2 + (rows - center[1])**2)

        # Search radius shrinks as the iterations progress
        search_radius = max(radius * (1 - step / max_iter), min_radius)
        in_range = separation <= search_radius

        # Brightness cutoff from the in-range values, NaNs treated as 0
        local_values = np.nan_to_num(array[in_range], nan=0)
        cutoff = np.nanpercentile(local_values, thresh_perc)

        # Pixels that are both in range and bright enough (NaNs fail the comparison)
        selected = in_range & (array >= cutoff)
        weights = np.nan_to_num(array[selected], nan=0)

        weight_sum = np.sum(weights)
        if weight_sum == 0:
            # Nothing to weight by; avoid dividing by zero
            break

        candidate = np.array([
            np.sum(cols[selected] * weights) / weight_sum,
            np.sum(rows[selected] * weights) / weight_sum,
        ])

        # Converged: keep the current centre and stop
        if np.allclose(candidate, center, atol=convergence_threshold * (step + 1) / max_iter):
            break

        center = candidate

    return center

## Determine Asteroid Flux

In [28]:
def Flux_Calculation(image_filter, image_parameters, image_data, center, asteroid_name, observation, exposureTimeSeconds, folder_path, produce_image, extra_pixels = 15):
    """Measure the background-subtracted aperture flux of an asteroid.

    The image is cropped around `center`, the pixel values inside a filter-specific
    aperture are summed, and the per-pixel background estimated from a surrounding
    annulus is subtracted. Optionally a diagnostic plot of the regions is saved.

    Parameters
    ----------
    image_filter        : filter name; must be a key of `filters_psf` below (KeyError otherwise).
    image_parameters    : (pixel units label, sr-to-pixel conversion, arcsec-to-pixel conversion).
                          Only the first two are used here.
    image_data          : 2-D image array.
    center              : (x, y) = (column, row) of the asteroid in `image_data`.
    asteroid_name       : used in the plot title and output file name.
    observation         : used in the output file name.
    exposureTimeSeconds : exposure time used by the sensitivity-based S/N estimate.
    folder_path         : folder the diagnostic plot is written to.
    produce_image       : when truthy, save 'FluxRegion_<observation>_<asteroid_name>.png'.
    extra_pixels        : margin added around the outer annulus when cropping.

    Returns
    -------
    tuple :
        (flux string (aperture flux x aperture correction, rounded to 4 decimals),
         flux error string in percent,
         annulus average / image median,
         signal-to-noise ratio,
         cropped image,
         [col_start, row_start] of the crop in the full image,
         aperture NaN percentage,
         annulus NaN percentage)
        When the annulus holds no usable pixels the background is unknown: the flux is
        NaN, the signal-to-noise ratio is 0 and the flux error is "100.0".
    """
    # Per filter: [aperture radius, inner annulus radius, outer annulus radius, aperture correction]
    # (radii are used as pixel distances below)
    filters_psf = { "F560W":  [3.87,  10.78, 18.39, 1.436],
                    "F770W":  [4.22,  8.92,  14.64, 1.442],
                    "F1000W": [4.60,  6.70,  11.58, 1.453],
                    "F1130W": [4.92,  7.06,  11.22, 1.465],
                    "F1280W": [5.18,  7.61,  10.99, 1.483],
                    "F1500W": [5.69,  8.63,  11.45, 1.497],
                    "F1800W": [6.29,  10.09, 12.52, 1.506],
                    "F2100W": [7.91,  11.90, 16.07, 1.492],
                    "F2550W": [9.18,  14.03, 17.71, 1.506],
                    "FND":    [5.27,  8.98,  13.74, 1.455],
                    "F1065C": [10.07, 17.77, 24.80, 1.464],
                    "F1140C": [10.68, 19.10, 27.06, 1.461],
                    "F1550C": [14.21, 25.52, 36.77, 1.459],
                    "F2300C": [13.27, 16.84, 27.25, 1.470]}

    # Per-filter sensitivity in microJansky
    # NOTE(review): the S/N formula below scales these as a 10-sigma limit for a 10000 s exposure - confirm
    miri_sensativity = {"F560W":  0.13,
                        "F770W":  0.24,
                        "F1000W": 0.46,
                        "F1130W": 1.02,
                        "F1280W": 0.83,
                        "F1500W": 1.18,
                        "F1800W": 2.42,
                        "F2100W": 4.70,
                        "F2550W": 15.3,}

    apertureRange, innerAnnulus, outerAnnulus, correctionFactor = filters_psf[image_filter]

    rows, cols = image_data.shape

    # Calculate crop boundaries with bounds checking using numpy.clip and additional buffer pixels
    # NOTE(review): the upper bounds are clipped to size - 1 and then used as exclusive slice ends,
    # so the last row/column of the image is never part of the crop - confirm this is intended
    col_start, col_end = np.clip([int(center[0] - outerAnnulus - extra_pixels), int(center[0] + outerAnnulus + extra_pixels)], 0, cols - 1)
    row_start, row_end = np.clip([int(center[1] - outerAnnulus - extra_pixels), int(center[1] + outerAnnulus + extra_pixels)], 0, rows - 1)

    # Extract cropped region and express the centre in crop coordinates
    cropped_image = image_data[row_start:row_end, col_start:col_end]
    center_crop = np.array([center[0] - col_start, center[1] - row_start])

    current_pixel_units, sr_to_pix_conversion, arcsec_to_pix_conversion = image_parameters
    sr_to_pix = float(sr_to_pix_conversion)

    image_shape = cropped_image.shape

    # get_pixels_within_radius shrinks every radius by 0.4 pixels; adding 0.4 to the inner
    # annulus radius cancels that, so partially covered pixels at the inner edge are excluded
    pixels_in_aperture =      get_pixels_within_radius(image_shape, center_crop, apertureRange)
    pixels_in_inner_annulus = get_pixels_within_radius(image_shape, center_crop, innerAnnulus + 0.4)
    pixels_in_outer_annulus = get_pixels_within_radius(image_shape, center_crop, outerAnnulus)

    # Annulus = outer disc minus inner disc. A set lookup replaces the pairwise array scan,
    # and the reshape keeps an (N, 2) layout even when no annulus pixel exists.
    inner_lookup = set(map(tuple, pixels_in_inner_annulus.tolist()))
    pixels_in_annulus = np.array(
        [pixel for pixel in pixels_in_outer_annulus.tolist() if tuple(pixel) not in inner_lookup],
        dtype=int,
    ).reshape(-1, 2)

    pixel_aperture_sumtotal, pixel_aperture_count, pixel_aperture_average, ap_NAN_percent  = get_pixel_value(cropped_image, pixels_in_aperture)
    pixel_annulus_sumtotal,  pixel_annulus_count,  pixel_annulus_average, ann_NAN_percent  = get_pixel_value(cropped_image, pixels_in_annulus)

    # Determine the average background value per pixel.
    # Without any usable annulus pixel the background is unknown (NaN) rather than a crash.
    if pixel_annulus_count > 0:
        anulus_average_MJy = (float(pixel_annulus_sumtotal) * sr_to_pix) / float(pixel_annulus_count)
    else:
        anulus_average_MJy = float('nan')

    # Subtract the background amount (background/pixel * aperture pixel amount) from the total aperture amount
    aperture_MJy = (float(pixel_aperture_sumtotal) * sr_to_pix) - (anulus_average_MJy * float(pixel_aperture_count))

    # Convert the units from MJy to mJy since the sources are likely faint
    aperture_mJy = aperture_MJy * 1e9

    annulus_average = pixel_annulus_average * sr_to_pix * 1e9
    image_median = np.nanmedian(cropped_image) * sr_to_pix * 1e9

    if pixel_annulus_count == 0 or pixel_annulus_sumtotal == 0 or pixel_aperture_count == 0:
        signal_to_noise_ratio = 0
    elif image_filter in miri_sensativity:
        # Sensitivity-based estimate: flux converted from mJy to microJansky
        signal_to_noise_ratio = 10*((aperture_mJy*1000)/miri_sensativity[image_filter])*np.sqrt(exposureTimeSeconds/10000)
    else:
        # No tabulated sensitivity: compare the sky-subtracted signal with the background in the aperture
        noise_in_aperture = (float(anulus_average_MJy) * float(pixel_aperture_count))
        sky_subtracted_source_signal = (float(pixel_aperture_sumtotal) * sr_to_pix) - noise_in_aperture
        signal_to_noise_ratio = sky_subtracted_source_signal / noise_in_aperture

    if produce_image:
        # Visualization: crop with the estimated centre, the aperture and both annulus edges
        fig, ax = plt.subplots()
        cax = ax.imshow(cropped_image)
        ax.scatter([center_crop[0]], [center_crop[1]], c='white', s=30, label='Estimated Center')

        circle1 = Circle((center_crop[0], center_crop[1]), apertureRange, edgecolor='tab:green', facecolor='none', linewidth=4, label="Aperture")
        circle2 = Circle((center_crop[0], center_crop[1]), innerAnnulus,  edgecolor='tab:red',   facecolor='none', linewidth=4, label="Annulus")
        circle3 = Circle((center_crop[0], center_crop[1]), outerAnnulus,  edgecolor='tab:red',   facecolor='none', linewidth=4)

        # Add the circles to the plot
        ax.add_patch(circle1)
        ax.add_patch(circle2)
        ax.add_patch(circle3)

        ax.set_title(f"Asteroid {asteroid_name}, Filter: {image_filter}, Flux: {round(aperture_mJy * correctionFactor,4)}mJy, SNR: {round(signal_to_noise_ratio,2)}")
        ax.legend()
        plt.gca().invert_yaxis()

        cbar = plt.colorbar(cax)
        cbar.set_label(f'Intensity ({current_pixel_units})', rotation=270, labelpad=15)

        plt.savefig(f"{folder_path}/FluxRegion_{observation}_{asteroid_name}.png", bbox_inches='tight')
        plt.close(fig)

    flux_output = f"{round(aperture_mJy * correctionFactor,4)}"

    # Percent error: 100% without a usable S/N, floored at 5% for S/N >= 20
    if signal_to_noise_ratio == 0:
        flux_error = "100.0"
    elif signal_to_noise_ratio >= 20:
        flux_error = "5.0"
    else:
        flux_error = str(round(100 / float(signal_to_noise_ratio), 1))

    return(flux_output, flux_error, annulus_average/image_median, signal_to_noise_ratio, cropped_image, [col_start, row_start], ap_NAN_percent, ann_NAN_percent)

## Output Image Generation

In [20]:
def produceOriginalImage(imageData, WCS, obsID, filePath):
    """Save the full observation image as '<filePath>/Original_<obsID>.png'.

    Parameters
    ----------
    imageData : 2-D image array to display.
    WCS       : object handed to matplotlib as the axes `projection`
                (inside this function the name shadows the imported WCS class).
    obsID     : observation identifier, used in the title and the file name.
    filePath  : folder the PNG is written to.
    """
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(1, 1, 1, projection=WCS)

    ax.set_title(f'{obsID},  i2d Image')
    ax.set_xlabel('RA')
    ax.set_ylabel('DEC')

    # Square-root stretch for better presentation
    stretch = simple_norm(imageData, 'sqrt')
    ax.imshow(imageData, cmap='viridis', norm=stretch)

    fig.savefig(f'{filePath}/Original_{obsID}.png', bbox_inches='tight')
    plt.close(fig)

In [21]:
def produceOverlayImage(imageData, WCS, obsID, asteroidList, Xlist, Ylist, filePath):
    """Save the observation with a bounding box per known asteroid, plus one cutout per asteroid.

    The figure has one panel for the full image followed by one panel per entry of
    `asteroidList`, and is written to '<filePath>/Overlay_<obsID>.png'.

    Parameters
    ----------
    imageData    : 2-D image array.
    WCS          : object handed to matplotlib as the `projection` of every panel.
    obsID        : observation identifier, used in titles and the file name.
    asteroidList : asteroid names; may be empty (only the full image is drawn then).
    Xlist, Ylist : per-asteroid arrays of pixel coordinates, parallel to `asteroidList`.
    filePath     : folder the PNG is written to.
    """
    output_file_path = f'{filePath}/Overlay_{obsID}.png'

    # Parameters for subplot size
    subplot_width = 7   # Width of each subplot
    subplot_height = 8  # Height of each subplot
    n_subplots = len(asteroidList) + 1  # Full image + one cutout per asteroid

    # squeeze=False keeps `axes` a 2-D array even for a single panel, so indexing
    # also works when no asteroid is supplied
    fig, axes = plt.subplots(1, n_subplots, figsize=(subplot_width * n_subplots, subplot_height),
                             subplot_kw={'projection': WCS}, squeeze=False)

    # First subplot is for the full observation image with bounding box around each asteroid
    ax1 = axes[0, 0]
    ax1.set_xlabel('RA')
    ax1.set_ylabel('DEC')
    ax1.set_title(f'{obsID},  i2d Image')

    # Normalize image data for better presentation
    norm = simple_norm(imageData, 'sqrt')
    ax1.imshow(imageData, cmap='viridis', norm=norm, aspect='auto', zorder=1)

    # Asteroid-specific colors for plotting; these must contrast the background well.
    # With more asteroids than colors the palette is reused from the start.
    colors = ['orangered', 'maroon', 'firebrick', 'goldenrod', 'tomato', 'chocolate',
              'tab:red', 'tab:orange', 'red', 'orange',  'black', 'yellow', 'blueviolet']

    # Buffer space around the asteroid position
    extraPixels = 20
    rows, cols = imageData.shape

    # Loop through asteroids and overlay positions, each getting their own subplot cutout and overlay
    # NOTE(review): the cutout panels are created with the full-image projection while showing a
    # cropped array, so their RA/DEC tick labels presumably refer to the un-cropped frame - confirm
    for indx, asteroid in enumerate(asteroidList):
        color = colors[indx % len(colors)]

        ax_ = axes[0, indx + 1]
        ax_.set_xlabel('RA')
        ax_.set_ylabel('DEC')
        ax_.set_title(f'{obsID} Asteroid {asteroid}')

        # X and Y positions of the enclosed asteroid
        X = Xlist[indx]
        Y = Ylist[indx]

        # First and last positions span the crop region (identical for a single position)
        P1 = [X[0], Y[0]]
        P2 = [X[-1], Y[-1]]

        # Define the default bounds with extra space
        lowerx = round(min(P1[0], P2[0])) - extraPixels
        upperx = round(max(P1[0], P2[0])) + extraPixels
        lowery = round(min(P1[1], P2[1])) - extraPixels
        uppery = round(max(P1[1], P2[1])) + extraPixels

        # Calculate crop boundaries with bounds checking using numpy.clip
        col_start, col_end = np.clip([lowerx, upperx], 0, cols - 1)
        row_start, row_end = np.clip([lowery, uppery], 0, rows - 1)

        # Extract and show the cropped region
        cropped_image = imageData[row_start:row_end, col_start:col_end]
        ax_.imshow(cropped_image, cmap='viridis', zorder=1)

        # Bounding box on the full image
        ax1.plot([lowerx, upperx, upperx, lowerx, lowerx],
                 [lowery, lowery, uppery, uppery, lowery],
                 alpha=0.7, c=color, linewidth=2, zorder=2, label=asteroid)

        # Asteroid positions on the cutout, shifted into crop coordinates
        ax_.scatter(X - col_start, Y - row_start, s=15, alpha=0.8, c=color, zorder=2, label='JPL Coordinates')
        ax_.legend()

    ax1.legend()
    plt.tight_layout()

    plt.savefig(output_file_path, bbox_inches='tight')
    plt.close(fig)

In [22]:
def highContrastImage(cropped_image, asteroidName, filePath, observation):
    """Save a large plot of the cutout around an asteroid to assist visual identification.

    The image is written to '<filePath>/HighContrastZoom_<observation>_<asteroidName>.png'.

    Parameters
    ----------
    cropped_image : 2-D image cutout to display.
    asteroidName  : asteroid name, used in the title and the file name.
    filePath      : folder the PNG is written to.
    observation   : observation identifier, used in the title and the file name.
    """
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.set_title(f"{observation},  Cutout of Asteroid {asteroidName}")
    ax.set_xlabel('Pixel Columns')
    ax.set_ylabel('Pixel Rows')

    # Show the cutout with its colour scale; flip the y axis after drawing
    shown = ax.imshow(cropped_image, cmap='viridis')
    fig.colorbar(shown)
    ax.invert_yaxis()

    fig.savefig(f'{filePath}/HighContrastZoom_{observation}_{asteroidName}.png', bbox_inches='tight')
    plt.close(fig)

In [23]:
def highContrastImageWithOverlay(cropped_image, asteroidName, X,Y, filePath, observation):
    """Save the cutout around an asteroid with the JPL probe points drawn on top.

    With more than one probe point a faint arrow is drawn through the points, from
    the first towards the last. The image is written to
    '<filePath>/HighContrastZoomOverlay_<observation>_<asteroidName>.png'.

    Parameters
    ----------
    cropped_image : 2-D image cutout to display.
    asteroidName  : asteroid name, used in the title and the file name.
    X, Y          : probe-point coordinates in cutout pixels.
    filePath      : folder the PNG is written to.
    observation   : observation identifier, used in the title and the file name.
    """
    output_file_path = f'{filePath}/HighContrastZoomOverlay_{observation}_{asteroidName}.png'

    plt.figure(figsize=(12, 12))
    plt.title(f"{observation},  Cutout of Asteroid {asteroidName}")
    plt.xlabel('Pixel Columns')
    plt.ylabel('Pixel Rows')

    cax = plt.imshow(cropped_image, cmap='viridis')

    # Overlay the Asteroid probe points (JPL), and a directional arrow for multiple points
    if len(X) > 1:
        # Vector from the first to the last probe point
        span_x, span_y = X[-1] - X[0], Y[-1] - Y[0]

        # Extend the arrow by 40% of the span before the first point and 70% past the last.
        # Scaling the span directly (instead of normalising and re-multiplying by the length)
        # avoids a division by zero when the first and last points coincide.
        start_x, start_y = X[0] - span_x * 0.4, Y[0] - span_y * 0.4
        end_x, end_y = X[-1] + span_x * 0.7, Y[-1] + span_y * 0.7

        # Add JPL probe coordinates
        plt.scatter(X, Y, color='orange', alpha=0.7, label='JPL Coordinates')

        # Draw a faint arrow pointing through the data points
        plt.arrow(start_x, start_y, end_x - start_x, end_y - start_y,
                  color='orange', alpha=0.3, width=0.8, head_width=1.5, head_length=1.6)

    else:
        # Plot only the JPL Coordinate
        plt.scatter(X, Y, color='orange', alpha=0.5, label='JPL Coordinates')

    plt.colorbar(cax)
    plt.gca().invert_yaxis()
    plt.legend()

    plt.savefig(output_file_path, bbox_inches='tight')
    plt.close()

## Main Function

In [40]:
def main(propRange, instrumentName, dataType, volume, additionalFilters=None, folderName = 'Results', level3_CSV_Input_Path = '', produce_image = True):

    """
    Run the JWST Known-Asteroid Detection: a Level 3 search followed by a Level 2 search.

    PARAMETERS:
    propRange:              [first, last] proposal ID; both ends are included
    instrumentName,
    dataType, volume,
    additionalFilters:      forwarded unchanged to the Level 3 / Level 2 archive queries
                            (additionalFilters defaults to an empty list)
    folderName:             folder the CSV files and images are written to
    level3_CSV_Input_Path:  path of an existing Level 3 CSV; when given, the Level 3
                            search is skipped and this file is used instead
    produce_image:          forwarded to the Level 2 asteroid search

    A log file 'Asteroid_Detection_log_<timestamp>.txt' is written next to the Level 3
    input CSV when its path contains a '/', otherwise to the current directory.
    Returns None; it returns early when no observation falls inside propRange.

    OUTPUTS:
    Level 3:
            2 Level 3 Asteroid Detection CSV 
                    1, Full Documentation on all observations
                    2, Documentation on ONLY Observations containing Known-Asteroids
    
    Level 2:
            2 Level 2 Asteroid Detection CSV 
                    1, Full Documentation on all observations
                    2, Documentation on ONLY Observations containing Known-Asteroids
                    
            Folders named after proposals containing Known-Asteroids
                    Plot of Observation
                    Plot of Observation with Known-Asteroid Overlay
                    
                    Plot of Observation Cropped around Known-Asteroid
                    Plot of Observation Cropped around Known-Asteroid with Overlay
                    
                    Plot of Flux Region Determined (Aperture and Annulus)
    """ 

    # A fresh list per call instead of one list shared between calls
    if additionalFilters is None:
        additionalFilters = []

    # The log file lives next to the Level 3 input CSV when one is given
    if '/' in level3_CSV_Input_Path:
        folder_path = os.path.dirname(level3_CSV_Input_Path)
    else:
        folder_path = './'

    current_time = datetime.now()
    timestamp = current_time.strftime("%Y%m%d_%H%M%S")
    log_filename = f"{folder_path}/Asteroid_Detection_log_{timestamp}.txt"

    # Configure logging to write to a file
    logging.basicConfig(
        filename=log_filename,  # Log file
        level=logging.INFO,     # Logging level
        force=True,             # Replace handlers left over from an earlier run
        format="%(message)s"    # Log message format
    )

    # Log messages (these will go to the file only)
    logging.info(f"This run was performed at {timestamp}\n")

    # Generate the output folder for the results
    generateFolder(folderName)

    ##### ----- LEVEL 3 ----- #####

    # If the Level 3 CSV has already been generated then dont rerun it
    if level3_CSV_Input_Path == '':
        print('Performing Level 3 Search')
        start_time = time.time()
        logging.info('Performing Level 3 Search')
        # Perform an archive search for level 3 observations meeting the qualifications defined in the main function
        Level_3_Archive_DF = level3_Archive_Query(propRange, instrumentName, dataType, volume, additionalFilters)

        if Level_3_Archive_DF.empty:
            print(f"Archive does not contain observations in the range {propRange}")
            logging.info(f'Archive does not contain observations in the range {propRange}')
            return

        # Iterate through the level 3 archive data and check images for asteroids
        Level_3_Archive_DF['Asteroids'] = Level_3_Archive_DF.progress_apply(lambda row: pd.Series(level3_asteroid_search(row)), axis=1)

        # Prepare Data Frame for CSV presentation
        Level_3_Archive_CSV = (
            Level_3_Archive_DF.rename(columns={
                'proposal_id': 'Proposal', 
                'observationid': 'Observation', 
                'dataproducttype': 'Data Type',
                'intent': 'Intent',
                'instrument_name': 'Instrument',
                'energy_bandpassname': 'Filter',
                'target_moving': 'Moving', 
                'position_bounds_spoly': 'Polygon Boundary', 
                'time_bounds_lower': 'Exposure Start', 
                'time_bounds_upper': 'Exposure End',
                'members': 'Level 2 Members'
            })

            .assign(**{
                'Exposure Start': lambda df: df['Exposure Start'].apply(MJDconversion),                                     # Convert from MJD to 'yyyy-mm-dd HH:MM:SS'
                'Exposure End': lambda df: df['Exposure End'].apply(MJDconversion),                                         # Convert from MJD to 'yyyy-mm-dd HH:MM:SS'
                'Moving': lambda df: df['Moving'].replace({0: 'No', 1: 'Yes'}),                                             # Adds Yes/No if the observation is 'moving'
                'Datalabs': lambda df: df.apply(lambda row: checkDataExists(row['Proposal'], row['Observation']),axis=1),   # Add 'data_exists' column tracking if the img is on Datalabs
                'Level 2 Members': lambda df: df['Level 2 Members'].apply(members_string)})                                 # Configure the 'members' format
            .sort_values(by='Observation', ascending=True)                                                                  # Sort by Observation ID
            .reset_index(drop=True))                                                                                        # Reset the index

        # Save CSV
        Level_3_Archive_CSV.to_csv(f"{folderName}/Level3_Asteroid_Search_Full.csv",index=False)

        # Reduce dataframe to only observations with asteroids (any letter in the 'Asteroids' text), save CSV
        Level_3_Archive_CSV_Only_Asteroids = Level_3_Archive_CSV[Level_3_Archive_CSV['Asteroids'].str.contains(r'[a-zA-Z]', na=False)].copy()
        Level_3_Archive_CSV_Only_Asteroids.to_csv(f"{folderName}/Level3_Asteroid_Search.csv",index=False)

        logging.info(f'Finished the Level 3 Search:')
        logging.info(f'From {len(Level_3_Archive_CSV)} Searched Observations')
        logging.info(f'{len(Level_3_Archive_CSV_Only_Asteroids)} Observations contained atleast 1 Asteroid')
        logging.info(f'This took {round((time.time()- start_time)/60)} minutes\n')

    else:
        print(f'Pulling Level 3 Data from CSV:  "{level3_CSV_Input_Path}"')
        logging.info(f'Pulling Level 3 Data from CSV:  "{level3_CSV_Input_Path}"')

        # Convert the CSV into a dataframe
        Level_3_Archive_CSV = pd.read_csv(level3_CSV_Input_Path)

        # Reduce dataframe to only observations with asteroids
        Level_3_Archive_CSV_Only_Asteroids = Level_3_Archive_CSV[Level_3_Archive_CSV['Asteroids'].str.contains(r'[a-zA-Z]', na=False)].copy()

        # Report the number of observations read from the CSV (not the length of the path string)
        logging.info(f'From {len(Level_3_Archive_CSV)} Searched Observations')
        logging.info(f'{len(Level_3_Archive_CSV_Only_Asteroids)} Observations contained atleast 1 Asteroid\n')
        logging.info('----------------------------------------------------------------------------\n')

    ##### ----- LEVEL 2 ----- #####

    print('Performing Level 2 Search')
    logging.info('Performing Level 2 Search\n')
    # Prepare Members for Level 2 inspection: one row per Level 2 member observation
    Level_3_Archive_CSV_Only_Asteroids['Level 2 Members'] = Level_3_Archive_CSV_Only_Asteroids['Level 2 Members'].str.split(', ')

    Members_DF = Level_3_Archive_CSV_Only_Asteroids.explode('Level 2 Members').reset_index(drop=True)
    Members_DF = Members_DF[['Proposal','Level 2 Members']]

    # Convert proposals to numericals (non-numeric entries become NaN and drop out of the range filter)
    Members_DF['Proposal'] = pd.to_numeric(Members_DF['Proposal'], errors='coerce')

    Members_Range_DF = Members_DF[(Members_DF['Proposal'] >= propRange[0]) & (Members_DF['Proposal'] <= propRange[1])]

    if Members_Range_DF.empty:
        print(f"CSV does not contain observations in the range {propRange}")
        logging.info(f'CSV does not contain observations in the range {propRange}\n')
        return

    # Remove duplicate entries for 'members' and 'Proposal'
    Unique_Members_DF = Members_Range_DF.drop_duplicates().reset_index(drop=True)
    Unique_Members_List = Unique_Members_DF['Level 2 Members'].tolist()

    # Query the archive data for the level 2 member observations
    Level_2_Archive_DF = level2_Archive_Query(Unique_Members_List, instrumentName, dataType, volume, additionalFilters)

    # Generate a dictionary relating proposal ID to asteroids found in level 3 cone search
    Proposal_Asteroid_Dict = {}

    start_time2 = time.time()

    for _, row in Level_3_Archive_CSV_Only_Asteroids.iterrows():
        proposal = str(row['Proposal'])
        asteroids = [asteroid.strip() for asteroid in str(row['Asteroids']).split(', ') if asteroid.strip()]
        if proposal not in Proposal_Asteroid_Dict:
            # Initialize as a list if the key doesn't exist
            Proposal_Asteroid_Dict[proposal] = []  
        Proposal_Asteroid_Dict[proposal].extend(asteroids)

    # Have the asteroids contained be unique
    Proposal_Asteroid_Dict = {key: list(set(value)) for key, value in Proposal_Asteroid_Dict.items()}

    # Iterate through the level 2 archive data and check images for asteroids    
    Level_2_Archive_DF[['Exposure Length (m)', 'Asteroids', 'Common Name', 'JPL Classification', 'JPL Radius (km)', 'JPL Geo Albedo', 
                        'JPL Phase Angle (Deg)', 'JPL Sun-Asteroid Dist (AU)','JPL Asteroid-JWST Dist (AU)', 'JPL Midpoint (RA,DEC)', 
                        'JPL Pos Rates (arcsec/hr)', 'JPL Pos Uncertainty (arcsec)','Readout Mode','Sub Array Configuation','Array Data Type','Dither Type', 
                        'BrightSky Pix Numbers','Pixel Units','Flux Density (MJy/sr)','Pixel Area (arcsec2/Pix)','JPL Visual Magnitude (mag)',
                        'JPL Surface Brightness (mag/arcsec2)','Asteroid Flux (mJy)', 'Flux Error %' ,'Annulus / Img Median', 'S/N', 'JPL Asteroid Distance (pix)',
                        'JPL Asteroid Distance (arcsec/exptime)','JPL Center Offset (arcsec)','Aperture NAN Perc','Annulus NAN Perc', 
                        'Streak Flag']] = Level_2_Archive_DF.progress_apply(lambda row: pd.Series(level2_asteroid_search(row, Proposal_Asteroid_Dict, produce_image, folderName)), axis=1)

    # Prepare Data Frame for CSV presentation
    Level_2_Archive_CSV = (
        Level_2_Archive_DF.rename(columns={
            'proposal_id': 'Proposal', 
            'observationid': 'Observation', 
            'dataproducttype': 'Data Type',
            'intent': 'Intent',
            'instrument_name': 'Instrument',
            'energy_bandpassname': 'Filter', 
            'target_name': 'Target Name',
            'target_moving': 'Target Moving', 
            'position_bounds_spoly': 'Polygon Boundary', 
            'time_bounds_lower': 'Exposure Start', 
            'time_bounds_upper': 'Exposure End',
        })

        .assign(**{
            'Exposure Start': lambda df: df['Exposure Start'].apply(MJDconversion),                                     # Convert from MJD to 'yyyy-mm-dd HH:MM:SS'
            'Exposure End': lambda df: df['Exposure End'].apply(MJDconversion),                                         # Convert from MJD to 'yyyy-mm-dd HH:MM:SS'
            'Target Moving': lambda df: df['Target Moving'].replace({0: 'No', 1: 'Yes'}),                               # Adds Yes/No if the observation is 'moving'
            'Datalabs': lambda df: df.apply(lambda row: checkDataExists(row['Proposal'], row['Observation']),axis=1)})  # Add 'data_exists' column based on checkDataExists
        .sort_values(by='Observation', ascending=True)                                                                  # Sort by Observation
        .reset_index(drop=True))                                                                                        # Reset the index

    # Save CSV
    Level_2_Archive_CSV.to_csv(f"{folderName}/Level2_Asteroid_Search_Full.csv",index=False)

    # Reduce dataframe to only observations whose 'Asteroids' text is non-empty after stripping, save CSV
    Level_2_Archive_CSV_Only_Asteroids = Level_2_Archive_CSV[Level_2_Archive_CSV['Asteroids'].str.strip().astype(bool)].copy()
    Level_2_Archive_CSV_Only_Asteroids.to_csv(f"{folderName}/Level2_Asteroid_Search.csv",index=False)

    logging.info(f'Finished the Level 2 Search:')
    logging.info(f'From {len(Level_2_Archive_CSV)} Searched Observations')
    logging.info(f'{len(Level_2_Archive_CSV_Only_Asteroids)} Observations contained atleast 1 Asteroid')
    logging.info(f'This took {(time.time() - start_time2)/60} minutes\n')
    logging.info(f'For more statistics run "Asteroid_Analysis" using this level 2 output')

## Run The Pipeline

In [43]:
%%time

# Constants and parameters
# Proposal ID range to process; main() treats both ends as inclusive
starting_proposal = 1000
ending_proposal = 7000

# Archive query settings, forwarded by main() to the archive query helpers
instrument = 'MIRI/IMAGE'
data_type = 'image'
volume = 'archive'
# Output folder for the CSV files and images
folder_name = 'Results'

proposal_range = [starting_proposal,ending_proposal]

# Supplying level3_CSV_Input_Path makes main() skip the Level 3 search and read that CSV instead
main(proposal_range, instrument, data_type, volume, folderName=folder_name, level3_CSV_Input_Path = 'Results/Level3_New_Obs.csv', produce_image = True)

# TODO: the kernel keeps restarting at 1177 and 1161 (presumably proposal IDs - confirm)
# TODO: check the position uncertainty for far and close objects
#       (the 1727 streak accidentally picks up a different source), 2015 XK95

Pulling Level 3 Data from CSV:  "Results/Level3_New_Obs.csv"
Performing Level 2 Search


  0%|          | 0/135 [00:00<?, ?it/s]

CPU times: user 1min 7s, sys: 12.4 s, total: 1min 20s
Wall time: 15min 20s
