In [118]:
# Asteroid Analysis

The Asteroid Analysis code performs a more detailed evaluation of the Level 3 data and of 'Level2_Asteroid_Search.csv'. It applies thresholds to key metrics, such as asteroid streak flags, Signal-to-Noise Ratio (SNR), and the percentage of NaN pixels in the measurement aperture, to identify reliable flux measurements. Observations are classified as 'accurate' if they meet the required thresholds, while the remaining 'potential' observations are flagged for further processing.

Additionally, the code analyzes asteroid distribution and frequency, identifying trends such as the most frequently observed asteroids, their ecliptic positions, filter usage, and observation timelines. These results are visualized in graphics, which are saved to the Analysis folder.

In [55]:
import pandas as pd
import ast
from collections import Counter
from datetime import datetime
import matplotlib.pyplot as plt
from adjustText import adjust_text
import numpy as np
import re
from mpl_toolkits.mplot3d import Axes3D
from astroquery.esa.jwst import Jwst
from astropy.coordinates import SkyCoord
import astropy.units as u
from astropy.time import Time
import sys
import os
import textwrap
import shutil

## Utility Functions

In [57]:
def extract_filter_number(filter_value):
    """Return the integer embedded in a wide-band filter name such as 'F1000W'.

    Parameters
    ----------
    filter_value : str
        Filter name to inspect. Non-string values (for example ``None`` or
        the ``NaN`` pandas uses for a missing cell) are treated as "no match".

    Returns
    -------
    int or None
        The digits found between ``F`` and ``W``, or ``None`` when the
        pattern ``F<digits>W`` does not occur in ``filter_value``.
    """
    # re.search raises TypeError on non-strings, so guard before matching.
    if not isinstance(filter_value, str):
        return None

    match = re.search(r'F(\d+)W', filter_value)
    return int(match.group(1)) if match else None

In [58]:
def convert_ra_dec_to_galactic(ra, dec):
    """Convert an ICRS position to Galactic coordinates.

    ``ra`` and ``dec`` are multiplied by ``u.deg``, i.e. they are taken to be
    in degrees. Returns the tuple ``(l, b)`` of Galactic longitude and
    latitude, both in degrees.
    """
    icrs_position = SkyCoord(ra=ra * u.deg, dec=dec * u.deg, frame='icrs')
    galactic_position = icrs_position.galactic

    longitude_deg = galactic_position.l.deg
    latitude_deg = galactic_position.b.deg
    return longitude_deg, latitude_deg

In [61]:
def generateFolder(folderName):
    """Create ``folderName`` if it does not exist yet.

    Missing parent folders are created as well. Nothing happens when the
    path already exists.
    """
    if not os.path.exists(folderName):
        # makedirs (unlike mkdir) also builds missing parents; exist_ok covers
        # the folder appearing between the check above and this call.
        os.makedirs(folderName, exist_ok=True)

In [None]:
def is_within_ecliptic_range(ra, dec, ecliptic_lat_range):
    """Report whether an RA/Dec position lies inside an ecliptic-latitude band.

    The position is converted with ``convert_ra_dec_to_ecliptic`` and only the
    latitude is used. ``ecliptic_lat_range`` is indexed as ``(low, high)`` and
    both ends are inclusive.
    """
    latitude = convert_ra_dec_to_ecliptic(ra, dec)[1]

    lower_bound = ecliptic_lat_range[0]
    upper_bound = ecliptic_lat_range[1]
    return lower_bound <= latitude <= upper_bound

In [56]:
class HiddenPrints:
    """Context manager that silences ``print`` by pointing ``sys.stdout`` at ``os.devnull``.

    On exit the stream that was active on entry is restored and the devnull
    handle opened by this instance is closed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference so __exit__ closes exactly this handle, even
        # if code inside the block reassigns sys.stdout.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so stdout is usable again even if close() fails.
        sys.stdout = self._original_stdout
        self._devnull.close()

In [59]:
def ecliptic_distance(ra, dec):
    """Return the ecliptic latitude, in degrees, of an ICRS RA/Dec position.

    ``ra`` and ``dec`` are taken to be in degrees (they are multiplied by
    ``u.deg``). The latitude is returned with its sign; no absolute value
    is applied.
    """
    icrs_position = SkyCoord(ra=ra * u.deg, dec=dec * u.deg, frame='icrs')

    # Latitude in the barycentric true-ecliptic frame.
    latitude_deg = icrs_position.barycentrictrueecliptic.lat.deg
    return latitude_deg

In [None]:
def convert_ra_dec_to_ecliptic(ra, dec):
    """Convert an ICRS RA/Dec position (degrees) to ecliptic coordinates.

    Returns the tuple ``(longitude, latitude)`` in degrees, expressed in the
    barycentric true-ecliptic frame.
    """
    icrs_position = SkyCoord(ra=ra*u.deg, dec=dec*u.deg, frame='icrs')
    ecliptic_position = icrs_position.barycentrictrueecliptic

    longitude_deg = ecliptic_position.lon.deg
    latitude_deg = ecliptic_position.lat.deg
    return longitude_deg, latitude_deg

## Determine Which Observations Contain Asteroids

In [74]:
def filter_DF(DF_, ecliptic_range, prop_Start, prop_End):
    """Keep the rows of a proposal window that contain asteroids.

    Parameters
    ----------
    DF_ : pandas.DataFrame
        Needs the columns 'Proposal' and 'Asteroids', plus 'Polygon Boundary'
        when ``ecliptic_range`` is truthy.
    ecliptic_range : number or falsy
        Half-width, in degrees of ecliptic latitude, of the accepted band
        ``(-ecliptic_range, +ecliptic_range)``. A falsy value (None, 0, False)
        disables the ecliptic cut.
    prop_Start, prop_End
        Inclusive bounds compared against the 'Proposal' column.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows, with every original column.
    """
    in_proposal_window = (DF_['Proposal'] >= prop_Start) & (DF_['Proposal'] <= prop_End)
    propFilteredDF = DF_[in_proposal_window]

    # A letter in the 'Asteroids' cell marks a row with at least one asteroid;
    # missing cells count as "no asteroid".
    has_asteroid = propFilteredDF['Asteroids'].str.contains(r'[a-zA-Z]', na=False)
    asteroidContainedDF = propFilteredDF[has_asteroid].copy()

    if not ecliptic_range:
        return asteroidContainedDF

    # 'Polygon Boundary' holds alternating RA/Dec values after an optional
    # 'Polygon ' prefix: even positions are RA, odd positions are Dec.
    polygons = asteroidContainedDF['Polygon Boundary'].astype(str).str.replace('Polygon ', '', regex=False)
    raList = polygons.apply(lambda x: [float(val) for val in x.split()[::2]])
    decList = polygons.apply(lambda x: [float(val) for val in x.split()[1::2]])

    # The footprint is represented by the mean of its vertices.
    # A boolean ndarray (not a plain list) is required: with zero rows an
    # empty list would be read as "select no columns" and drop every column.
    inEcliptic = np.array(
        [
            is_within_ecliptic_range(np.mean(ra), np.mean(dec),
                                     ecliptic_lat_range=(-ecliptic_range, ecliptic_range))
            for ra, dec in zip(raList, decList)
        ],
        dtype=bool,
    )

    return asteroidContainedDF[inEcliptic].copy()

In [77]:
def AsteroidObsFrequency(DF_1,prop_start, prop_end):
    """Count observations, asteroid-bearing observations, asteroids and proposals.

    ``prop_start`` and ``prop_end`` are accepted but not read by this function.

    Returns a 4-tuple:
    (number of rows, rows whose 'Asteroids' cell contains a letter,
     total asteroid names in those rows when split on ', ',
     number of distinct 'Proposal' values).
    """
    # Rows with a letter in 'Asteroids' hold at least one asteroid name;
    # missing cells are treated as "no asteroid".
    contains_asteroid = DF_1['Asteroids'].str.contains(r'[a-zA-Z]', na=False)

    names_per_row = DF_1.loc[contains_asteroid, 'Asteroids'].str.split(', ')
    asteroid_total = names_per_row.apply(len).sum()

    return (
        len(DF_1),
        contains_asteroid.sum(),
        asteroid_total,
        DF_1['Proposal'].nunique(),
    )

## Compare Asteroid Observation to Total Archive within an Ecliptic Range

In [75]:
def totalObservationCount(calLevel, moving, prop_start, prop_end, eclipticCheck = False):
    """Query the JWST archive for MIRI image observations and count them.

    The query selects ``proposal_id`` and ``position_bounds_spoly`` from
    ``jwst.archive`` for the given calibration level and proposal-id bounds.
    ``moving`` is added as a ``target_moving`` condition unless it equals 2.
    The query text is printed before it is submitted through
    ``Jwst.launch_job(..., async_job=True)``.

    ``eclipticCheck`` doubles as switch and band half-width: when truthy,
    footprints whose mean vertex lies within ``(-eclipticCheck, eclipticCheck)``
    of ecliptic latitude are counted.

    Returns ``(row count, distinct proposal count, third count)`` where the
    third value is the in-band count when ``eclipticCheck`` is truthy and the
    row count otherwise.
    """
    selected_columns = ['proposal_id', 'position_bounds_spoly']
    conditions = [
        f'jwst.archive.calibrationlevel = {calLevel}',
        "jwst.archive.dataproducttype = 'image'",
        f"jwst.archive.proposal_id <= '{prop_end}'",
        f"jwst.archive.proposal_id >= '{prop_start}'",
        "jwst.archive.instrument_name = 'MIRI/IMAGE'",
    ]
    if moving != 2:
        conditions.append(f'jwst.archive.target_moving = {moving}')

    query_string = "SELECT {} FROM jwst.archive WHERE ({})".format(
        ', '.join(selected_columns), ' AND '.join(conditions)
    )
    print(query_string)

    job = Jwst.launch_job(query_string, async_job=True)
    archive_df = job.get_results().to_pandas()
    archive_df = archive_df.sort_values(by='proposal_id').reset_index(drop=True)

    observation_total = len(archive_df['proposal_id'].tolist())
    uniqueProposalTotal = archive_df['proposal_id'].nunique()

    if not eclipticCheck:
        return observation_total, uniqueProposalTotal, observation_total

    # Footprints are alternating RA/Dec values after an optional 'Polygon ' prefix.
    polygons = archive_df['position_bounds_spoly'].astype(str).str.replace('Polygon ', '', regex=False)

    inEcliptic = []
    for polygon_text in polygons:
        tokens = polygon_text.split()
        ra_vertices = [float(token) for token in tokens[::2]]
        dec_vertices = [float(token) for token in tokens[1::2]]
        inEcliptic.append(
            is_within_ecliptic_range(
                np.mean(ra_vertices), np.mean(dec_vertices),
                ecliptic_lat_range=(-eclipticCheck, eclipticCheck),
            )
        )

    return observation_total, uniqueProposalTotal, sum(inEcliptic)

In [76]:
def countPerFilter(cal_level, moving, prop_start, prop_end):
    """Count archived MIRI image observations for each filter name.

    One archive query is submitted per filter through
    ``Jwst.launch_job(..., async_job=True)``. The conditions are the
    calibration level, the proposal-id bounds and, unless ``moving`` equals 2,
    the ``target_moving`` value.

    Returns a dict mapping each filter name to the number of rows returned.
    """
    filter_list = ['F560W', 'F770W', 'F1000W', 'F1130W', 'F1280W',
                   'F1500W', 'F1800W', 'F2100W', 'F2550W', 'FND',
                   'F1065C', 'F1140C', 'F1550C', 'F2300C']
    filter_dict = dict.fromkeys(filter_list, 0)

    query_topics = ['proposal_id', 'position_bounds_spoly']

    # Conditions shared by every per-filter query.
    base_filters = [
        f'jwst.archive.calibrationlevel = {cal_level}',
        "jwst.archive.dataproducttype = 'image'",
        f"jwst.archive.proposal_id <= '{prop_end}'",
        f"jwst.archive.proposal_id >= '{prop_start}'",
        "jwst.archive.instrument_name = 'MIRI/IMAGE'",
    ]
    if moving != 2:
        base_filters.append(f'jwst.archive.target_moving = {moving}')

    for filt in filter_list:
        query_filters = base_filters + [f"jwst.archive.energy_bandpassname = '{filt}'"]
        query_string = f"""
            SELECT {', '.join(query_topics)}
            FROM jwst.archive
            WHERE {' AND '.join(query_filters)}
        """

        job = Jwst.launch_job(query_string, async_job=True)
        matching_rows = job.get_results().to_pandas()

        filter_dict[filt] = len(matching_rows)

    return filter_dict

## Reconfigure And Sort Dataframe To Account For Observations With Multiple Asteroids 

In [None]:
def sort_filters(filter_set):
    """Join filter names into one string, ordered by their embedded number.

    Names that start with ``F<digits>W`` are ordered by that number. All other
    names come last, ordered alphabetically among themselves.

    Parameters
    ----------
    filter_set : iterable of str
        Filter names, typically a set.

    Returns
    -------
    str
        The names joined with ', ' (an empty string for an empty input).
    """
    def sort_key(filter_name):
        match = re.match(r'F(\d+)W', filter_name)
        number = int(match.group(1)) if match else float('inf')
        # The name breaks ties: without it, names sharing a key would follow
        # the set's iteration order, which can change between runs.
        return (number, filter_name)

    return ', '.join(sorted(filter_set, key=sort_key))

In [79]:
def separate_asteroids(DF):
    """Expand rows that list several asteroids into one row per asteroid.

    Each per-asteroid column holds a ', '-separated list, or a
    '), '-separated list of parenthesised pairs. Every row is repeated once
    per list entry; columns with fewer entries than the longest list are
    filled with ``None``. Rows whose per-asteroid columns are all missing
    produce no output row.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must contain every column named in ``list_columns`` below.

    Returns
    -------
    (expandedDF, expandedDF_with_changes)
        ``expandedDF`` keeps the original column names.
        ``expandedDF_with_changes`` is a copy in which the midpoint and
        position-rate values are rewritten as "(a  b)" (commas replaced by
        spaces) and 'JPL Midpoint (RA,DEC)' is renamed to
        'JPL Midpoint (RA DEC)'.
    """
    list_columns = ['Asteroids', 'Common Name', 'JPL Classification', 'JPL Radius (km)', 'JPL Geo Albedo', 'JPL Phase Angle (Deg)',
                    'JPL Sun-Asteroid Dist (AU)', 'JPL Asteroid-JWST Dist (AU)', 'JPL Midpoint (RA,DEC)', 'JPL Pos Rates (arcsec/hr)',
                    'JPL Pos Uncertainty (arcsec)', 'JPL Visual Magnitude (mag)', 'JPL Surface Brightness (mag/arcsec2)', 'Asteroid Flux (mJy)','Flux Error %',
                    'Annulus / Img Median', 'S/N', 'JPL Asteroid Distance (pix)', 'JPL Asteroid Distance (arcsec/exptime)', 'JPL Center Offset (arcsec)',
                    'Aperture NAN Perc', 'Annulus NAN Perc', 'Streak Flag']

    def _split_cell(cell):
        # Missing cell -> no entries.
        if not pd.notna(cell):
            return []
        # Pair-style cells "(a, b), (c, d)" are split between the pairs, which
        # strips the closing parenthesis from every entry except the last.
        if '(' in str(cell):
            return cell.split('), ')
        return str(cell).split(', ')

    def _strip_commas(value):
        # "(a, b" or "(a, b)" -> "(a  b)"; values without '(' only lose commas.
        if '(' in value:
            return '(' + value.replace(',', ' ').replace('(', '').replace(')', '') + ')'
        return value.replace(',', ' ')

    def _as_spaced_pair(value):
        # Always wraps in parentheses; missing values pass through untouched.
        if pd.notna(value):
            return f"({value.replace(',', ' ').replace('(', '').replace(')', '')})"
        return value

    expanded_rows = []

    for _, row in DF.iterrows():
        split_values = {col: _split_cell(row[col]) for col in list_columns}

        split_values['JPL Pos Rates (arcsec/hr)'] = [
            _strip_commas(val) for val in split_values['JPL Pos Rates (arcsec/hr)']
        ]

        # The longest list decides how many output rows this input row yields.
        max_len = max(len(split_values[col]) for col in list_columns)

        for i in range(max_len):
            new_row = row.copy()
            for col in list_columns:
                entries = split_values[col]
                new_row[col] = entries[i] if i < len(entries) else None
            expanded_rows.append(new_row)

    if expanded_rows:
        expandedDF = pd.DataFrame(expanded_rows)
    else:
        # Keep the input's columns so the column-based steps below still work
        # when there is nothing to expand.
        expandedDF = pd.DataFrame(columns=DF.columns)

    # Replace the textual placeholders 'NA' and 'nan' with an empty string.
    columns_to_clean = ['JPL Radius (km)', 'JPL Geo Albedo']
    expandedDF[columns_to_clean] = expandedDF[columns_to_clean].replace(['NA', 'nan'], '')

    expandedDF_with_changes = expandedDF.copy()
    expandedDF_with_changes['JPL Midpoint (RA,DEC)'] = expandedDF_with_changes['JPL Midpoint (RA,DEC)'].apply(_as_spaced_pair)
    expandedDF_with_changes['JPL Pos Rates (arcsec/hr)'] = expandedDF_with_changes['JPL Pos Rates (arcsec/hr)'].apply(_as_spaced_pair)
    expandedDF_with_changes = expandedDF_with_changes.rename(columns={'JPL Midpoint (RA,DEC)': 'JPL Midpoint (RA DEC)'})

    return expandedDF, expandedDF_with_changes

## Perform Asteroid Specific Analysis

In [78]:
def AsteroidCountFrequency(DF):
    """Build a per-asteroid summary table from the observation table.

    Every row of ``DF`` may name one or several comma-separated asteroids.
    For each distinct asteroid the function records how often it appears,
    the properties from the last row that mentions it, the filters and
    proposals it was seen with, its average S/N and the time span between
    its earliest exposure start and latest exposure end.

    Parameters
    ----------
    DF : pandas.DataFrame
        Reads the columns 'Asteroids', 'Common Name', 'JPL Classification',
        'JPL Radius (km)', 'JPL Geo Albedo', 'JPL Visual Magnitude (mag)',
        'JPL Surface Brightness (mag/arcsec2)', 'Filter', 'Proposal',
        'Target Moving', 'S/N', 'Exposure Start' and 'Exposure End'.
        Exposure times must be strings in '%Y-%m-%d %H:%M:%S.%f' format.

    Returns
    -------
    pandas.DataFrame
        One row per asteroid, sorted by 'Frequency' in descending order.
        An empty frame with the same columns is returned when ``DF`` yields
        no asteroid.
    """
    output_columns = [
        'Asteroid', 'Common Name', 'Frequency', 'Classification', 'Radius (km)',
        'Geo_Albedo', 'Visual Mag', 'Surface Bright (mag/arcsec2)', 'Average S/N',
        'Filter_Count', 'Filters', 'Proposals', 'Moving Target',
        'Earliest Observation', 'Latest Observation', 'Observation Time Difference',
    ]
    time_format = '%Y-%m-%d %H:%M:%S.%f'

    def _entries(cell):
        # Comma-separated cell -> stripped, non-empty entries. A missing cell
        # becomes the single entry 'nan', which is blanked out at the end.
        return [part.strip() for part in str(cell).split(',') if part.strip()]

    def _text_entries(cell):
        # Names and classifications: a missing cell means "no entries" rather
        # than a literal 'nan' name (and must not crash on a float NaN).
        return _entries(cell) if pd.notna(cell) else []

    def _pick(entries, position):
        # Entry at `position`, or None when the list is too short.
        return entries[position] if position < len(entries) else None

    asteroidDataDict = {}

    for _, row in DF.iterrows():
        filterString = row['Filter']
        proposalString = row['Proposal']
        movingString = row['Target Moving']
        exp_start_time = datetime.strptime(row['Exposure Start'], time_format)
        exp_end_time = datetime.strptime(row['Exposure End'], time_format)

        asteroidList = _text_entries(row['Asteroids'])
        commonNameList = _text_entries(row['Common Name'])
        classList = _text_entries(row['JPL Classification'])
        radiusList = _entries(row['JPL Radius (km)'])
        albedoList = _entries(row['JPL Geo Albedo'])
        VisMagList = _entries(row['JPL Visual Magnitude (mag)'])
        SurfBrightList = _entries(row['JPL Surface Brightness (mag/arcsec2)'])
        SNlist = _entries(row['S/N'])

        for indx, asteroid in enumerate(asteroidList):
            if asteroid not in asteroidDataDict:
                asteroidDataDict[asteroid] = {
                    'Common Name': None,
                    'Frequency': 0,
                    'Classification': None,
                    'Radius': None,
                    'Geo_Albedo': None,
                    'VisMag': None,
                    'SurfBright': None,
                    'S/N': [],
                    'Filter_Type': set(),
                    'Proposal': set(),
                    'Moving': set(),
                    'earliest_time': exp_start_time,
                    'latest_time': exp_end_time,
                }
            record = asteroidDataDict[asteroid]

            # Scalar properties are overwritten on every sighting, so the
            # last row that mentions the asteroid wins.
            record['Common Name'] = _pick(commonNameList, indx)
            record['Frequency'] += 1
            record['Classification'] = _pick(classList, indx)
            record['Radius'] = _pick(radiusList, indx)
            record['Geo_Albedo'] = _pick(albedoList, indx)
            record['VisMag'] = _pick(VisMagList, indx)
            record['SurfBright'] = _pick(SurfBrightList, indx)
            if indx < len(SNlist):
                record['S/N'].append(float(SNlist[indx]))

            if pd.notna(filterString):
                record['Filter_Type'].add(filterString.strip())
            if pd.notna(proposalString):
                record['Proposal'].add(str(proposalString))
            if pd.notna(movingString):
                record['Moving'].add(movingString.strip())

            if exp_start_time < record['earliest_time']:
                record['earliest_time'] = exp_start_time
            if exp_end_time > record['latest_time']:
                record['latest_time'] = exp_end_time

    asteroidDataList = []
    for asteroid, data in asteroidDataDict.items():
        earliest_time = data['earliest_time']
        latest_time = data['latest_time']
        # Dropping the fractional part leaves e.g. "1 day, 2:03:04" / "2:03:04".
        time_difference = str(latest_time - earliest_time).split('.')[0]

        moving_status = data['Moving']
        if moving_status == {'Yes'}:
            moving_label = 'Moving'
        elif moving_status == {'No'}:
            moving_label = 'Non-moving'
        else:
            # Any other combination, including no recorded status at all.
            moving_label = 'Both'

        avg_SN = round(sum(data['S/N']) / len(data['S/N']), 2) if data['S/N'] else None

        asteroidDataList.append({
            'Asteroid': asteroid,
            'Common Name': data['Common Name'],
            'Frequency': data['Frequency'],
            'Classification': data['Classification'],
            'Radius (km)': data['Radius'],
            'Geo_Albedo': data['Geo_Albedo'],
            'Visual Mag': data['VisMag'],
            'Surface Bright (mag/arcsec2)': data['SurfBright'],
            'Average S/N': avg_SN,
            'Filter_Count': len(data['Filter_Type']),
            'Filters': sort_filters(data['Filter_Type']),
            # Sorted so the text does not depend on set iteration order.
            'Proposals': ', '.join(sorted(data['Proposal'])),
            'Moving Target': moving_label,
            'Earliest Observation': earliest_time.strftime('%Y-%m-%d %H:%M:%S'),
            'Latest Observation': latest_time.strftime('%Y-%m-%d %H:%M:%S'),
            'Observation Time Difference': time_difference
        })

    if not asteroidDataList:
        # Without this guard the sort below fails on the missing 'Frequency' column.
        return pd.DataFrame(columns=output_columns)

    asteroid_df = pd.DataFrame(asteroidDataList)
    asteroid_df = asteroid_df.sort_values(by='Frequency', ascending=False).reset_index(drop=True)

    columns_to_clean = ['Radius (km)', 'Geo_Albedo', 'Visual Mag','Surface Bright (mag/arcsec2)']
    asteroid_df[columns_to_clean] = asteroid_df[columns_to_clean].replace(['NA', 'nan', '<NA>'], '')

    return asteroid_df

## Generate Plots

In [80]:
def plotResults(seperatedDF, asteroidDF, bandpassDict, folder_path, obs_total=36790):
    """Create the summary figures and save them as PNG files in ``folder_path``.

    Files written:
    - AsteroidLocation2D.png         RA/Dec scatter with the ecliptic line
    - AsteroidLocation3D.png         the same positions in an Aitoff projection
    - AsteroidDistance_Hist.png      stacked histogram of ecliptic latitude
    - AsteroidFilters.png            detections per filter, two normalisations
    - ClassificationFrequencies.png  detections per classification (log scale)

    Parameters
    ----------
    seperatedDF : pandas.DataFrame
        One row per asteroid detection. Reads 'Filter', 'JPL Classification',
        'JPL Midpoint (RA DEC)' (text of the form "(RA  DEC)") and 'S/N'.
    asteroidDF
        Not read by this function; kept so existing calls keep working.
    bandpassDict : dict
        Maps each filter name to the count used as the per-filter denominator.
        Every filter present in ``seperatedDF`` must be a key.
    folder_path : str
        Existing folder that receives the PNG files.
    obs_total : int, optional
        Denominator of the 'Observation / Total' fractions. Defaults to
        36790, the value that used to be hard-coded here.
    """
    classifications = []
    distance_to_ecliptic = []
    Filter = []
    ra = []
    dec = []

    for _, row in seperatedDF.iterrows():
        filter_ = row['Filter']
        classification = row['JPL Classification']
        # First and last space-separated tokens carry RA and Dec, still
        # attached to the opening / closing parenthesis.
        position = row['JPL Midpoint (RA DEC)'].split(' ')
        ra_deg = float(position[0].replace('(',''))
        dec_deg = float(position[-1].replace(')',''))
        distance = ecliptic_distance(ra_deg, dec_deg)
        SN = float(row['S/N'])

        if str(filter_) == 'None':
            print(filter_,classification,SN)

        classifications.append(str(classification))
        distance_to_ecliptic.append(float(distance))
        Filter.append(str(filter_))
        ra.append(ra_deg)
        dec.append(dec_deg)

    # Classification names, most frequent first.
    classification_counts = Counter(classifications)
    sorted_classification_names = sorted(
        classification_counts, key=lambda name: classification_counts[name], reverse=True
    )

    filter_names = sorted(set(Filter))

    def _values_for(class_name, values):
        # Entries of `values` that belong to detections of `class_name`.
        return [value for value, name in zip(values, classifications) if name == class_name]

    ### Plot 1: 2D Cartesian plot (RA vs DEC) with the full ecliptic line
    plt.figure(figsize=(10, 8))

    for class_name in sorted_classification_names:
        plt.scatter(_values_for(class_name, ra), _values_for(class_name, dec),
                    s = 30, alpha = 0.6, zorder = 3, label = class_name)

    # Declination of the ecliptic as a function of RA: tan(dec) = tan(eps) * sin(ra),
    # with eps = 23.44 deg (Earth's axial tilt). A plain sine is only an approximation.
    ecliptic_ra = np.linspace(0, 360, 1000)
    ecliptic_dec = np.degrees(np.arctan(np.tan(np.radians(23.44)) * np.sin(np.radians(ecliptic_ra))))

    plt.plot(ecliptic_ra, ecliptic_dec, color='black', linestyle='dashed', alpha = 0.4, label='Ecliptic Line', zorder = 2)

    plt.title('Location of Observed Asteroids')
    plt.xlabel('Right Ascension (Deg)')
    plt.ylabel('Declination (Deg)')
    plt.grid(True, zorder = 1)
    plt.legend(loc='upper right', bbox_to_anchor=(0.92, 1))
    plt.xlim(0, 360)  # Ensure the RA spans the full 0-360 degree range
    plt.savefig(f'{folder_path}/AsteroidLocation2D.png', bbox_inches='tight')
    plt.close()

    ### Plot 2: Aitoff projection of the same positions
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='aitoff')

    for class_name in sorted_classification_names:
        # The projection expects radians, with RA shifted into [-180, 180) deg.
        ra_ = [np.radians(value - 180) for value in _values_for(class_name, ra)]
        dec_ = [np.radians(value) for value in _values_for(class_name, dec)]
        ax.scatter(ra_, dec_, s=30, alpha=0.6, zorder=3, label=class_name)

    # Ecliptic plane (latitude 0 at every longitude) transformed to ICRS.
    ecl_lon = np.linspace(0, 360, 1000)
    ecl_lat = np.zeros_like(ecl_lon)
    ecliptic_coords = SkyCoord(lon=ecl_lon * u.deg, lat=ecl_lat * u.deg, frame="geocentrictrueecliptic")
    equatorial_coords = ecliptic_coords.transform_to("icrs")

    ecliptic_ra_aitoff = np.radians(equatorial_coords.ra.deg - 180)  # Shift for Aitoff
    ecliptic_dec_aitoff = np.radians(equatorial_coords.dec.deg)

    ax.plot(ecliptic_ra_aitoff, ecliptic_dec_aitoff, color='black', linestyle='dashed', alpha=0.6, label='Ecliptic Line', zorder=2)

    ax.set_title('Location of Observed Asteroids', y=1.08)
    ax.set_xticklabels(['14h', '16h', '18h', '20h', '22h', '0h', '2h', '4h', '6h', '8h', '10h'])
    ax.grid(True, zorder=1)
    ax.legend(loc='upper right', bbox_to_anchor=(0.98, 1.25))
    plt.savefig(f'{folder_path}/AsteroidLocation3D.png', bbox_inches='tight')
    plt.close()

    ### Plot 3: stacked histogram of ecliptic latitude per classification
    bins = np.histogram_bin_edges(distance_to_ecliptic, bins=30)

    plt.figure(figsize=(10, 5))

    distance_data = [_values_for(class_name, distance_to_ecliptic) for class_name in sorted_classification_names]

    plt.hist(distance_data, bins=bins, stacked=True, edgecolor='black', alpha=0.9, label=sorted_classification_names, zorder=2)

    plt.xlim(bins[0], bins[-1])

    plt.axvline(0, color='k', linestyle='dashed', label='Ecliptic Plane', zorder = 3)  # Ecliptic reference line

    plt.grid(zorder = 1)
    plt.xlabel('Distance from Ecliptic (Deg)')
    plt.ylabel('Frequency')
    plt.title('Asteroid Distances from the Ecliptic')
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'{folder_path}/AsteroidDistance_Hist.png', bbox_inches='tight')
    plt.close()

    ### Plot 4: detections per filter, normalised two ways
    different_filters = [sum(1 for f in Filter if f == filter_name) for filter_name in filter_names]

    # Most used filter first.
    sorted_data = sorted(zip(different_filters, filter_names), reverse=True)
    filter_freq_list, filter_name_list = zip(*sorted_data)
    filter_freq_list, filter_name_list = list(filter_freq_list), list(filter_name_list)

    filter_total = [bandpassDict[x] for x in filter_name_list]
    filter_fraction_per_total = [x / obs_total for x in filter_freq_list]
    filter_fraction_per_filter = [x / y for x, y in zip(filter_freq_list, filter_total)]

    bar_width = 0.4
    x = np.arange(len(filter_name_list))

    fig, ax1 = plt.subplots(figsize=(10, 5))

    # Left axis: Observation / Total
    ax1.bar(
        x - bar_width / 2, filter_fraction_per_total, width=bar_width, color='blue',
        edgecolor='black', label='Obs / Total', zorder=3
    )
    ax1.set_ylabel('Observation / Total', fontsize=12, color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_ylim(0, max(filter_fraction_per_total) * 1.1)

    # Right axis: Observation / Filter Total
    ax2 = ax1.twinx()
    ax2.bar(
        x + bar_width / 2, filter_fraction_per_filter, width=bar_width, color='green',
        edgecolor='black', label='Obs / Filter Total', zorder=3
    )
    ax2.set_ylabel('Observation / Filter Total', fontsize=12, color='green')
    ax2.tick_params(axis='y', labelcolor='green')
    ax2.set_ylim(0, max(filter_fraction_per_filter) * 1.1)

    # Absolute counts as text above the left-hand bars.
    for i, count in enumerate(filter_freq_list):
        ax1.text(
            x[i]-0.2, filter_fraction_per_total[i] , str(count),
            ha='center', va='bottom', fontsize=10, color='black'
        )

    plt.xticks(x, filter_name_list, ha='right', fontsize=10)
    ax1.set_xlabel('Image Filters', fontsize=12)

    plt.title('Observations per Filter Compared to Totals', fontsize=14)
    ax1.grid(axis='y', alpha=0.5, color='k', linestyle='--', zorder=1)

    ax1.legend(loc='upper center')
    ax2.legend(loc='upper right')

    plt.tight_layout()
    plt.savefig(f'{folder_path}/AsteroidFilters.png', bbox_inches='tight')
    plt.close()

    ### Plot 5: detections per classification (log-scaled fraction of obs_total)
    sorted_counts = [classification_counts[name] for name in sorted_classification_names]
    sorted_frequencies = [count / obs_total for count in sorted_counts]

    bar_width = 0.5
    x = np.arange(len(sorted_classification_names))

    plt.figure(figsize=(10, 5))

    # One plt.bar call per classification so each bar gets its own colour
    # from the default colour cycle and its own legend entry.
    for i, (freq, count) in enumerate(zip(sorted_frequencies, sorted_counts)):
        plt.bar(
            x[i], freq, width=bar_width,
            edgecolor='black', label = sorted_classification_names[i], zorder=2
        )
        # Absolute count as text above the bar.
        plt.text(
            x[i], freq, str(count),
            ha='center', va='bottom', fontsize=10, color='black'
        )

    plt.xticks(x, sorted_classification_names, ha='center', fontsize=10, rotation=20)
    plt.xlabel('Asteroid Classifications', fontsize=12)
    plt.ylabel('Observation / Total', fontsize=12)

    plt.title('Asteroid Observations by Classification', fontsize=14)

    plt.grid(axis='y', alpha=0.5, color='k', linestyle='--', zorder=1)

    plt.yscale('log')

    plt.legend()

    plt.tight_layout()
    plt.savefig(f'{folder_path}/ClassificationFrequencies.png', bbox_inches='tight')
    plt.close()

## Threshold Observations into 'Accurate' and 'Potential'

In [81]:
def filterBadObs(row, SNR_cutoff = 15, aperture_nan_cutoff = 5, annulus_nan_cutoff = 10):
    """Build the comma-separated list of quality flags for one detection.

    Parameters
    ----------
    row : mapping
        Must provide 'S/N', 'Aperture NAN Perc', 'Annulus NAN Perc',
        'Streak Flag', 'Asteroids' and 'Observation'.
    SNR_cutoff : number
        Detections whose S/N is below this value, or is NaN, are flagged.
    aperture_nan_cutoff : number
        Aperture NaN percentage above which the detection is flagged.
    annulus_nan_cutoff : number
        Only used to pick the label of the aperture flag.

    Returns
    -------
    str
        Flags joined with ', ', or an empty string when nothing is flagged.
    """
    # Observations flagged by hand, keyed by asteroid name.
    manual_filter_dict = {'1998 BC1':['jw01522002001_0210d_00001_mirimage'],
                          '2000 WR106':['jw01254055001_02101_00001_mirimage'],
                          '2003 EL61' :['jw01273003001_03105_00001_mirimage','jw01273003001_03105_00002_mirimage',
                                        'jw01273003001_03107_00001_mirimage','jw01273003001_03107_00002_mirimage'],
                          '2003 QP78' :['jw02046021001_02107_00001_mirimage','jw02046021001_02107_00002_mirimage','jw02046021001_02107_00003_mirimage'],
                          '2012 SJ16' :['jw01714001011_02101_00001_mirimage'],
                          '2015 XN511':['jw03707123002_02101_00001_mirimage', 'jw03707123002_02101_00002_mirimage','jw03707123002_02101_00003_mirimage',
                                        'jw03707123002_02101_00004_mirimage', 'jw03707123003_02101_00003_mirimage','jw03707123003_02101_00004_mirimage'],
                          '2016 GT196':['jw02107013002_02101_00001_mirimage','jw02107013002_02101_00003_mirimage','jw02107013002_02101_00004_mirimage'],
                          '2021 UE8'  :['jw01854021001_02101_00001_mirimage','jw01854021001_02102_00001_mirimage','jw01854021001_02103_00001_mirimage'],
                          '2023 RK160':['jw02987003017_02101_00001_mirimage','jw02987003017_02101_00002_mirimage','jw02987003017_02101_00003_mirimage']}

    SNR = float(row['S/N'])
    Ap_Nan_Perc = float(row['Aperture NAN Perc'])
    Ann_Nan_Perc = float(row['Annulus NAN Perc'])
    streak_flag = str(row['Streak Flag'])
    asteroid_name = str(row['Asteroids'])
    observationID = str(row['Observation'])

    flags = []

    # Written as "not >=" instead of "<" so that a NaN S/N, for which every
    # comparison is False, is flagged instead of passing as a clean detection.
    if not SNR >= SNR_cutoff:
        flags.append(f"Low SNR ({SNR_cutoff})")

    # The label depends on whether the annulus also exceeds its own cutoff.
    if Ap_Nan_Perc > aperture_nan_cutoff:
        if Ann_Nan_Perc > annulus_nan_cutoff:
            flags.append(f"Aperture NAN ({aperture_nan_cutoff}%)")
        else:
            flags.append(f"Aperture Capped ({aperture_nan_cutoff}%)")

    if streak_flag == 'Yes':
        flags.append("Asteroid Streak")

    if observationID in manual_filter_dict.get(asteroid_name, ()):
        flags.append("Manual Flag")

    return ", ".join(flags)

## Copy Previously Generated Asteroid Images

In [82]:
def move_images(row, OUTPUTS_DIR, TRUE_OBS_DIR):
    """Copy the images of one detection into a folder named after its asteroid.

    The source folder is ``OUTPUTS_DIR/<Proposal>`` and the destination is
    ``TRUE_OBS_DIR/<Asteroids>``, which is created if needed. Two kinds of
    files are copied (the originals stay in place):
    - names containing 'FluxRegion_<Observation>_<Asteroids>'
    - names containing 'Overlay_<Observation>' but not 'Zoom'
    Nothing is copied when the proposal folder does not exist.
    """
    asteroid_name = str(row['Asteroids'])
    proposal_folder = os.path.join(OUTPUTS_DIR, str(row['Proposal']))
    asteroid_folder = os.path.join(TRUE_OBS_DIR, asteroid_name)

    os.makedirs(asteroid_folder, exist_ok=True)

    if not os.path.exists(proposal_folder):
        return

    observation_id = str(row["Observation"])
    flux_region_tag = f'FluxRegion_{observation_id}_{asteroid_name}'
    overlay_tag = f'Overlay_{observation_id}'

    for file in os.listdir(proposal_folder):
        is_flux_region = flux_region_tag in file
        is_plain_overlay = overlay_tag in file and 'Zoom' not in file

        if is_flux_region or is_plain_overlay:
            shutil.copy(
                os.path.join(proposal_folder, file),
                os.path.join(asteroid_folder, file),
            )

## Generate Flux Specific Dataframe

In [111]:
def _filter_to_wavelength(filter_name):
    """Turn a filter name into the value stored in 'Wavelength (µm)'.

    The digits in the name are joined and divided by 100, e.g. 'F770W' -> 7.7.
    Returns None for missing values and for names containing no digits.
    """
    if pd.isnull(filter_name):
        return None
    digits = ''.join(ch for ch in str(filter_name) if ch.isdigit())
    # A name without digits would make int('') raise; report "unknown" instead.
    return int(digits) / 100 if digits else None


def flux_dataframe(flux_obs_DF, folder_path):
    """Build the flux-evaluation table from a table of asteroid observations.

    Parameters
    ----------
    flux_obs_DF : pandas.DataFrame
        Must contain the columns 'Asteroids', 'Common Name',
        'JPL Classification', 'Exposure Start', 'Exposure End', 'Filter',
        'Asteroid Flux (mJy)' and 'Flux Error %'. It is not modified.
    folder_path : str
        Unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per input row, sorted by 'Asteroid ID', with renamed columns,
        a mid-exposure 'Epoch' string (millisecond precision) and a
        'Wavelength (µm)' column derived from the filter name.
    """
    # Mid-exposure time, kept local so the caller's DataFrame is left untouched.
    exposure_start = pd.to_datetime(flux_obs_DF['Exposure Start'])
    exposure_end = pd.to_datetime(flux_obs_DF['Exposure End'])
    epoch = exposure_start + (exposure_end - exposure_start) / 2

    DF_for_flux = pd.DataFrame({
        'Asteroid ID': flux_obs_DF['Asteroids'],
        'Asteroid Name': flux_obs_DF['Common Name'],
        'Asteroid Classification': flux_obs_DF['JPL Classification'],
        # '%f' yields microseconds; dropping the last 3 digits leaves milliseconds.
        'Epoch': epoch.dt.strftime('%Y-%m-%d %H:%M:%S.%f').str[:-3],
        'Image Filter': flux_obs_DF['Filter'],
        'Wavelength (µm)': flux_obs_DF['Filter'].apply(_filter_to_wavelength),
        'Flux (mJy)': flux_obs_DF['Asteroid Flux (mJy)'],
        'Flux Error (%)': flux_obs_DF['Flux Error %'],
    }).sort_values('Asteroid ID', ascending=True)

    return DF_for_flux

## Main Function

In [112]:
def _observations_per_asteroid(observation_count, asteroid_count):
    """Return observations per asteroid rounded to 2 decimals, or NaN if there are no asteroids."""
    asteroid_count = float(asteroid_count)
    if asteroid_count == 0:
        return float('nan')
    return round(float(observation_count) / asteroid_count, 2)


def main(csvPath, calLevel, prop_start, prop_end, ecliptic_range, movingCheck = 'No'):
    """Run the asteroid analysis on a search-results CSV and write all outputs.

    Outputs go to an 'Analysis' folder next to the CSV: frequency table,
    separated / accurate / potential observation CSVs, copied images,
    'Flux_Evaluation.csv', and whatever ``plotResults`` produces.

    Parameters
    ----------
    csvPath : str
        Path to the search-results CSV.
    calLevel : int
        Calibration level forwarded to ``totalObservationCount`` and
        ``countPerFilter`` and echoed in the printed summary.
    prop_start, prop_end : int
        Inclusive proposal-number range to keep.
    ecliptic_range :
        Forwarded to ``totalObservationCount`` and ``filter_DF``; a falsy value
        is reported as 90 degrees in the printed summary.
    movingCheck : {'No', 'Yes', 'Both'}
        Which values of the 'Moving' column to keep for the overall counts.

    Raises
    ------
    ValueError
        If ``movingCheck`` is not one of 'No', 'Yes' or 'Both'.
    """
    # Validate early: an unknown value used to leave `df_final` and `moving`
    # unassigned and fail later with an opaque UnboundLocalError.
    moving_codes = {'No': 0, 'Yes': 1, 'Both': 2}
    if movingCheck not in moving_codes:
        raise ValueError(
            f"movingCheck must be one of {list(moving_codes)}, got {movingCheck!r}")
    moving = moving_codes[movingCheck]

    pd.set_option('display.max_rows', None)
    pd.set_option('display.width', 2000)

    # 'Analysis' sits next to the CSV (or in the working directory for a bare filename).
    folder_path = os.path.join(os.path.dirname(csvPath) or '.', 'Analysis')
    generateFolder(folder_path)

    # Load the CSV file
    df_original = pd.read_csv(csvPath)

    # Keep only rows whose 'Proposal' is numeric and inside the requested range.
    in_proposal_range = df_original['Proposal'].astype(str).apply(
        lambda x: x.isdigit() and prop_start <= int(x) <= prop_end)
    df_proposal_filtered = df_original[in_proposal_range].reset_index(drop=True)

    if movingCheck == 'Both':
        df_final = df_proposal_filtered
    else:
        df_final = df_proposal_filtered[df_proposal_filtered['Moving'] == movingCheck]

    # Silence the helpers' own printing while gathering archive-wide counts.
    with HiddenPrints():
        totalObs, uniqueProps, obsInEcliptic = totalObservationCount(
            calLevel, moving, prop_start, prop_end, eclipticCheck = ecliptic_range)
        bandpassDict = countPerFilter(calLevel, moving, prop_start, prop_end)

    totalRowCount, asteroidRowsTotal, totalAsteroids, uniqueProposalTotal = AsteroidObsFrequency(df_final, prop_start, prop_end)

    # Filter by ecliptic
    # NOTE(review): this starts from df_original, so the movingCheck selection
    # above does not apply to the ecliptic-filtered data - confirm intended.
    filteredDF = filter_DF(df_original, ecliptic_range, prop_start, prop_end)
    filterRowCount, asteroidRowsfilter, filterAsteroids, uniqueProposalfilter = AsteroidObsFrequency(filteredDF, prop_start, prop_end)

    asteroidFreqDF = AsteroidCountFrequency(filteredDF)

    # NOTE(review): the proposal count printed here is `uniqueProps` from
    # totalObservationCount, while `uniqueProposalTotal` (from the CSV rows) is
    # never used - confirm which one this sentence should report.
    print(f'The Level2 CSV contains a total of {totalRowCount} Observations (rows) from {uniqueProps} unique proposals, of that {asteroidRowsTotal} observations contain usable asteroid information, with {totalAsteroids} asteroid recordings \n')

    print(f'Within the Proposal Range ({prop_start} - {prop_end}) there are {totalObs} total Level {calLevel} Observations and {obsInEcliptic} Observations within {ecliptic_range if ecliptic_range else 90} Degrees of the Ecliptic')
    print(' ')
    print(f'In total, that is 1 asteroid observed in every {_observations_per_asteroid(totalObs, totalAsteroids)} Observations in the level 2 dataset')
    print(f'In ecliptic, that is 1 asteroid observed in every {_observations_per_asteroid(obsInEcliptic, filterAsteroids)} Observations in the level 2 dataset')
    print(' ')
    print(f'NOTE: those are recent numbers for the amount of {calLevel} observations and will likely by higher then the amount available when the asteroid search was ran\n')

    print('\n-----------------------------------------------------------\n')

    print(asteroidFreqDF)

    # Define the OUTPUTS folder path
    OUTPUTS_DIR = folder_path
    TRUE_OBS_DIR = os.path.join(OUTPUTS_DIR, "True Obs")
    POTENTIAL_OBS_DIR = os.path.join(OUTPUTS_DIR, "Potential Obs")

    asteroidFreqDF.to_csv(f"{folder_path}/Asteroid_Details.csv",index=False)

    seperatedDF, printableDF = separate_asteroids(filteredDF)

    # Written before the 'Flags' column is added, so this CSV carries no flags.
    printableDF.to_csv(f"{folder_path}/Asteroid_Observations_Seperated.csv",index=False)

    printableDF['Flags'] = printableDF.apply(filterBadObs, axis=1)

    # An empty flag string marks an observation as accurate; anything else is "potential".
    good_Observations = printableDF[printableDF['Flags'] == ""].drop(columns=['Flags']).sort_values('Observation', ascending=True)
    bad_Observations = printableDF[printableDF['Flags'] != ""].sort_values('Observation', ascending=True)

    good_Observations.to_csv(f"{folder_path}/Asteroid_Observations_Accurate.csv",index=False)
    bad_Observations.to_csv(f"{folder_path}/Asteroid_Observations_Potential.csv",index=False)

    # Create the True Obs / Potential Obs folders if they don't exist
    os.makedirs(TRUE_OBS_DIR, exist_ok=True)
    os.makedirs(POTENTIAL_OBS_DIR, exist_ok=True)

    good_Observations.apply(lambda row: move_images(row, OUTPUTS_DIR, TRUE_OBS_DIR), axis=1)
    bad_Observations.apply(lambda row: move_images(row, OUTPUTS_DIR, POTENTIAL_OBS_DIR), axis=1)

    DF_for_flux = flux_dataframe(good_Observations, folder_path)
    DF_for_flux.to_csv(f"{folder_path}/Flux_Evaluation.csv",index=False)

    # Plots use every separated observation (flagged or not), not just the accurate ones.
    plotResults(printableDF, asteroidFreqDF, bandpassDict,folder_path)
        
# Run the analysis on the full Level 2 search results: calibration level 2,
# proposals 1000-7000, ecliptic_range=False (reported as 90 degrees in the
# printed summary), and movingCheck='Both' so rows are kept regardless of
# their 'Moving' value.
main('Results/Level2_Asteroid_Search_Full.csv', 2, 1000, 7000, False, movingCheck = 'Both')

The Level2 CSV contains a total of 5657 Observations (rows) from 368 unique proposals, of that 678 observations contain usable asteroid information, with 728 asteroid recordings 

Within the Proposal Range (1000 - 7000) there are 37792 total Level 2 Observations and 37792 Observations within 90 Degrees of the Ecliptic
 
In total, that is 1 asteroid observed in every 51.91 Observations in the level 2 dataset
In ecliptic, that is 1 asteroid observed in every 51.91 Observations in the level 2 dataset
 
NOTE: those are recent numbers for the amount of 2 observations and will likely by higher then the amount available when the asteroid search was ran


-----------------------------------------------------------

      Asteroid      Common Name  Frequency            Classification Radius (km) Geo_Albedo Visual Mag Surface Bright (mag/arcsec2)  Average S/N  Filter_Count                                            Filters Proposals Moving Target Earliest Observation   Latest Observation Observa