In [6]:
import math
import sys

import pandas as pd

# --- Configuration ---
# Source tables: the NSC catalogue to annotate and the HST MAST query export.
NSC_FILE = "/Users/ananthmugundhan/Documents/Research work/Space_data/hst_nsc.csv"
HST_MAST_FILE = "/Users/ananthmugundhan/Documents/Research work/Space_data/HST_2025-09-17T14_58_13-07_00.csv"

# Destination of the annotated table (a relative path, so it is resolved
# against the current working directory).
OUTPUT_FILE = "hst_nsc_with_filters.csv"

# Coordinate columns read from the NSC table.
NSC_RA_COL, NSC_DEC_COL = "RA", "dec"

# Coordinate and filter columns read from the HST MAST table.
HST_RA_COL, HST_DEC_COL = "file_user_0_RA", "file_user_1_dec"
HST_FILTER_COL = "sci_spec_1234"

# Filter names searched for in HST_FILTER_COL; each one becomes a yes/no
# column in the output.
FILTER_1, FILTER_2 = "F814W", "F606W"

# --- Main Script ---

def _coord_key(ra_value, dec_value, precision):
    """Return the rounded ``(ra, dec)`` lookup key, or ``None`` if unusable.

    Args:
        ra_value: Raw right-ascension cell (number or numeric string).
        dec_value: Raw declination cell (number or numeric string).
        precision: Number of decimal places both values are rounded to.

    Returns:
        A ``(float, float)`` tuple, or ``None`` when either value cannot be
        converted to a finite float.
    """
    try:
        ra = float(ra_value)
        dec = float(dec_value)
    except (ValueError, TypeError):
        return None
    # Empty CSV cells arrive as NaN, which float() and round() accept without
    # raising. NaN never compares equal to itself, so such a key could never
    # be matched and would only add a phantom entry to the lookup map.
    if not (math.isfinite(ra) and math.isfinite(dec)):
        return None
    return (round(ra, precision), round(dec, precision))


def _build_filter_map(df_hst, ra_col, dec_col, filter_col, filters, precision):
    """Map each rounded HST coordinate to the target filters seen there.

    Args:
        df_hst: DataFrame holding the HST query results.
        ra_col: Name of the right-ascension column in ``df_hst``.
        dec_col: Name of the declination column in ``df_hst``.
        filter_col: Name of the column whose text is searched for filters.
        filters: Filter names to look for (substring match).
        precision: Number of decimal places used for the coordinate key.

    Returns:
        A ``(filter_map, skipped)`` pair. ``filter_map`` maps
        ``(rounded_ra, rounded_dec)`` to the set of names from ``filters``
        found at that coordinate (possibly empty); ``skipped`` counts the
        rows ignored because their coordinates were missing or non-numeric.
    """
    filter_map = {}
    skipped = 0
    # Iterating the three columns in lockstep avoids building a Series per
    # row, which is what DataFrame.iterrows() does.
    rows = zip(df_hst[ra_col], df_hst[dec_col], df_hst[filter_col])
    for ra_value, dec_value, filter_value in rows:
        key = _coord_key(ra_value, dec_value, precision)
        if key is None:
            skipped += 1
            continue
        # str() so that a missing (NaN) filter cell is searched as text
        # instead of raising on the ``in`` test.
        filters_str = str(filter_value)
        found = filter_map.setdefault(key, set())
        found.update(name for name in filters if name in filters_str)
    return filter_map, skipped


def process_files():
    """Annotate the NSC table with yes/no columns for two HST filters.

    Reads ``NSC_FILE`` and ``HST_MAST_FILE``, builds a lookup from rounded
    HST coordinates to the target filters (``FILTER_1``, ``FILTER_2``) found
    in ``HST_FILTER_COL``, adds one ``'yes'``/``'no'`` column per filter to
    the NSC table, and writes the result to ``OUTPUT_FILE``.

    Problems (missing input file, missing column, failed save) are reported
    on stderr and end the function early; progress is printed to stdout.

    Returns:
        None.
    """
    print("Starting the filter matching process...")

    # Coordinates are rounded before comparison so that tiny floating-point
    # differences between the two files do not prevent a match.
    precision = 5

    # --- Step 1: Load the CSV files into pandas DataFrames ---
    try:
        print(f"Loading primary file: {NSC_FILE}")
        df_nsc = pd.read_csv(NSC_FILE)

        print(f"Loading HST MAST query results: {HST_MAST_FILE}")
        # low_memory=False makes pandas infer each column's dtype from the
        # whole file rather than chunk by chunk.
        df_hst = pd.read_csv(HST_MAST_FILE, low_memory=False)
    except FileNotFoundError as e:
        print(f"Error: Could not find a required file: {e.filename}", file=sys.stderr)
        print("Please check that the NSC_FILE and HST_MAST_FILE paths configured at the top of the script point to existing files.", file=sys.stderr)
        return

    # Fail with a readable message instead of a KeyError traceback when a
    # configured column name does not exist in the loaded file.
    required_columns = (
        (NSC_FILE, df_nsc, (NSC_RA_COL, NSC_DEC_COL)),
        (HST_MAST_FILE, df_hst, (HST_RA_COL, HST_DEC_COL, HST_FILTER_COL)),
    )
    for path, frame, columns in required_columns:
        missing = [name for name in columns if name not in frame.columns]
        if missing:
            print(f"Error: {path} is missing required column(s): {', '.join(missing)}", file=sys.stderr)
            return

    # --- Step 2: Create a lookup map from the HST data ---
    print("Building filter lookup map from HST data...")
    filter_map, skipped = _build_filter_map(
        df_hst,
        HST_RA_COL,
        HST_DEC_COL,
        HST_FILTER_COL,
        (FILTER_1, FILTER_2),
        precision,
    )
    print(f"Filter map built. Found filter data for {len(filter_map)} unique coordinates.")
    if skipped:
        print(f"Warning: skipped {skipped} HST row(s) with missing or non-numeric coordinates.", file=sys.stderr)

    # --- Step 3: Use the map to add new columns to the nsc DataFrame ---
    print(f"Matching coordinates and adding '{FILTER_1}' and '{FILTER_2}' columns to the output.")

    # Resolve each NSC row to its set of matched filters once, then derive
    # both columns from that. An unusable coordinate yields a None key,
    # which is never in the map, so the row falls back to "no filters".
    no_filters = frozenset()
    matches = [
        filter_map.get(_coord_key(ra_value, dec_value, precision), no_filters)
        for ra_value, dec_value in zip(df_nsc[NSC_RA_COL], df_nsc[NSC_DEC_COL])
    ]
    for filter_name in (FILTER_1, FILTER_2):
        df_nsc[filter_name] = [
            'yes' if filter_name in found else 'no' for found in matches
        ]

    # --- Step 4: Save the final result to a new CSV file ---
    print(f"Saving the updated data to '{OUTPUT_FILE}'...")
    try:
        df_nsc.to_csv(OUTPUT_FILE, index=False)
    except OSError as e:
        print(f"Error: Could not save the output file. Reason: {e}", file=sys.stderr)
        return

    print("\nProcess complete!")
    print(f"The new file '{OUTPUT_FILE}' has been created with the added filter columns.")


# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    process_files()


Starting the filter matching process...
Loading primary file: /Users/ananthmugundhan/Documents/Research work/Space_data/hst_nsc.csv
Loading HST MAST query results: /Users/ananthmugundhan/Documents/Research work/Space_data/HST_2025-09-17T14_58_13-07_00.csv
Building filter lookup map from HST data...
Filter map built. Found filter data for 174 unique coordinates.
Matching coordinates and adding 'F814W' and 'F606W' columns to the output.
Saving the updated data to 'hst_nsc_with_filters.csv'...

Process complete!
The new file 'hst_nsc_with_filters.csv' has been created with the added filter columns.
