# Reduce catalog based on magnitude quality cuts
## January 2024
### Bethany Ludwig

In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import numpy as np 
import os, glob
import matplotlib

from astropy.io import ascii
from scipy import stats
from astropy.coordinates import SkyCoord

## Load in photometry

In [2]:
# Load the combined LMC and SMC candidate catalog
catalog_path = 'C:/Projects/0_Data/SUMS_CompleteCatalog/Candidates/0_combined_colors.csv'
df = pd.read_csv(catalog_path)

def check_stars(df):
    """Report which of the 25 discovery stars are missing from ``df``.

    The names ``Star_1`` through ``Star_25`` are looked up in
    ``df.discovery_name``. Every name that is absent is printed on its own
    line, in numerical order. If none are absent, the message
    "All Discovery Stars Found" is printed instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalog with a ``discovery_name`` column.

    Returns
    -------
    None
        The result is only printed.
    """
    discovery_names = [f'Star_{number}' for number in range(1, 26)]
    # Build the lookup once instead of calling .unique() for every star
    names_in_catalog = set(df.discovery_name.unique())
    missing = [star for star in discovery_names if star not in names_in_catalog]
    for star in missing:
        print(star)
    if not missing:
        print ("All Discovery Stars Found")


# Catalog size as (rows, columns) right after loading, before any cut is applied
print(df.shape)

(117277, 85)


  df = pd.read_csv('C:/Projects/0_Data/SUMS_CompleteCatalog/Candidates/0_combined_colors.csv')


- If a source is blue in only one filter (color combination), require that it doesn't overlap the ZAMS

In [3]:
# Define the color combination labels
color_labels = ['uvw2 - b','uvw2 - v', 'uvw2 - i', 
                'uvw1 - b', 'uvw1 - v','uvw1 - i', 
                'uvm2 - b', 'uvm2 - v', 'uvm2 - i']

# Each color combination has a companion '<combo> overlap' column
overlap_labels = [label + ' overlap' for label in color_labels]

# Per source and per color combination: is it labelled 'blue', and does its
# overlap column say 'yes'? Plain arrays are used so the two tables line up
# by position (column i of one belongs to column i of the other).
blue_flags = df[color_labels].eq('blue').to_numpy()
overlap_flags = df[overlap_labels].eq('yes').to_numpy()

# Sources with exactly one blue color combination whose blue combination overlaps.
# Assumes n_blue equals the number of 'blue' entries across color_labels, so that
# the single blue combination is the only one that can satisfy both flags.
only_one_blue = (df['n_blue'] == 1).to_numpy()
drop_mask = only_one_blue & (blue_flags & overlap_flags).any(axis=1)
index_one_color_overlaps = list(df.index[drop_mask])

# Remove these indices from the catalog
df = df.drop(index_one_color_overlaps).reset_index(drop=True)

print('Blue in only one filter and it overlaps the ZAMS')
print(f'How many we have: ',df.shape[0])
print(f'How many we lost: ',len(index_one_color_overlaps))
check_stars(df)

Blue in only one filter and it overlaps the ZAMS
How many we have:  92431
How many we lost:  24846
All Discovery Stars Found


- Set a brightness limit of 14 mag

In [5]:
# Sources brighter than this magnitude are brighter than Ylva's models
bright_cutoff = 14
# Test every UV filter, in case one of them is missing for a source
uv_bands = ['uvm2_dered', 'uvw2_dered', 'uvw1_dered']
too_bright = (df[uv_bands] < bright_cutoff).any(axis=1)
# Row labels of the sources that are too bright in at least one UV filter
index_bright_uv = list(df.index[too_bright])

# Keep only the sources that passed the brightness limit
df = df.loc[~too_bright].reset_index(drop=True)

print('Stars brighter than Ylva\'s models')
print(f'How many we have: ',df.shape[0])
print(f'How many we lost: ',len(index_bright_uv))
check_stars(df)

Stars brighter than Ylva's models
How many we have:  92010
How many we lost:  421
All Discovery Stars Found


- Set a faintness limit of 19 mag

In [6]:
# Sources fainter than this magnitude in the UV are removed
faint_cutoff = 19
# Test every UV filter, in case one of them is missing for a source
uv_bands = ['uvm2_dered', 'uvw2_dered', 'uvw1_dered']
too_faint = (df[uv_bands] > faint_cutoff).any(axis=1)
# Row labels of the sources that are too faint in at least one UV filter
index_faint_uv = list(df.index[too_faint])

# Keep only the sources that passed the faintness limit
df = df.loc[~too_faint].reset_index(drop=True)

print('Stars fainter in UV than 19 mag')
print(f'How many we have: ',df.shape[0])
print(f'How many we lost: ',len(index_faint_uv))
check_stars(df)

Stars fainter in UV than 19 mag
How many we have:  38950
How many we lost:  53060
All Discovery Stars Found


- Require all UV and optical magnitudes to be detected at 5 sigma or better, i.e. a magnitude error of at most 0.217 mag

In [7]:
print('Stars with mag error within 5sigma')
# Largest magnitude error we accept; measurements above it are masked with NaN
max_mag_err = 0.217

mag_cols = ['uvw2_dered','uvm2_dered','uvw1_dered','U_dered','B_dered','V_dered','I_dered']
error_cols = ['uvw2_err','uvm2_err','uvw1_err','e_U','e_B','e_V','e_I']
# For each magnitude (same order as mag_cols): the color combinations that use it
color_cols = [['uvw2 - b','uvw2 - v','uvw2 - i'],
                 ['uvm2 - b','uvm2 - v','uvm2 - i'],
                 ['uvw1 - b','uvw1 - v','uvw1 - i'],
                 ['u','u','u'],
                 ['uvw2 - b','uvm2 - b','uvw1 - b'],
                 ['uvw2 - v','uvm2 - v','uvw1 - v'],
                 ['uvw2 - i','uvm2 - i','uvw1 - i']]

for mag, err, color in zip(mag_cols, error_cols, color_cols):
    if mag == 'U_dered':
        # No color is calculated for U (its color_cols entry is a placeholder),
        # so only the magnitude and its error are masked
        cols = [mag, err]
    else:
        # Mask the magnitude, its error, every color built from it and the
        # matching '<color> overlap' columns
        cols = [mag, err] + color + [label + ' overlap' for label in color]

    # Rows whose error exceeds the limit get NaN in all of those columns
    error_too_high = df[err] > max_mag_err
    df.loc[error_too_high, cols] = np.nan

# Convert 0's to NaNs
# NOTE(review): this replaces every 0 in every column of df, not only in the
# magnitude columns - confirm that is intended.
df = df.replace(0, np.nan)

# Number of missing magnitudes per source
n_mag_err = df[mag_cols].isna().sum(axis=1)

# Drop sources for which every magnitude column is missing
all_mags_missing = n_mag_err == len(mag_cols)
print(f'How many we lost because all magnitudes had high errors: ',int(all_mags_missing.sum()))
df = df[~all_mags_missing].reset_index(drop=True)

# Keep the previous count, then recalculate n_blue from the surviving color labels
df['n_blue_initial'] = df['n_blue']
n_blue = (df[color_labels] == 'blue').sum(axis=1)
df['n_blue'] = n_blue

# Drop sources that no longer have any blue color
no_blue_left = df['n_blue'] == 0
print(f'How many we lost because all blue colors were associated with high magnitude errors: ',int(no_blue_left.sum()))
df = df[~no_blue_left].reset_index(drop=True)

print(f'How many we have: ',df.shape[0])
check_stars(df)

Stars with mag error within 5sigma
How many we lost because all magnitudes had high errors:  103
How many we lost because all blue colors were associated with high magnitude errors:  9849
How many we have:  28998
All Discovery Stars Found


- Require that sources have at least two optical and at least two UV

In [9]:
optical_columns = ['U_dered','B_dered','V_dered','I_dered']
uv_columns = ['uvw2_dered','uvm2_dered','uvw1_dered']

# Number of non-missing optical and UV magnitudes per source
# (DataFrame.count skips NaN, so masked magnitudes are not counted)
n_optical = df[optical_columns].count(axis=1)
n_uv = df[uv_columns].count(axis=1)

# Need at least two in both
has_enough_points = (n_optical >= 2) & (n_uv >= 2)
# Row labels of the sources that are kept
enough_points = list(df.index[has_enough_points])

print('Stars without enough magnitudes in UV and Optical')
print(f'How many we have: ',len(enough_points))
print(f'How many we lost: ',df.shape[0] - len(enough_points))

df = df[has_enough_points].reset_index(drop=True)
check_stars(df)

Stars without enough magnitudes in UV and Optical
How many we have:  20580
How many we lost:  8418
All Discovery Stars Found


- Require that each source be 'very blue' in at least one filter

In [10]:
# Recalculate 'very blue': per source, the number of color combinations that are
# labelled 'blue' and whose '<combo> overlap' column says 'no'
overlap_labels = [label + ' overlap' for label in color_labels]
# Plain arrays so the two tables line up by position (column i with column i)
blue_flags = df[color_labels].eq('blue').to_numpy()
no_overlap_flags = df[overlap_labels].eq('no').to_numpy()
# Sources with no blue color combination get 0.
# Stored as float - NOTE(review): assumed to match the dtype this column had
# before; switch to int if the saved catalog should hold integer counts.
df['very_blue'] = (blue_flags & no_overlap_flags).sum(axis=1).astype(float)

# Keep only sources with at least one very blue color combination
is_very_blue = df['very_blue'] >= 1
n_lost = df.shape[0] - int(is_very_blue.sum())

df = df[is_very_blue].reset_index(drop=True)
print('Stars without at least one very blue color')
print(f'How many we have: ',df.shape[0])
print(f'How many we lost: ',n_lost)
# NOTE(review): the saved output of this cell lists Star_15 as missing after
# this cut - confirm that losing a discovery star here is acceptable.
check_stars(df)

Stars without at least one very blue color
How many we have:  17342
How many we lost:  3238
Star_15


In [11]:
# Write the reduced catalog to disk
# NOTE(review): with pandas' default index=True the DataFrame index is written
# as an extra unnamed first column - confirm downstream readers expect it.
reduced_catalog_path = 'C:/Projects/0_Data/SUMS_CompleteCatalog/Candidates/1_magnitude_reduced.csv'
df.to_csv(reduced_catalog_path)