# Combine the LMC and SMC color catalogs
## January 2024
### Bethany Ludwig

In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import numpy as np 
import os, glob
import matplotlib

from astropy.io import ascii
from matplotlib import rc
from scipy import stats
from astropy.coordinates import SkyCoord
# Tell NumPy not to warn on invalid floating-point operations for the
# remainder of the session (this is a global setting).
np.seterr(invalid='ignore')

# Set dir depending on machine 
# `directory` is where the crossmatched per-galaxy catalogs are read from;
# both paths end with '/' because later cells build file names by appending to them.
directory = '/home/bethany/Projects/0_Data/0_SUMS_Catalogs/CompleteCatalog/Crossmatched/'
# directory = '/Volumes/Untitled/0_Data/SUMS_Catalogs/Step5/'
# `save_directory` is where the combined candidate catalog is written.
save_directory = '/home/bethany/Projects/0_Data/0_SUMS_Catalogs/CandidateCatalog/'

- Combine catalogs 
- Add galaxy column 
- Remove unnecessary columns 
- Add Discovery Star columns

In [2]:
# Load the crossmatched catalog of each galaxy.
lmc = pd.read_csv(f'{directory}lmc_step5_crossmatch.csv')
smc = pd.read_csv(f'{directory}smc_step5_crossmatch.csv')
print('Start size: ', lmc.shape[0], smc.shape[0], lmc.shape[0] + smc.shape[0] )

# Add column to indicate what galaxy they are in
lmc['galaxy'] = 'lmc'
smc['galaxy'] = 'smc'

# Combine together. ignore_index=True gives the result a fresh 0..N-1 index,
# so row labels are unique from the start instead of after a later reset_index.
df = pd.concat([lmc, smc], ignore_index=True)

# Drop any unnamed columns
df = df.drop(columns=[col for col in df.columns if 'Unnamed' in col])

# Add Discovery Name
# Coordinates of the discovery stars; both lists are passed to SkyCoord in degrees.
discovery_ra = [15.24876,14.25651,14.41704,16.00199,77.20574,76.19464,82.00481,86.86671,81.58229,77.16778,77.84628,83.11642,81.8678,80.916,84.05355,83.89011,81.69701,80.48712,78.34581,84.13771,15.48006,15.84282,15.92665,12.68916,15.97596]
discovery_dec = [-72.62048,-72.60092,-71.98791,-72.27852,-69.0916,-69.04014,-69.99687,-69.1021,-69.37607,-69.18455,-69.8981,-70.30682,-68.68436,-68.61127,-69.45609,-70.31835,-68.82376,-69.42087,-69.37446,-69.44082,-72.45029,-72.10988,-72.02792,-73.26791,-72.12816]
if len(discovery_ra) != len(discovery_dec):
    raise ValueError('discovery_ra and discovery_dec must have the same length')
discovery_coord = SkyCoord(discovery_ra, discovery_dec, unit='deg')
# Names are Star_1 .. Star_N in list order; deriving them from the coordinate
# list keeps the two in step so zip() below can never silently truncate.
discovery_name = [f'Star_{i}' for i in range(1, len(discovery_ra) + 1)]

df_co = SkyCoord(df['ra'], df['dec'], unit='deg')

# Largest separation (arcsec) at which the nearest catalog source still counts
# as the discovery star.
MATCH_TOLERANCE_ARCSEC = 0.1

# Create the column up front so it exists even if no star is matched;
# otherwise the later check_stars(df) call would fail on a missing attribute.
if 'discovery_name' not in df.columns:
    df['discovery_name'] = pd.Series(np.nan, index=df.index, dtype=object)

for name, coordinate in zip(discovery_name, discovery_coord):
    distances = coordinate.separation(df_co).arcsecond
    # nanmin: a single NaN coordinate in the catalog would turn np.min into NaN,
    # which makes both the tolerance test and the equality mask below fail silently.
    min_distance = np.nanmin(distances)
    # If no matching source is found, print the minimum distance and the name of the source
    if min_distance > MATCH_TOLERANCE_ARCSEC:
        print(min_distance)
        print(name)
    # Otherwise add it to the dataframe
    else:
        # min_distance is taken from `distances` itself, so exact equality is safe;
        # every row tied for the minimum is labelled.
        df.loc[distances == min_distance, 'discovery_name'] = name
        
def check_stars(df, discovery_names=None):
    """Print which discovery stars are absent from ``df['discovery_name']``.

    Each missing name is printed on its own line, in the order given; if none
    are missing, "All Discovery Stars Found" is printed instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalog to inspect. If it has no ``discovery_name`` column, every
        star is reported as missing.
    discovery_names : iterable of str, optional
        Names to look for. Defaults to 'Star_1' through 'Star_25'.

    Returns
    -------
    None
        The result is only printed, so calling this as the last line of a
        cell produces no extra output.
    """
    # Make sure we have all the discovery stars
    if discovery_names is None:
        discovery_names = [f'Star_{i}' for i in range(1, 26)]

    # Build the set of labelled names once instead of calling .unique() on
    # the whole column for every star.
    if 'discovery_name' in df.columns:
        present = set(df['discovery_name'].dropna().unique())
    else:
        present = set()

    missing = [star for star in discovery_names if star not in present]
    for star in missing:
        print(star)
    if not missing:
        print("All Discovery Stars Found")
# Sanity check before filtering: list any discovery star that did not get a name in df.
check_stars(df)

Start size:  471507 263362 734869
All Discovery Stars Found


- Count the number of color combinations in which each source is blue  
- Remove sources that are not blue in any color combination


In [3]:
# Define the color combination labels
color_labels = ['uvw2 - b','uvw2 - v', 'uvw2 - i', 
                'uvw1 - b', 'uvw1 - v','uvw1 - i', 
                'uvm2 - b', 'uvm2 - v', 'uvm2 - i']

# Count how many color combinations are 'blue'
df['n_blue'] = df[color_labels].isin(['blue']).sum(axis=1)

# Remove sources that are not blue in any filter
is_blue_somewhere = df['n_blue'] > 0
n_not_blue = int((~is_blue_somewhere).sum())
df = df[is_blue_somewhere].reset_index(drop=True)

# Count how many color combinations are 'overlap' / 'red' for the surviving rows
surviving_colors = df[color_labels]
df['n_overlap'] = surviving_colors.isin(['overlap']).sum(axis=1)
df['n_red'] = surviving_colors.isin(['red']).sum(axis=1)

# The cut above keeps n_blue >= 1, so the header says "at least one"
# (it previously read "more than one", which is not what the filter does).
print('Blue in at least one filter')
print('How many we have: ',df.shape[0])
print('How many we lost: ',n_not_blue)
# NOTE(review): the recorded output of this cell lists Star_15 and Star_19 as
# missing after this cut - confirm that dropping them is expected before
# relying on the saved file.
check_stars(df)

# os.path.join works whether or not save_directory ends with a separator.
df.to_csv(os.path.join(save_directory, '0_combined_colors.csv'), index=False)
print('End size: ', df.shape[0])
print('Saved')

Blue in more than one filter
How many we have:  69992
How many we lost:  664877
Star_15
Star_19
End size:  69992
Saved
