In [2]:
# Create the final SUMS candidate catalog
# - I checked for duplicates in topcat but there were none 

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from astropy.coordinates import SkyCoord

# Set dir depending on machine 
data_dir = '/home/bethany/Projects/0_Data/'

# Read in the full catalog so we can assign the correct SUMS_ID
full_df = pd.read_csv(data_dir + "0_SUMS_Catalogs/FinalCatalogs/SUMS_UV_Catalog.csv")
full_coords = SkyCoord(ra=full_df['RA'], dec=full_df['Dec'], unit='deg')

# Read in the candidate catalog 
df = pd.read_csv(data_dir + "0_SUMS_Catalogs/CandidateCatalog/3_stripped_star_candidates.csv")

# Sort by RA
df = df.sort_values(by='ra').reset_index(drop=True)

# Add ID Column 
candidate_coords = SkyCoord(ra=df['ra'], dec=df['dec'], unit='deg')
sums_ids = []
for coord in candidate_coords:
    sep = coord.separation(full_coords).arcsec
    min_sep = np.min(sep)
    if min_sep > 0.3:
        print('Warning: No match found')
    idx = np.argmin(sep)
    sums_ids.append(full_df['SUMS_ID'].iloc[idx])

df['SUMS_ID'] = sums_ids


# Resort columns
cols = [ # Basic info
        'SUMS_ID', 'ra', 'dec', 'cut',
        # UV Photometry
        'uvw2', 'uvw2_err', 'uvm2', 'uvm2_err',  'uvw1', 'uvw1_err',
        # Optical Photometry
        'U', 'e_U', 'B', 'e_B', 'V', 'e_V', 'I', 'e_I',
        # Standard Deviation 
        'uvw2_std', 'uvm2_std', 'uvw1_std',
        # Flux Frac 
        'uvw2_flux_frac', 'uvm2_flux_frac', 'uvw1_flux_frac',
        # Residual Frac
        'uvw2_resid_frac',  'uvm2_resid_frac', 'uvw1_resid_frac',
        # Dist Moved
        'uvw2_dist_moved',  'uvm2_dist_moved', 'uvw1_dist_moved',
        # Dist to nearest neighbor 
        'uvw2_closest_min',  'uvm2_closest_min', 'uvw1_closest_min',
        # Num5 
        'uvw2_num5','uvm2_num5', 'uvw1_num5', 
        # Num2.5
        'uvw2_num2p5',  'uvm2_num2p5', 'uvw1_num2p5',
        # N observations 
        'uvw2_num_obs', 'uvm2_num_obs',  'uvw1_num_obs']
df = df[cols]

# Rename Columns
df.columns = ['SUMS_ID', 'RA', 'Dec', 'Ranking',
              
              'UVW2', 'UVW2_err', 'UVM2', 'UVM2_err', 'UVW1', 'UVW1_err',

              'U', 'U_err', 'B', 'B_err', 'V', 'V_err', 'I', 'I_err',

              'UVW2_std', 'UVM2_std', 'UVW1_std',

              'UVW2_flux_frac', 'UVM2_flux_frac', 'UVW1_flux_frac',

              'UVW2_resid_frac', 'UVM2_resid_frac', 'UVW1_resid_frac',

              'UVW2_dist_moved', 'UVM2_dist_moved', 'UVW1_dist_moved',

              'UVW2_dist_neighbor', 'UVM2_dist_neighbor', 'UVW1_dist_neighbor',

              'UVW2_n5', 'UVM2_n5', 'UVW1_n5',

              'UVW2_n2p5', 'UVM2_n2p5', 'UVW1_n2p5',

              'UVW2_nobs', 'UVM2_nobs', 'UVW1_nobs']

# Some Columns need to be integers
integer_cols = ['UVW2_n5', 'UVM2_n5', 'UVW1_n5',

              'UVW2_n2p5', 'UVM2_n2p5', 'UVW1_n2p5',

              'UVW2_nobs', 'UVM2_nobs', 'UVW1_nobs']

for col in integer_cols:
    # Convert nans to -99 
    df[col] = df[col].fillna(-99)
    # Convert to int
    df[col] = df[col].astype(int)
    # Convert -99 back to nan
    df[col] = df[col].replace(-99, np.nan)
    
# Report same SF as MCPS
sig_fig_cols = ['UVW2', 'UVM2', 'UVW1','UVW2_err', 'UVM2_err', 'UVW1_err','UVW2_std', 'UVM2_std', 'UVW1_std',

              'UVW2_flux_frac', 'UVM2_flux_frac', 'UVW1_flux_frac',

              'UVW2_resid_frac', 'UVM2_resid_frac', 'UVW1_resid_frac',

              'UVW2_dist_moved', 'UVM2_dist_moved', 'UVW1_dist_moved',

              'UVW2_dist_neighbor', 'UVM2_dist_neighbor', 'UVW1_dist_neighbor']


for col in sig_fig_cols:
  df[col] = df[col].round(3)


print('Final Size:', df.shape[0])
# Save final catalog 
df.to_csv(data_dir + '0_SUMS_Catalogs/FinalCatalogs/SUMS_UV_Candidate_Catalog.csv', index=False)

Final Size: 829


In [8]:
df[df.Ranking == 'VB-G']

Unnamed: 0,SUMS_ID,RA,Dec,Ranking,UVW2,UVW2_err,UVM2,UVM2_err,UVW1,UVW1_err,...,UVW2_n5,UVM2_n5,UVW1_n5,UVW2_n2p5,UVM2_n2p5,UVW1_n2p5,UVW2_nobs,UVM2_nobs,UVW1_nobs,\hdots
2,SUMS$\_$8dcf5,10.788915,-73.38922,VB-G,19.733,0.142,,,19.322,0.136,...,1.0,,2.0,0,0,0,1.0,,1.0,\hdots
8,SUMS$\_$94852,11.838675,-73.11092,VB-G,,,18.935,0.183,18.680,0.095,...,,3.0,1.0,0,0,0,,1.0,3.0,\hdots
10,SUMS$\_$95a87,11.962635,-73.14296,VB-G,17.956,0.048,18.157,0.064,18.318,0.066,...,3.0,3.0,3.0,0,0,0,5.0,4.0,4.0,\hdots
13,SUMS$\_$97da1,12.184785,-73.41532,VB-G,19.006,0.126,,,19.143,0.160,...,1.0,,1.0,0,0,0,2.0,,3.0,\hdots
21,SUMS$\_$9bc0e,12.554100,-73.58448,VB-G,18.360,0.073,18.182,0.102,18.469,0.122,...,2.0,2.0,2.0,0,0,0,2.0,1.0,1.0,\hdots
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
808,SUMS$\_$832c6,85.523865,-69.20575,VB-G,18.730,0.099,18.485,0.083,18.586,0.090,...,6.0,6.0,6.0,0,0,0,1.0,2.0,2.0,\hdots
809,SUMS$\_$83936,85.572735,-69.49336,VB-G,19.462,0.126,,,19.228,0.209,...,2.0,,2.0,0,0,0,2.0,,1.0,\hdots
812,SUMS$\_$842c0,85.664055,-69.06073,VB-G,19.589,0.089,19.524,0.144,19.740,0.180,...,1.0,1.0,1.0,0,0,0,4.0,3.0,2.0,\hdots
823,SUMS$\_$89c90,87.059040,-70.22966,VB-G,,,20.016,0.175,19.488,0.138,...,,1.0,1.0,0,0,0,,1.0,1.0,\hdots


In [9]:
# Choose a few rows that have data and are more typical
# Only show some columns otherwise it's too wide
# Add a columns of 'vdots' to split 
df['\\hdots'] = ['\\hdots']*df.shape[0]
selection = df.loc[[1,808,0,3],['SUMS_ID', 'RA', 'Dec','Ranking', 
              
               'UVW2', 'UVW2_err', #'UVM2', 'UVM2_err', 'UVW1', 'UVW1_err',

                '\\hdots',

            #   'U', 'U_err', 'B', 'B_err', 'V', 'V_err', 'I', 'I_err',

            #   'UVW2_std', 'UVM2_std', 'UVW1_std',

            #   'UVW2_flux_frac', 'UVM2_flux_frac', 'UVW1_flux_frac',

            #   'UVW2_resid_frac', 'UVM2_resid_frac', 'UVW1_resid_frac',

            #   'UVW2_dist_moved', 'UVM2_dist_moved', 'UVW1_dist_moved',

       #        'UVW2_dist_neighbor', 'UVM2_dist_neighbor', 'UVW1_dist_neighbor',

            #   'UVW2_n5', 'UVM2_n5', 'UVW1_n5',

           #   'UVW2_n2p5', 'UVM2_n2p5', 
             'UVW1_n2p5',

              'UVW2_nobs', 'UVM2_nobs', 'UVW1_nobs']]

# Format the floats
for col in [ 'UVW2', 'UVW2_err']:
  selection[col] = selection[col].apply(lambda x: '{:.5g}'.format(x))
# Format the integers 
for col in ['UVW2_nobs', 'UVM2_nobs', 'UVW1_nobs','UVW1_n2p5']:
  selection[col] = selection[col].apply(lambda x: '{:.0f}'.format(x))


# Anything with underscores needs a \\ in front of it 
df['SUMS_ID'] = df['SUMS_ID'].apply(lambda x: x.replace('_','$\\_$'))
columns = selection.columns
columns = [col.replace('_','$\\_$') for col in columns]
selection.columns = columns


print(selection.to_latex(index=False))

\begin{tabular}{lrrllllllll}
\toprule
SUMS$\_$ID & RA & Dec & Ranking & UVW2 & UVW2$\_$err & \hdots & UVW1$\_$n2p5 & UVW2$\_$nobs & UVM2$\_$nobs & UVW1$\_$nobs \\
\midrule
SUMS$\_$8cdd1 & 10.548510 & -73.243620 & VB-E & 18.407 & 0.028 & \hdots & 0 & 7 & 7 & 7 \\
SUMS$\_$832c6 & 85.523865 & -69.205750 & VB-G & 18.73 & 0.099 & \hdots & 0 & 1 & 2 & 2 \\
SUMS$\_$8c796 & 10.414935 & -73.377410 & B-E & 18.378 & 0.046 & \hdots & 1 & 2 & 1 & 3 \\
SUMS$\_$8dfe7 & 10.831395 & -73.659790 & B-G & 17.442 & 0.034 & \hdots & 1 & 2 & 2 & 2 \\
\bottomrule
\end{tabular}

