In [52]:
# Create the final SUMS catalog
# - I checked for duplicates that we may have introduced by self-matching the step 4 catalogs in topcat. 
# - There are some very close sources (~0.1 arcsec) but they do have different MCPS photometry.
# - There were 4 of these in the SMC and 3 in the LMC.

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from astropy.coordinates import SkyCoord

# Root directory of the data products. This is machine-specific: edit it when
# running the notebook somewhere else.
data_dir = '/home/bethany/Projects/0_Data/'

# Load the Step 4 complete catalogs for the LMC and the SMC
l_df = pd.read_csv(data_dir + "0_SUMS_Catalogs/CompleteCatalog/Step4/lmc_photometry.csv")
s_df = pd.read_csv(data_dir + "0_SUMS_Catalogs/CompleteCatalog/Step4/smc_photometry.csv")

# Order each catalog by the 'ra' column and renumber its rows from 0
l_df = l_df.sort_values(by='ra', ignore_index=True)
s_df = s_df.sort_values(by='ra', ignore_index=True)

# Assign the ID column: 'SUMS_' followed by the lowercase hexadecimal form of a
# running counter. The counter starts at 100000 for the first LMC row and the
# SMC rows continue from where the LMC rows stop, so no ID is shared.
first_id = 100000
n_lmc = len(l_df)
n_smc = len(s_df)
l_df['SUMS_ID'] = ['SUMS_' + format(num, 'x')
                   for num in range(first_id, first_id + n_lmc)]
s_df['SUMS_ID'] = ['SUMS_' + format(num, 'x')
                   for num in range(first_id + n_lmc, first_id + n_lmc + n_smc)]

# Stack the two catalogs (LMC rows first) under a fresh 0..N-1 index
df = pd.concat([l_df, s_df], ignore_index=True)

# Remove every column whose name contains 'Unnamed'
unnamed_cols = df.columns[df.columns.str.contains('Unnamed')]
df = df.drop(columns=unnamed_cols)

# Select, order and rename the catalog columns in one place.
# Each entry maps the column name used in the Step 4 files to the name used in
# the final catalog; the order of the entries is the order of the output
# columns. Any column not listed here is dropped.
column_map = {
    # Basic info
    'SUMS_ID': 'SUMS_ID',
    'ra': 'RA',
    'dec': 'Dec',
    # UV photometry
    'uvw2_mag': 'UVW2',
    'uvw2_mag_err': 'UVW2_err',
    'uvm2_mag': 'UVM2',
    'uvm2_mag_err': 'UVM2_err',
    'uvw1_mag': 'UVW1',
    'uvw1_mag_err': 'UVW1_err',
    # Optical photometry
    'U': 'U',
    'e_U': 'U_err',
    'B': 'B',
    'e_B': 'B_err',
    'V': 'V',
    'e_V': 'V_err',
    'I': 'I',
    'e_I': 'I_err',
    # Standard deviation
    'uvw2_mag_std': 'UVW2_std',
    'uvm2_mag_std': 'UVM2_std',
    'uvw1_mag_std': 'UVW1_std',
    # Flux fraction
    'uvw2_flux_frac': 'UVW2_flux_frac',
    'uvm2_flux_frac': 'UVM2_flux_frac',
    'uvw1_flux_frac': 'UVW1_flux_frac',
    # Residual fraction
    'uvw2_resid_frac': 'UVW2_resid_frac',
    'uvm2_resid_frac': 'UVM2_resid_frac',
    'uvw1_resid_frac': 'UVW1_resid_frac',
    # Distance moved
    'uvw2_dist_moved': 'UVW2_dist_moved',
    'uvm2_dist_moved': 'UVM2_dist_moved',
    'uvw1_dist_moved': 'UVW1_dist_moved',
    # Distance to nearest neighbor
    'uvw2_closest_min': 'UVW2_dist_neighbor',
    'uvm2_closest_min': 'UVM2_dist_neighbor',
    'uvw1_closest_min': 'UVW1_dist_neighbor',
    # Num5
    'uvw2_num5': 'UVW2_n5',
    'uvm2_num5': 'UVM2_n5',
    'uvw1_num5': 'UVW1_n5',
    # Num2.5
    'uvw2_num2p5': 'UVW2_n2p5',
    'uvm2_num2p5': 'UVM2_n2p5',
    'uvw1_num2p5': 'UVW1_n2p5',
    # Number of observations
    'uvw2_num_obs': 'UVW2_nobs',
    'uvm2_num_obs': 'UVM2_nobs',
    'uvw1_num_obs': 'UVW1_nobs',
}

# Keep only the listed columns, in the listed order ...
cols = list(column_map)
df = df[cols]

# ... then give them their final names (same order as `cols`)
df.columns = list(column_map.values())

# Round the UV measurement columns to 3 decimal places before saving
# (per the original note, this is meant to match the precision reported by MCPS).
# The list is built suffix by suffix, and for each suffix in the band order
# UVW2, UVM2, UVW1: magnitudes, errors, standard deviations, flux fractions,
# residual fractions, distance moved, distance to neighbor.
rounded_suffixes = ['', '_err', '_std', '_flux_frac', '_resid_frac',
                    '_dist_moved', '_dist_neighbor']
sig_fig_cols = [band + suffix
                for suffix in rounded_suffixes
                for band in ('UVW2', 'UVM2', 'UVW1')]

# Replace each listed column by its rounded version; column order is unchanged
df = df.assign(**{name: df[name].round(3) for name in sig_fig_cols})

print('Final Size:', len(df))

# Write the final catalog to disk without the row index
df = df.reset_index(drop=True)
df.to_csv(data_dir + '0_SUMS_Catalogs/FinalCatalogs/SUMS_UV_Catalog.csv', index=False)

Final Size: 734869


In [45]:
# Show the rows that have a value in all three UV magnitude columns, a UVM2_nobs
# above the catalog-wide mean of UVM2_nobs, and a UVW1_n2p5 greater than 1.
has_all_uv = df[['UVW2', 'UVM2', 'UVW1']].notna().all(axis=1)
uvm2_nobs_above_mean = df['UVM2_nobs'] > df['UVM2_nobs'].mean()
uvw1_n2p5_above_one = df['UVW1_n2p5'] > 1
df[has_all_uv & uvm2_nobs_above_mean & uvw1_n2p5_above_one]

Unnamed: 0,SUMS_ID,RA,Dec,UVW2,UVW2_err,UVM2,UVM2_err,UVW1,UVW1_err,U,...,UVW1_dist_neighbor,UVW2_n5,UVM2_n5,UVW1_n5,UVW2_n2p5,UVM2_n2p5,UVW1_n2p5,UVW2_nobs,UVM2_nobs,UVW1_nobs
6768,SUMS_1a110,75.810045,-68.91412,17.861,0.084,17.724,0.103,17.689,0.096,17.848,...,2.248,3.0,3.0,3.0,2.0,2.0,2.0,5.0,4.0,4.0
7198,SUMS_1a2be,75.824385,-68.93420,17.896,0.086,17.968,0.132,18.438,0.190,,...,1.915,5.0,6.0,4.0,2.0,2.0,2.0,5.0,4.0,4.0
7268,SUMS_1a304,75.826575,-68.90744,17.794,0.082,17.823,0.123,17.848,0.118,18.048,...,2.099,2.0,2.0,2.0,2.0,2.0,2.0,5.0,4.0,4.0
7462,SUMS_1a3c6,75.831855,-68.88358,16.074,0.032,15.998,0.039,16.152,0.044,16.236,...,2.281,5.0,4.0,5.0,2.0,2.0,2.0,5.0,4.0,4.0
7488,SUMS_1a3e0,75.832770,-68.92988,16.357,0.035,16.254,0.046,16.332,0.042,16.735,...,1.316,2.0,2.0,2.0,2.0,2.0,2.0,5.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730428,SUMS_cabdc,16.972770,-72.27609,13.966,0.016,14.006,0.018,14.303,0.017,15.142,...,1.577,3.0,3.0,3.0,2.0,2.0,2.0,8.0,8.0,8.0
731240,SUMS_caf08,17.023380,-72.35056,17.124,0.037,16.986,0.043,17.269,0.044,17.510,...,1.912,4.0,4.0,4.0,2.0,2.0,2.0,8.0,6.0,8.0
731804,SUMS_cb13c,17.059695,-72.22011,14.537,0.021,14.633,0.025,14.930,0.021,15.722,...,1.637,2.0,2.0,2.0,2.0,2.0,2.0,6.0,6.0,6.0
731938,SUMS_cb1c2,17.069805,-72.36417,15.886,0.029,15.899,0.039,16.167,0.040,16.906,...,1.876,5.0,5.0,5.0,2.0,2.0,2.0,5.0,4.0,5.0


In [51]:
# Print a short LaTeX preview table built from a few hand-picked catalog rows.
# Only some columns are shown (the full table is too wide); a column of
# '\hdots' stands in for the omitted ones.
#
# All edits are made on a copy (`selection`), never on the catalog `df`, so the
# catalog is left untouched and re-running this cell always prints the same
# table. Previously the underscore escaping was applied to `df` after the rows
# had been selected, so a fresh run printed unescaped IDs and every re-run
# added another layer of escaping to `df`.
preview_rows = [1235, 789, 732138, 734700]
float_cols = ['UVW2', 'UVW2_err', 'UVM2']
count_cols = ['UVW1_n2p5', 'UVW2_nobs', 'UVM2_nobs', 'UVW1_nobs']
selection = df.loc[preview_rows, ['SUMS_ID', 'RA', 'Dec'] + float_cols + count_cols].copy()

# Placeholder column marking the omitted columns, placed right after UVM2
selection.insert(selection.columns.get_loc('UVM2') + 1, '\\hdots', '\\hdots')

# Format the floats with up to 5 significant digits
for col in float_cols:
    selection[col] = selection[col].map('{:.5g}'.format)
# Format the counts without a decimal part
for col in count_cols:
    selection[col] = selection[col].map('{:.0f}'.format)

# Underscores cannot appear bare in LaTeX text: write them as $\_$ in both the
# IDs and the column headers of the preview.
selection['SUMS_ID'] = selection['SUMS_ID'].str.replace('_', '$\\_$', regex=False)
columns = [col.replace('_', '$\\_$') for col in selection.columns]
selection.columns = columns

# escape=False: the cells already hold hand-written LaTeX ($\_$ and \hdots), so
# pandas must not escape them again. Being explicit avoids depending on the
# default, which differs between pandas versions.
print(selection.to_latex(index=False, escape=False))

\begin{tabular}{lrrllllllll}
\toprule
SUMS$\_$ID & RA & Dec & UVW2 & UVW2$\_$err & UVM2 & \hdots & UVW1$\_$n2p5 & UVW2$\_$nobs & UVM2$\_$nobs & UVW1$\_$nobs \\
\midrule
SUMS$\_$18b73 & 75.583170 & -70.516640 & 13.676 & 0.028 & 13.704 & \hdots & 1 & 3 & 4 & 3 \\
SUMS$\_$189b5 & 75.533445 & -70.477690 & 12.624 & 0.025 & 12.547 & \hdots & 0 & 3 & 4 & 3 \\
SUMS$\_$cb28a & 17.084355 & -72.142140 & 13.885 & 0.018 & 13.921 & \hdots & 2 & 6 & 6 & 6 \\
SUMS$\_$cbc8c & 17.569800 & -72.243030 & 14.851 & 0.022 & 14.835 & \hdots & 0 & 6 & 4 & 6 \\
\bottomrule
\end{tabular}

