# Generating LaTeX tables from information collated for LoVoCCS BCGs

## Import statements

In [1]:
from ident_run_setup import cosmo, load_history, proj_name

import json
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 500)

## Loading relevant information files

### BCG identification framework history

In [2]:
cur_history = load_history()

### X-LoVoCCS cluster sample

In [3]:
samp = pd.read_csv(cur_history['static_samp_file'])
samp.head(3)

Unnamed: 0,LoVoCCSID,common_name,cent_im_ra,cent_im_dec,redshift,MCXC_R500,MCXC_RA,MCXC_DEC,manual_xray_ra,manual_xray_dec,MCXC_Lx500_0.1_2.4,name,ang_prop_ratio
0,1,A2029,227.7343,5.745471,0.0766,1.3344,227.73,5.72,227.7343,5.745471,8.726708999999999e+44,LoVoCCS-1,86.035164
1,2,A401,44.74,13.58,0.0739,1.2421,44.74,13.58,,,6.088643e+44,LoVoCCS-2,83.258258
2,4A,A85North,10.45875,-9.301944,0.0555,1.2103,10.45875,-9.301944,,,5.100085e+44,LoVoCCS-4A,63.861748


### BCG candidate sample

In [4]:
bcg_samp = pd.read_csv("outputs/bcg_output_sample.csv")

# Make some modifications so that the dataframe doesn't distinguish between DESI and LoVoCCS source data ra-decs; I'm 
#  putting the column names as 'ra-BCG{N}' (even though I think it is ugly) because we attempt to use the wide-to-tall
#  pandas method later and it is easier this way around
bcg_samp = bcg_samp.rename(columns={'BCG1_desi-ls_ra': 'ra-BCG1', 'BCG1_desi-ls_dec': 'dec-BCG1', 
                                    'BCG2_desi-ls_ra': 'ra-BCG2', 'BCG2_desi-ls_dec': 'dec-BCG2',
                                    'BCG3_desi-ls_ra': 'ra-BCG3', 'BCG3_desi-ls_dec': 'dec-BCG3',
                                    'BCG4_desi-ls_ra': 'ra-BCG4', 'BCG4_desi-ls_dec': 'dec-BCG4'})

# A399 and A401 have LoVoCCS-photometry defined coords, as they don't appear in the legacy survey. Need to combine
#  the DESI and LoVoCCS columns just into a single column per BCG candidate 
bcg_samp['ra-BCG1'] = np.nanmax(bcg_samp[['ra-BCG1', 'BCG1_lovoccs_ra']].values, axis=1) 
bcg_samp['dec-BCG1'] = np.nanmax(bcg_samp[['dec-BCG1', 'BCG1_lovoccs_dec']].values, axis=1) 
bcg_samp['ra-BCG2'] = np.nanmax(bcg_samp[['ra-BCG2', 'BCG2_lovoccs_ra']].values, axis=1) 
bcg_samp['dec-BCG2'] = np.nanmax(bcg_samp[['dec-BCG2', 'BCG2_lovoccs_dec']].values, axis=1) 

# Getting rid of the now unnecessary LoVoCCS columns
bcg_samp.drop(columns=['BCG1_lovoccs_ra', 'BCG1_lovoccs_dec', 'BCG2_lovoccs_ra', 'BCG2_lovoccs_dec'], inplace=True)
# Also the 'no_bcg_cand' flag column, doesn't matter for this
bcg_samp.drop(columns=['no_bcg_cand'], inplace=True)

bcg_samp.head(15)

  bcg_samp['ra-BCG2'] = np.nanmax(bcg_samp[['ra-BCG2', 'BCG2_lovoccs_ra']].values, axis=1)
  bcg_samp['dec-BCG2'] = np.nanmax(bcg_samp[['dec-BCG2', 'BCG2_lovoccs_dec']].values, axis=1)


Unnamed: 0,cluster_name,ra-BCG1,dec-BCG1,ra-BCG2,dec-BCG2,ra-BCG3,dec-BCG3,ra-BCG4,dec-BCG4
0,LoVoCCS-1,227.733824,5.744883,,,,,,
1,LoVoCCS-2,44.740836,13.582646,,,,,,
2,LoVoCCS-4A,10.460194,-9.302871,,,,,,
3,LoVoCCS-4B,10.429048,-9.439317,,,,,,
4,LoVoCCS-5,303.113338,-56.8265,302.710346,-56.673695,303.50667,-57.027568,303.49407,-57.039226
5,LoVoCCS-7,330.470382,-59.945214,,,,,,
6,LoVoCCS-9,67.802961,-61.453626,67.414206,-61.176134,,,,
7,LoVoCCS-10,194.843512,-4.196002,,,,,,
8,LoVoCCS-11,137.134448,-9.62978,137.329532,-9.698835,,,,
9,LoVoCCS-12,206.867783,-32.864949,,,,,,


### Fiducial BCG candidate spectroscopic redshifts

In [5]:
bcg1_fidz = pd.read_csv("outputs/fiducial_cand_redshift_tables/BCG1_fiducial_specz.csv").rename(columns={'name': 'cluster_name'})
bcg2_fidz = pd.read_csv("outputs/fiducial_cand_redshift_tables/BCG2_fiducial_specz.csv").rename(columns={'name': 'cluster_name'})
bcg3_fidz = pd.read_csv("outputs/fiducial_cand_redshift_tables/BCG3_fiducial_specz.csv").rename(columns={'name': 'cluster_name'})
bcg4_fidz = pd.read_csv("outputs/fiducial_cand_redshift_tables/BCG4_fiducial_specz.csv").rename(columns={'name': 'cluster_name'})

### Spectrum inspection notes

#### RCSEDv2 and DESI

In [6]:
bcg1_emline_notes = pd.read_json("outputs/rcsedv2_desidr1_spec_notes/bcg1_emline_notes.json").T.reset_index(names='cluster_name')
bcg2_emline_notes = pd.read_json("outputs/rcsedv2_desidr1_spec_notes/bcg2_emline_notes.json").T.reset_index(names='cluster_name')
bcg3_emline_notes = pd.read_json("outputs/rcsedv2_desidr1_spec_notes/bcg3_emline_notes.json").T.reset_index(names='cluster_name')
bcg4_emline_notes = pd.read_json("outputs/rcsedv2_desidr1_spec_notes/bcg4_emline_notes.json").T.reset_index(names='cluster_name')

#### MUSE, ALMA, and KMOS

In [7]:
bcg1_eso_notes = pd.read_json("outputs/eso_cube_notes/bcg1_eso_cube_notes.json").T.reset_index(names='cluster_name')
bcg2_eso_notes = pd.read_json("outputs/eso_cube_notes/bcg2_eso_cube_notes.json").T.reset_index(names='cluster_name')
bcg3_eso_notes = pd.read_json("outputs/eso_cube_notes/bcg3_eso_cube_notes.json").T.reset_index(names='cluster_name')
bcg4_eso_notes = pd.read_json("outputs/eso_cube_notes/bcg4_eso_cube_notes.json").T.reset_index(names='cluster_name')

#### MANGA

In [8]:
bcg1_manga_notes = pd.read_json("outputs/manga_cube_notes/bcg1_manga_cube_notes.json").T.reset_index(names='cluster_name')
bcg2_manga_notes = pd.read_json("outputs/manga_cube_notes/bcg2_manga_cube_notes.json").T.reset_index(names='cluster_name')
bcg3_manga_notes = pd.read_json("outputs/manga_cube_notes/bcg3_manga_cube_notes.json").T.reset_index(names='cluster_name')
bcg4_manga_notes = pd.read_json("outputs/manga_cube_notes/bcg4_manga_cube_notes.json").T.reset_index(names='cluster_name')

### Results of cross-matching with other catalogs

#### VLASS QL Ep.1

In [9]:
bcg1_vlass = pd.read_csv("outputs/vlass_galex_crossmatches/bcg1_cands_vlass_searchrad10.0arcsec.csv")
bcg2_vlass = pd.read_csv("outputs/vlass_galex_crossmatches/bcg2_cands_vlass_searchrad10.0arcsec.csv")
bcg3_vlass = pd.read_csv("outputs/vlass_galex_crossmatches/bcg3_cands_vlass_searchrad10.0arcsec.csv")
bcg4_vlass = pd.read_csv("outputs/vlass_galex_crossmatches/bcg4_cands_vlass_searchrad10.0arcsec.csv")

#### GALEX UV Catalog AIS GR6+7

In [10]:
bcg1_galex = pd.read_csv("outputs/vlass_galex_crossmatches/bcg1_cands_galex_searchrad10.0arcsec.csv")
bcg2_galex = pd.read_csv("outputs/vlass_galex_crossmatches/bcg2_cands_galex_searchrad10.0arcsec.csv")
bcg3_galex = pd.read_csv("outputs/vlass_galex_crossmatches/bcg3_cands_galex_searchrad10.0arcsec.csv")
bcg4_galex = pd.read_csv("outputs/vlass_galex_crossmatches/bcg4_cands_galex_searchrad10.0arcsec.csv")

## Combining information

In [11]:
all_bcg_info = bcg_samp.copy()

### Adding redshifts

In [12]:
all_bcg_info = pd.merge(all_bcg_info, bcg1_fidz[['cluster_name', 'z', 'z_err', 'survey', 'source']], how='outer', 
                        on='cluster_name', ).rename(columns={'z': 'z-BCG1', 'z_err': 'z_err-BCG1', 'survey': 'z_survey-BCG1', 
                                                             'source': 'z_lit_source-BCG1'})
all_bcg_info = pd.merge(all_bcg_info, bcg2_fidz[['cluster_name', 'z', 'z_err', 'survey', 'source']], how='outer', 
                        on='cluster_name').rename(columns={'z': 'z-BCG2', 'z_err': 'z_err-BCG2', 'survey': 'z_survey-BCG2', 
                                                           'source': 'z_lit_source-BCG2'})
all_bcg_info = pd.merge(all_bcg_info, bcg3_fidz[['cluster_name', 'z', 'z_err', 'survey', 'source']], how='outer', 
                        on='cluster_name').rename(columns={'z': 'z-BCG3', 'z_err': 'z_err-BCG3', 'survey': 'z_survey-BCG3', 
                                                           'source': 'z_lit_source-BCG3'})
all_bcg_info = pd.merge(all_bcg_info, bcg4_fidz[['cluster_name', 'z', 'z_err', 'survey', 'source']], how='outer', 
                        on='cluster_name').rename(columns={'z': 'z-BCG4', 'z_err': 'z_err-BCG4', 'survey': 'z_survey-BCG4', 
                                                           'source': 'z_lit_source-BCG4'})

### Adding emission-line notes

In [13]:
all_bcg_info = pd.merge(all_bcg_info, bcg1_emline_notes, on='cluster_name').rename(columns={'em_line': 'em_line-BCG1', 
                                                                                            'notes': 'eml_notes-BCG1'})
all_bcg_info = pd.merge(all_bcg_info, bcg2_emline_notes, on='cluster_name').rename(columns={'em_line': 'em_line-BCG2', 
                                                                                            'notes': 'eml_notes-BCG2'})
all_bcg_info = pd.merge(all_bcg_info, bcg3_emline_notes, on='cluster_name').rename(columns={'em_line': 'em_line-BCG3', 
                                                                                            'notes': 'eml_notes-BCG3'})
all_bcg_info = pd.merge(all_bcg_info, bcg4_emline_notes, on='cluster_name').rename(columns={'em_line': 'em_line-BCG4', 
                                                                                            'notes': 'eml_notes-BCG4'})

### Adding ESO cube notes

In [14]:
all_bcg_info = pd.merge(all_bcg_info, bcg1_eso_notes, on='cluster_name').rename(columns={'em_line': 'eso_em_line-BCG1', 
                                                                                         'notes': 'eso_notes-BCG1',
                                                                                         'muse': 'num_muse-BCG1',
                                                                                         'alma': 'num_alma-BCG1',
                                                                                         'kmos': 'num_kmos-BCG1'})
all_bcg_info = pd.merge(all_bcg_info, bcg2_eso_notes, on='cluster_name').rename(columns={'em_line': 'eso_em_line-BCG2', 
                                                                                         'notes': 'eso_notes-BCG2',
                                                                                         'muse': 'num_muse-BCG2',
                                                                                         'alma': 'num_alma-BCG2',
                                                                                         'kmos': 'num_kmos-BCG2'})
all_bcg_info = pd.merge(all_bcg_info, bcg3_eso_notes, on='cluster_name').rename(columns={'em_line': 'eso_em_line-BCG3', 
                                                                                         'notes': 'eso_notes-BCG3',
                                                                                         'muse': 'num_muse-BCG3',
                                                                                         'alma': 'num_alma-BCG3',
                                                                                         'kmos': 'num_kmos-BCG3'})
all_bcg_info = pd.merge(all_bcg_info, bcg4_eso_notes, on='cluster_name').rename(columns={'em_line': 'eso_em_line-BCG4', 
                                                                                         'notes': 'eso_notes-BCG4',
                                                                                         'muse': 'num_muse-BCG4',
                                                                                         'alma': 'num_alma-BCG4',
                                                                                         'kmos': 'num_kmos-BCG4'})

### Creating the wide-to-long **GIGA TABLE**

In [15]:
GIGA_COLS = ['ra', 'dec', 'em_line', 'eml_notes', 'z', 'z_err', 'z_survey', 'eso_em_line', 'eso_notes', 'num_muse', 
             'num_alma', 'num_kmos', 'z_lit_source']

# This 'wide_to_long' call essentially stacks up the BCG1 ra-dec, BCG2 ra-dec, etc. (all in columns currently) into a 
#  multi-index set of rows - this more closely resembles how the output data will be formatted in the paper columns, and
#  is also just another neater way of ordering it
# Didn't want to specify the suffixes like that, but either I'm crap at regex (very likely) or Pandas wasn't behaving
long_bcg_info = pd.wide_to_long(all_bcg_info, GIGA_COLS, 'cluster_name', 
                                'candidate', sep='-', suffix='(!?BCG1|BCG2|BCG3|BCG4)')
# Was tempted to use the 'key' parameter to split the LoVoCCS- bit off of the values we're sorting by, but then remembered that
#  the X-LoVoCCS ids can have alphabetical characters in them - so settled for indexing on the original order of the BCG
#  sample cluster name
long_bcg_info = long_bcg_info.sort_index(level=0, inplace=False).loc[bcg_samp['cluster_name'].values]
# And we get rid of the mention of any BCG candidate that doesn't actually exist for a given cluster
long_bcg_info.dropna(inplace=True, subset=['ra', 'dec'])

# Removing any 'tool' entries in the source columns
long_bcg_info['z_lit_source'] = long_bcg_info['z_lit_source'].replace('tool', '')

long_bcg_info

Unnamed: 0_level_0,Unnamed: 1_level_0,ra,dec,em_line,eml_notes,z,z_err,z_survey,eso_em_line,eso_notes,num_muse,num_alma,num_kmos,z_lit_source
cluster_name,candidate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoVoCCS-1,BCG1,227.733824,5.744883,False,,0.073512,,hectospec,False,,4,,,
LoVoCCS-2,BCG1,44.740836,13.582646,,,0.074518,0.000500,cfa,,,,,,
LoVoCCS-4A,BCG1,10.460194,-9.302871,True,Single emission line in SDSS spectrum I think.,0.055359,0.000016,sdss,True,,7,,4,
LoVoCCS-4B,BCG1,10.429048,-9.439317,,,0.056123,,fast,,,,,,
LoVoCCS-5,BCG1,303.113338,-56.826500,,,0.055378,0.000147,cfa,False,,1,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LoVoCCS-131,BCG1,137.649589,-10.582130,,,0.092100,0.000200,,,,,,,https://ui.adsabs.harvard.edu/abs/1995AJ....10...
LoVoCCS-131,BCG2,137.741475,-10.564283,False,Something in 6dF but I think it is a poorly de...,0.090662,,6df,,,,,,
LoVoCCS-134,BCG1,329.606320,-60.391193,,,0.075697,0.000220,,,,,,,ENACS
LoVoCCS-134,BCG2,329.608421,-60.426073,,,0.075309,0.000220,,,,,,,ENACS


In [16]:
long_bcg_info['z_lit_source'].value_counts()

z_lit_source
                                                                    77
ENACS                                                                6
REFLEX spectroscopic redshifts                                       3
https://ui.adsabs.harvard.edu/abs/1991AJ....102.1581B/abstract       1
https://ui.adsabs.harvard.edu/abs/2016MNRAS.460.1758H/abstract       1
https://ui.adsabs.harvard.edu/abs/2009A%26A...499..357G/abstract     1
https://ui.adsabs.harvard.edu/abs/2022MNRAS.513.3013Y/abstract       1
https://ui.adsabs.harvard.edu/abs/2019ApJS..240...39G/abstract       1
https://ui.adsabs.harvard.edu/abs/1995ApJS...96..343Q/abstract       1
https://ui.adsabs.harvard.edu/abs/2003MNRAS.339..652K/abstract       1
https://ui.adsabs.harvard.edu/abs/1995AJ....109...14O/abstract       1
Name: count, dtype: int64

## BCG candidate summary 

In [18]:
SPECZ_DECI_PL = 4
POS_DECI_PL = 4

# Chosen columns for the candidate summary table
BCG_CANDS_COLS = ['ra', 'dec', 'z', 'z_err', 'z_survey', 'z_lit_source']

# Get just the columns that we wish to use for this LaTeX table
cands_info = long_bcg_info.loc[:, BCG_CANDS_COLS]

# We're going to combine the survey and literature source columns related to redshifts
lit_sp_src_msk = cands_info['z_lit_source'].values != ''
# There will only be a literature source if there was no survey-based spectrum
cands_info.loc[lit_sp_src_msk, 'z_survey'] = cands_info['z_lit_source'].values[lit_sp_src_msk]
# Some of the sources are URLs, which we'll turn into bibcodes for brevity, as I'm pretty sure I put
#  them all in as ADS URLS - I'll check to make sure they are ADS urls actually
cands_info['z_survey'] = cands_info['z_survey'].apply(lambda x: x.split('.edu/abs/')[-1].split('/abstract')[0].replace('%', '\%')
                                                      if isinstance(x, str) and 'https' in x and 'adsabs' in x else x)
# Don't need this anymore
del cands_info['z_lit_source']

# We're also going to combine the redshift and redshift error columns - this will pre-make them into a string
#  but that doesn't matter here
str_z_info = cands_info.loc[:, ['z', 'z_err']].round(SPECZ_DECI_PL).astype('str').values
str_z_info = (str_z_info[:, 0] + r'$\pm$' + str_z_info[:, 1]).astype(str)
str_z_info = np.char.replace(str_z_info, '$\\pm$nan', '')
cands_info.loc[:, 'z_str'] = str_z_info
# Remove the original 'z' and 'z_err' columns
cands_info = cands_info.drop(columns=['z', 'z_err'])

# Round the RA-Decs
cands_info[['ra', 'dec']] = cands_info[['ra', 'dec']].round(POS_DECI_PL)

# Organise the column order again
cands_info = cands_info[['ra', 'dec', 'z_str', 'z_survey']]

# -------- Make the intial LaTeX table --------
cands_latex = cands_info.to_latex(multirow=False, float_format="%.{i}f".format(i=POS_DECI_PL))

# Make the full cluster names be the X-LoVoCCS IDS
cands_latex = cands_latex.replace('LoVoCCS-', '')
# Cut off the full table definition and just retain the entries - we'll probably put this in an ApJ deluxe table
cands_latex = cands_latex.split('\\midrule\n')[-1].split('\n\\bottomrule')[0]

# This should add small separations between different cluster's entries in the table, but not between individual
#  BCG candidates - just an aethsetical thing + I think it makes it easier to read
# Also adds a LaTeX comment divider between each cluster's entries - makes the latex easier to read I think
split_cands_latex = cands_latex.split('\n') + ['']
cands_latex = "".join([(tab_row+'[1.5mm]\n%\n') if split_cands_latex[tab_row_ind+1][:2] != ' &' else (tab_row+'\n')
                       for tab_row_ind, tab_row in enumerate(split_cands_latex[:-1])])

# This should replace all NaN values with '-'
cands_latex = cands_latex.replace('NaN', '-').replace('nan', '-')

# We make the survey names look a little nicer
prettier_survs = {'sdss': "SDSS", 'eboss': 'eBOSS', '6df': '6dF', 'fast': 'FAST', 'desi_dr1': 'DESI-DR1', 
                  'cfa': 'CfA', 'hectospec': 'Hectospec'}
for cur_surv, pretty_surv in prettier_survs.items():
    cands_latex = cands_latex.replace(cur_surv, pretty_surv)    

print(cands_latex)

1 & BCG1 & 227.7338 & 5.7449 & 0.0735 & Hectospec \\[1.5mm]
%
2 & BCG1 & 44.7408 & 13.5826 & 0.0745$\pm$0.0005 & CfA \\[1.5mm]
%
4A & BCG1 & 10.4602 & -9.3029 & 0.0554$\pm$0.0 & SDSS \\[1.5mm]
%
4B & BCG1 & 10.4290 & -9.4393 & 0.0561 & FAST \\[1.5mm]
%
5 & BCG1 & 303.1133 & -56.8265 & 0.0554$\pm$0.0001 & CfA \\
 & BCG2 & 302.7103 & -56.6737 & 0.0563$\pm$0.0002 & CfA \\
 & BCG3 & 303.5067 & -57.0276 & 0.0575$\pm$0.0002 & CfA \\
 & BCG4 & 303.4941 & -57.0392 & 0.0524 & 6dF \\[1.5mm]
%
7 & BCG1 & 330.4704 & -59.9452 & 0.0998$\pm$0.0002 & CfA \\[1.5mm]
%
9 & BCG1 & 67.8030 & -61.4536 & 0.0604$\pm$0.0002 & CfA \\
 & BCG2 & 67.4142 & -61.1761 & 0.0667$\pm$0.0002 & CfA \\[1.5mm]
%
10 & BCG1 & 194.8435 & -4.1960 & 0.0846$\pm$0.0001 & DESI-DR1 \\[1.5mm]
%
11 & BCG1 & 137.1344 & -9.6298 & 0.0548$\pm$0.0001 & CfA \\
 & BCG2 & 137.3295 & -9.6988 & 0.0542$\pm$0.0001 & CfA \\[1.5mm]
%
12 & BCG1 & 206.8678 & -32.8649 & 0.0385 & 6dF \\[1.5mm]
%
13 & BCG1 & 49.4904 & -44.2378 & 0.0758$\pm$0.0002 & CfA 

  cands_info['z_survey'] = cands_info['z_survey'].apply(lambda x: x.split('.edu/abs/')[-1].split('/abstract')[0].replace('%', '\%')
