# CDFS SWIRE master catalogue

This notebook presents the merge of the various pristine catalogues to produce the HELP master catalogue on CDFS SWIRE.

In [None]:
from herschelhelp_internal import git_version
# Report which version of the HELP internal package produced this run.
version_banner = "This notebook was run with herschelhelp_internal version: \n{}".format(git_version())
print(version_banner)

In [None]:
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))

import os
import time

from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Column, Table
import numpy as np
from pymoc import MOC

from herschelhelp_internal.masterlist import merge_catalogues, specz_merge, nb_merge_dist_plot
from herschelhelp_internal.utils import coords_to_hpidx, ebv, gen_help_id, inMoc

In [None]:
# Working directories and output file suffix.  Each one can be overridden
# through the environment variable of the same name.
TMP_DIR = os.environ.get('TMP_DIR', "./data_tmp")
OUT_DIR = os.environ.get('OUT_DIR', "./data")
# The default suffix is the run date formatted as _YYYYMMDD.
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))

# Create the output directory when it is missing.  exist_ok=True accepts an
# already existing directory but still raises when OUT_DIR exists and is not
# a directory, instead of silently continuing and failing later on write.
os.makedirs(OUT_DIR, exist_ok=True)

## I - Reading the prepared pristine catalogues

In [None]:
# Load every prepared pristine catalogue from TMP_DIR.  The path templates are
# listed in the same order as the names on the left-hand side.
(atlas, combo, ps1, servs, swire, video, vhs, des, candels) = (
    Table.read(path_template.format(TMP_DIR))
    for path_template in (
        "{}/ATLAS.fits",
        "{}/COMBO.fits",
        # "{}/Fireworks.fits" is no longer read (Fireworks is not merged).
        "{}/PS1.fits",
        "{}/SERVS.fits",
        "{}/SWIRE.fits",
        "{}/VISTA-VIDEO.fits",
        "{}/VISTA-VHS.fits",
        "{}/DES.fits",
        "{}/CANDELS.fits",
    )
)

## II - Merging tables

We first merge the optical catalogues and then add the infrared ones: PS1, COMBO, ATLAS, VIDEO, VHS, SERVS, SWIRE. Fireworks is no longer included.

At every step, we look at the distribution of the distances to the nearest source in the merged catalogue to determine the best crossmatching radius.

### PanSTARRS

In [None]:
# PanSTARRS is the starting point of the master list; its position columns
# become the reference 'ra' / 'dec' columns.
master_catalogue = ps1
for ps1_name, master_name in (('ps1_ra', 'ra'), ('ps1_dec', 'dec')):
    master_catalogue[ps1_name].name = master_name

### Add Fireworks

We are no longer including Fireworks under Mattia's advice. I leave the code in the notebook commented out in case the user wishes to include it.

In [None]:
#nb_merge_dist_plot(
#    SkyCoord(master_catalogue['ra'], master_catalogue['dec']),
#    SkyCoord(fireworks['fireworks_ra'], fireworks['fireworks_dec'])
#)

In [None]:
# Given the graph above, we use 0.8 arc-second radius
#master_catalogue = merge_catalogues(master_catalogue, fireworks, "fireworks_ra", "fireworks_dec", radius=0.8*u.arcsec)

### Add COMBO

In [None]:
# Compare the current master list positions with the COMBO positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
combo_coords = SkyCoord(combo['combo_ra'], combo['combo_dec'])
nb_merge_dist_plot(master_coords, combo_coords)

In [None]:
# Given the graph above, we use 0.8 arc-second radius
combo_radius = 0.8 * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, combo, "combo_ra", "combo_dec", radius=combo_radius
)

### Add ATLAS

In [None]:
# Compare the current master list positions with the ATLAS positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
atlas_coords = SkyCoord(atlas['atlas_ra'], atlas['atlas_dec'])
nb_merge_dist_plot(master_coords, atlas_coords)

In [None]:
# Given the graph above, we use 0.8 arc-second radius
atlas_radius = 0.8 * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, atlas, "atlas_ra", "atlas_dec", radius=atlas_radius
)

### Add VIDEO

In [None]:
# Compare the current master list positions with the VIDEO positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
video_coords = SkyCoord(video['video_ra'], video['video_dec'])
nb_merge_dist_plot(master_coords, video_coords)

In [None]:
# Given the graph above, we use 0.8 arc-second radius
video_radius = 0.8 * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, video, "video_ra", "video_dec", radius=video_radius
)

### Add VHS

In [None]:
# Compare the current master list positions with the VHS positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
vhs_coords = SkyCoord(vhs['vhs_ra'], vhs['vhs_dec'])
nb_merge_dist_plot(master_coords, vhs_coords)

In [None]:
# Given the graph above, we use 0.8 arc-second radius
vhs_radius = 0.8 * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, vhs, "vhs_ra", "vhs_dec", radius=vhs_radius
)

### Add SERVS

In [None]:
# Compare the current master list positions with the SERVS positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
servs_coords = SkyCoord(servs['servs_ra'], servs['servs_dec'])
nb_merge_dist_plot(master_coords, servs_coords)

In [None]:
# Given the graph above, we use 1 arc-second radius
servs_radius = 1. * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, servs, "servs_ra", "servs_dec", radius=servs_radius
)

### Add SWIRE

In [None]:
# Compare the current master list positions with the SWIRE positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
swire_coords = SkyCoord(swire['swire_ra'], swire['swire_dec'])
nb_merge_dist_plot(master_coords, swire_coords)

In [None]:
# Given the graph above, we use 1 arc-second radius
swire_radius = 1. * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, swire, "swire_ra", "swire_dec", radius=swire_radius
)

### Add DES

DES and CANDELS are added at the end because they were not in the original masterlist. By adding them at the end we ensure that the original HELP ids are maintained.

In [None]:
# Compare the current master list positions with the DES positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
des_coords = SkyCoord(des['des_ra'], des['des_dec'])
nb_merge_dist_plot(master_coords, des_coords)

In [None]:
# Given the graph above, we use 0.8 arc-second radius
des_radius = 0.8 * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, des, "des_ra", "des_dec", radius=des_radius
)

### Add CANDELS

In [None]:
# Compare the current master list positions with the CANDELS positions.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
candels_coords = SkyCoord(candels['candels_ra'], candels['candels_dec'])
nb_merge_dist_plot(master_coords, candels_coords)

In [None]:
# Given the graph above, we use 0.8 arc-second radius
candels_radius = 0.8 * u.arcsec
master_catalogue = merge_catalogues(
    master_catalogue, candels, "candels_ra", "candels_dec", radius=candels_radius
)

### Cleaning

When we merge the catalogues, astropy masks the non-existent values (e.g. when a row comes from only one catalogue and has no counterpart in the others, the columns from those other catalogues are masked for that row). We fill the masked values with NaN for float columns, False for flag columns and -1 for ID columns.

In [None]:
# Choose, from the column name, the value that replaces masked entries.
for col in master_catalogue.colnames:
    if "flag" in col:
        # Tested first: the photometry test below is a substring match, so a
        # flag column whose name happens to contain "m_" or "f_" (for example
        # through a band name ending in "m" or "f") would otherwise be given a
        # NaN fill value instead of 0 / False.
        master_catalogue[col].fill_value = 0
    elif "m_" in col or "merr_" in col or "f_" in col or "ferr_" in col or "stellarity" in col:
        # Magnitudes, fluxes, their errors and stellarity: missing is NaN.
        master_catalogue[col].fill_value = np.nan
    elif "id" in col:
        # Identifier columns: missing is -1.
        master_catalogue[col].fill_value = -1

# Replace every masked entry by the fill value of its column.
master_catalogue = master_catalogue.filled()

In [None]:
# Display the first ten rows of the merged catalogue.
first_rows = master_catalogue[:10]
first_rows.show_in_notebook()

## III - Merging flags and stellarity

Each pristine catalogue contains a flag indicating if the source was associated with another nearby source that was removed during the cleaning process.  We merge these flags into a single one.

In [None]:
# Collect the per-catalogue "cleaned" flags.
flag_cleaned_columns = []
for column in master_catalogue.colnames:
    if 'flag_cleaned' in column:
        flag_cleaned_columns.append(column)

# A source is flagged in the merged column as soon as one catalogue flags it.
flag_column = np.zeros(len(master_catalogue), dtype=bool)
for column in flag_cleaned_columns:
    np.bitwise_or(flag_column, master_catalogue[column], out=flag_column)

merged_flag_cleaned = Column(data=flag_column, name="flag_cleaned")
master_catalogue.add_column(merged_flag_cleaned)
master_catalogue.remove_columns(flag_cleaned_columns)

Each pristine catalogue contains a flag indicating the probability of a source being a Gaia object (0: not a Gaia object, 1: possibly, 2: probably, 3: definitely).  We merge these flags taking the highest value.

In [None]:
# Collect the per-catalogue Gaia flags.
flag_gaia_columns = []
for column in master_catalogue.colnames:
    if 'flag_gaia' in column:
        flag_gaia_columns.append(column)

# Keep, for every source, the highest Gaia flag found in any catalogue.
gaia_flags = [master_catalogue[column] for column in flag_gaia_columns]
highest_gaia_flag = np.max(gaia_flags, axis=0)
master_catalogue.add_column(Column(data=highest_gaia_flag, name="flag_gaia"))
master_catalogue.remove_columns(flag_gaia_columns)

Each pristine catalogue may contain one or several stellarity columns indicating the probability (0 to 1) of each source being a star.  We merge these columns taking the highest value.

In [None]:
# Collect every stellarity column coming from the pristine catalogues.
stellarity_columns = []
for column in master_catalogue.colnames:
    if 'stellarity' in column:
        stellarity_columns.append(column)

# Keep the highest stellarity of each source, ignoring NaN entries.
stellarities = [master_catalogue[column] for column in stellarity_columns]
highest_stellarity = np.nanmax(stellarities, axis=0)
master_catalogue.add_column(Column(data=highest_stellarity, name="stellarity"))
master_catalogue.remove_columns(stellarity_columns)

## IV - Adding E(B-V) column

In [None]:
# Compute the E(B-V) column from the source positions and append it.
ebv_column = ebv(master_catalogue['ra'], master_catalogue['dec'])
master_catalogue.add_column(ebv_column)

## V - Adding HELP unique identifiers and field columns

In [None]:
# HELP identifiers are generated from the source positions.
help_ids = gen_help_id(master_catalogue['ra'], master_catalogue['dec'])
master_catalogue.add_column(Column(help_ids, name="help_id"))

# Every source of this catalogue belongs to the same field.
field_names = np.full(len(master_catalogue), "CDFS-SWIRE", dtype='<U18')
master_catalogue.add_column(Column(field_names, name="field"))

In [None]:
# Check that the HELP Ids are unique
n_distinct_ids = len(np.unique(master_catalogue['help_id']))
if n_distinct_ids == len(master_catalogue):
    print("OK!")
else:
    print("The HELP IDs are not unique!!!")

## VI - Cross-matching with the spec-z catalogue

In [None]:
# Spectroscopic redshift catalogue for this field.
specz_path = "../../dmu23/dmu23_CDFS-SWIRE/data/CDFS_SWIRE-specz-v2.3.fits"
specz = Table.read(specz_path)

In [None]:
# Compare the master list positions with the spec-z positions; the spec-z
# coordinates are multiplied by u.deg before building the SkyCoord.
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
specz_coords = SkyCoord(specz['ra'] * u.deg, specz['dec'] * u.deg)
nb_merge_dist_plot(master_coords, specz_coords)

In [None]:
# Cross-match the spec-z catalogue using a 1 arc-second radius.
specz_radius = 1. * u.arcsec
master_catalogue = specz_merge(master_catalogue, specz, radius=specz_radius)

## VII - Choosing between multiple values for the same filter

### VII.a SERVS vs SWIRE vs CANDELS

Both SERVS and SWIRE provide IRAC1 and IRAC2 fluxes. SERVS is deeper but tends to under-estimate the flux of bright sources (Mattia said over 2000 µJy) as illustrated by this comparison of SWIRE, SERVS, and Spitzer-EIP fluxes. On a small section there are also CANDELS forced IRAC fluxes which, since they are very deep, we always take preferentially.

In [None]:
# Load the SEIP catalogue and find, for each SEIP source, the closest source
# of the master list.
seip = Table.read("../../dmu0/dmu0_SEIP/data/SEIP_CDFS-SWIRE.fits")
seip_coords = SkyCoord(seip['ra'], seip['dec'])
master_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])
idx, d2d, _ = seip_coords.match_to_catalog_sky(master_coords)

# Only keep the associations closer than 2 arc-seconds.
max_separation = 2 * u.arcsec
mask = d2d <= max_separation

In [None]:
# IRAC 1: SERVS and SWIRE aperture fluxes against the matched SEIP flux.
fig, ax = plt.subplots()
seip_flux = seip['i1_f_ap1'][mask]
for survey_label, flux_column in (("SERVS", 'f_ap_servs_irac1'),
                                  ("SWIRE", 'f_ap_swire_irac1')):
    matched_sources = master_catalogue[idx[mask]]
    ax.scatter(seip_flux, matched_sources[flux_column], label=survey_label, s=2.)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("SEIP flux [μJy]")
ax.set_ylabel("SERVS/SWIRE flux [μJy]")
ax.set_title("IRAC 1")
ax.legend()
# Dashed line at the 2000 flux limit, then the one-to-one relation.
ax.axvline(2000, color="black", linestyle="--", linewidth=1.)
ax.plot(seip_flux, seip_flux, linewidth=.1, color="black", alpha=.5);

In [None]:
# IRAC 2: SERVS and SWIRE aperture fluxes against the matched SEIP flux.
fig, ax = plt.subplots()
ax.scatter(seip['i2_f_ap1'][mask], master_catalogue[idx[mask]]['f_ap_servs_irac2'], label="SERVS", s=2.)
ax.scatter(seip['i2_f_ap1'][mask], master_catalogue[idx[mask]]['f_ap_swire_irac2'], label="SWIRE", s=2.)
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("SEIP flux [μJy]")
ax.set_ylabel("SERVS/SWIRE flux [μJy]")
ax.set_title("IRAC 2")
ax.legend()
# Dashed line at the 2000 flux limit.
ax.axvline(2000, color="black", linestyle="--", linewidth=1.)

# One-to-one relation, drawn with the same SEIP column as the x axis of the
# scatter plots (the IRAC 2 flux, not an IRAC 1 column).
ax.plot(seip['i2_f_ap1'][mask], seip['i2_f_ap1'][mask], linewidth=.1, color="black", alpha=.5);

When both SWIRE and SERVS fluxes are provided, we use the SERVS flux below 2000 μJy and the SWIRE flux over.

We create a table indicating, for each source, the origin of the IRAC1 and IRAC2 fluxes; this table will be saved separately.

In [None]:
# Table recording which survey each IRAC flux was taken from, keyed by help_id.
irac_origin = Table()
help_id_column = master_catalogue['help_id']
irac_origin.add_column(help_id_column)

In [None]:
# IRAC1 aperture flux and magnitudes
# A source has a measurement in a survey when its flux there is not NaN.
has_servs = ~np.isnan(master_catalogue['f_ap_servs_irac1'])
has_swire = ~np.isnan(master_catalogue['f_ap_swire_irac1'])
has_both = has_servs & has_swire

for source_count, message in (
    (np.sum(has_servs), "{} sources with SERVS flux"),
    (np.sum(has_swire), "{} sources with SWIRE flux"),
    (np.sum(has_both), "{} sources with SERVS and SWIRE flux"),
):
    print(message.format(source_count))

# The comparison with the 2000 limit is only evaluated for sources that do
# have a SERVS flux.
has_servs_above_limit = has_servs.copy()
has_servs_above_limit[has_servs] = master_catalogue['f_ap_servs_irac1'][has_servs] > 2000

# SWIRE replaces SERVS only where both exist and SERVS is above the limit.
swire_preferred = has_both & has_servs_above_limit
use_swire = (has_swire & ~has_servs) | swire_preferred
use_servs = has_servs & ~swire_preferred

print("{} sources for which we use SERVS".format(np.sum(use_servs)))
print("{} sources for which we use SWIRE".format(np.sum(use_swire)))


def _servs_or_swire(servs_name, swire_name):
    """Return a NaN-initialised array filled from the selected survey column."""
    selected = np.full(len(master_catalogue), np.nan)
    selected[use_servs] = master_catalogue[servs_name][use_servs]
    selected[use_swire] = master_catalogue[swire_name][use_swire]
    return selected


f_ap_irac = _servs_or_swire('f_ap_servs_irac1', 'f_ap_swire_irac1')
ferr_ap_irac = _servs_or_swire('ferr_ap_servs_irac1', 'ferr_ap_swire_irac1')
m_ap_irac = _servs_or_swire('m_ap_servs_irac1', 'm_ap_swire_irac1')
merr_ap_irac = _servs_or_swire('merr_ap_servs_irac1', 'merr_ap_swire_irac1')

# Add the merged columns, then drop the per-survey ones.
for merged_name, merged_values in (
    ("f_ap_irac_i1", f_ap_irac),
    ("ferr_ap_irac_i1", ferr_ap_irac),
    ("m_ap_irac_i1", m_ap_irac),
    ("merr_ap_irac_i1", merr_ap_irac),
):
    master_catalogue.add_column(Column(data=merged_values, name=merged_name))

master_catalogue.remove_columns(['f_ap_servs_irac1', 'f_ap_swire_irac1', 'ferr_ap_servs_irac1',
                                 'ferr_ap_swire_irac1', 'm_ap_servs_irac1', 'm_ap_swire_irac1',
                                 'merr_ap_servs_irac1', 'merr_ap_swire_irac1'])

# Record the survey used for every source (blank when there is no flux).
origin = np.full(len(master_catalogue), '     ', dtype='<U5')
origin[use_servs] = "SERVS"
origin[use_swire] = "SWIRE"

irac_origin.add_column(Column(data=origin, name="IRAC1_ap"))

In [None]:
# IRAC1 total flux and magnitudes
# A source has a measurement in a survey when its flux there is not NaN.
has_servs = ~np.isnan(master_catalogue['f_servs_irac1'])
has_swire = ~np.isnan(master_catalogue['f_swire_irac1'])
has_both = has_servs & has_swire

has_candels = ~np.isnan(master_catalogue['f_candels-irac_i1'])

print("{} sources with SERVS flux".format(np.sum(has_servs)))
print("{} sources with SWIRE flux".format(np.sum(has_swire)))
print("{} sources with SERVS and SWIRE flux".format(np.sum(has_both)))
print("{} sources with CANDELS flux".format(np.sum(has_candels)))

# The comparison with the 2000 limit is only evaluated for sources that do
# have a SERVS flux.
has_servs_above_limit = has_servs.copy()
has_servs_above_limit[has_servs] = master_catalogue['f_servs_irac1'][has_servs] > 2000

# CANDELS always wins; otherwise SWIRE replaces SERVS only where both exist
# and the SERVS flux is above the limit.
use_candels = has_candels
use_swire = ~has_candels & ((has_swire & ~has_servs) | (has_both & has_servs_above_limit))
use_servs = ~has_candels & ((has_servs & ~(has_both & has_servs_above_limit)))

print("{} sources for which we use SERVS".format(np.sum(use_servs)))
print("{} sources for which we use SWIRE".format(np.sum(use_swire)))
print("{} sources for which we use CANDELS".format(np.sum(use_candels)))

f_irac = np.full(len(master_catalogue), np.nan)
f_irac[use_servs] = master_catalogue['f_servs_irac1'][use_servs]
f_irac[use_swire] = master_catalogue['f_swire_irac1'][use_swire]
f_irac[use_candels] = master_catalogue['f_candels-irac_i1'][use_candels]

ferr_irac = np.full(len(master_catalogue), np.nan)
ferr_irac[use_servs] = master_catalogue['ferr_servs_irac1'][use_servs]
ferr_irac[use_swire] = master_catalogue['ferr_swire_irac1'][use_swire]
ferr_irac[use_candels] = master_catalogue['ferr_candels-irac_i1'][use_candels]

flag_irac = np.full(len(master_catalogue), False, dtype=bool)
flag_irac[use_servs] = master_catalogue['flag_servs_irac1'][use_servs]
flag_irac[use_swire] = master_catalogue['flag_swire_irac1'][use_swire]
flag_irac[use_candels] = master_catalogue['flag_candels-irac_i1'][use_candels]

m_irac = np.full(len(master_catalogue), np.nan)
m_irac[use_servs] = master_catalogue['m_servs_irac1'][use_servs]
m_irac[use_swire] = master_catalogue['m_swire_irac1'][use_swire]
m_irac[use_candels] = master_catalogue['m_candels-irac_i1'][use_candels]

merr_irac = np.full(len(master_catalogue), np.nan)
merr_irac[use_servs] = master_catalogue['merr_servs_irac1'][use_servs]
merr_irac[use_swire] = master_catalogue['merr_swire_irac1'][use_swire]
merr_irac[use_candels] = master_catalogue['merr_candels-irac_i1'][use_candels]

# Add the merged columns, then drop the per-survey ones.
master_catalogue.add_column(Column(data=f_irac, name="f_irac_i1"))
master_catalogue.add_column(Column(data=ferr_irac, name="ferr_irac_i1"))
master_catalogue.add_column(Column(data=m_irac, name="m_irac_i1"))
master_catalogue.add_column(Column(data=merr_irac, name="merr_irac_i1"))
master_catalogue.add_column(Column(data=flag_irac, name="flag_irac_i1"))

master_catalogue.remove_columns(['f_servs_irac1', 'f_swire_irac1', 'ferr_servs_irac1',
                                 'ferr_swire_irac1', 'm_servs_irac1', 'flag_servs_irac1', 'm_swire_irac1',
                                 'merr_servs_irac1', 'merr_swire_irac1', 'flag_swire_irac1',

                                 'f_candels-irac_i1',
                                 'ferr_candels-irac_i1',
                                 'm_candels-irac_i1',
                                 'merr_candels-irac_i1',
                                 'flag_candels-irac_i1'])

# Record the survey used for every source (blank when there is no flux).
# The dtype must hold 7 characters: with '<U5' the value "CANDELS" would be
# silently truncated to "CANDE".
origin = np.full(len(master_catalogue), '     ', dtype='<U7')
origin[use_servs] = "SERVS"
origin[use_swire] = "SWIRE"
origin[use_candels] = "CANDELS"
irac_origin.add_column(Column(data=origin, name="IRAC1_total"))

In [None]:
# IRAC2 aperture flux and magnitudes
# A source has a measurement in a survey when its flux there is not NaN.
has_servs = ~np.isnan(master_catalogue['f_ap_servs_irac2'])
has_swire = ~np.isnan(master_catalogue['f_ap_swire_irac2'])
has_both = has_servs & has_swire

for source_count, message in (
    (np.sum(has_servs), "{} sources with SERVS flux"),
    (np.sum(has_swire), "{} sources with SWIRE flux"),
    (np.sum(has_both), "{} sources with SERVS and SWIRE flux"),
):
    print(message.format(source_count))

# The comparison with the 2000 limit is only evaluated for sources that do
# have a SERVS flux.
has_servs_above_limit = has_servs.copy()
has_servs_above_limit[has_servs] = master_catalogue['f_ap_servs_irac2'][has_servs] > 2000

# SWIRE replaces SERVS only where both exist and SERVS is above the limit.
swire_preferred = has_both & has_servs_above_limit
use_swire = (has_swire & ~has_servs) | swire_preferred
use_servs = has_servs & ~swire_preferred

print("{} sources for which we use SERVS".format(np.sum(use_servs)))
print("{} sources for which we use SWIRE".format(np.sum(use_swire)))


def _servs_or_swire(servs_name, swire_name):
    """Return a NaN-initialised array filled from the selected survey column."""
    selected = np.full(len(master_catalogue), np.nan)
    selected[use_servs] = master_catalogue[servs_name][use_servs]
    selected[use_swire] = master_catalogue[swire_name][use_swire]
    return selected


f_ap_irac = _servs_or_swire('f_ap_servs_irac2', 'f_ap_swire_irac2')
ferr_ap_irac = _servs_or_swire('ferr_ap_servs_irac2', 'ferr_ap_swire_irac2')
m_ap_irac = _servs_or_swire('m_ap_servs_irac2', 'm_ap_swire_irac2')
merr_ap_irac = _servs_or_swire('merr_ap_servs_irac2', 'merr_ap_swire_irac2')

# Add the merged columns, then drop the per-survey ones.
for merged_name, merged_values in (
    ("f_ap_irac_i2", f_ap_irac),
    ("ferr_ap_irac_i2", ferr_ap_irac),
    ("m_ap_irac_i2", m_ap_irac),
    ("merr_ap_irac_i2", merr_ap_irac),
):
    master_catalogue.add_column(Column(data=merged_values, name=merged_name))

master_catalogue.remove_columns(['f_ap_servs_irac2', 'f_ap_swire_irac2', 'ferr_ap_servs_irac2',
                                 'ferr_ap_swire_irac2', 'm_ap_servs_irac2', 'm_ap_swire_irac2',
                                 'merr_ap_servs_irac2', 'merr_ap_swire_irac2'])

# Record the survey used for every source (blank when there is no flux).
origin = np.full(len(master_catalogue), '     ', dtype='<U5')
origin[use_servs] = "SERVS"
origin[use_swire] = "SWIRE"
irac_origin.add_column(Column(data=origin, name="IRAC2_ap"))

In [None]:
# IRAC2 total flux and magnitudes
# A source has a measurement in a survey when its flux there is not NaN.
has_servs = ~np.isnan(master_catalogue['f_servs_irac2'])
has_swire = ~np.isnan(master_catalogue['f_swire_irac2'])
has_both = has_servs & has_swire

has_candels = ~np.isnan(master_catalogue['f_candels-irac_i2'])

print("{} sources with SERVS flux".format(np.sum(has_servs)))
print("{} sources with SWIRE flux".format(np.sum(has_swire)))
print("{} sources with SERVS and SWIRE flux".format(np.sum(has_both)))
print("{} sources with CANDELS flux".format(np.sum(has_candels)))

# The comparison with the 2000 limit is only evaluated for sources that do
# have a SERVS flux.
has_servs_above_limit = has_servs.copy()
has_servs_above_limit[has_servs] = master_catalogue['f_servs_irac2'][has_servs] > 2000

# CANDELS always wins; otherwise SWIRE replaces SERVS only where both exist
# and the SERVS flux is above the limit.
use_candels = has_candels
use_swire = ~has_candels & ((has_swire & ~has_servs) | (has_both & has_servs_above_limit))
use_servs = ~has_candels & ((has_servs & ~(has_both & has_servs_above_limit)))

print("{} sources for which we use SERVS".format(np.sum(use_servs)))
print("{} sources for which we use SWIRE".format(np.sum(use_swire)))
print("{} sources for which we use CANDELS".format(np.sum(use_candels)))

f_irac = np.full(len(master_catalogue), np.nan)
f_irac[use_servs] = master_catalogue['f_servs_irac2'][use_servs]
f_irac[use_swire] = master_catalogue['f_swire_irac2'][use_swire]
f_irac[use_candels] = master_catalogue['f_candels-irac_i2'][use_candels]

ferr_irac = np.full(len(master_catalogue), np.nan)
ferr_irac[use_servs] = master_catalogue['ferr_servs_irac2'][use_servs]
ferr_irac[use_swire] = master_catalogue['ferr_swire_irac2'][use_swire]
ferr_irac[use_candels] = master_catalogue['ferr_candels-irac_i2'][use_candels]

flag_irac = np.full(len(master_catalogue), False, dtype=bool)
flag_irac[use_servs] = master_catalogue['flag_servs_irac2'][use_servs]
flag_irac[use_swire] = master_catalogue['flag_swire_irac2'][use_swire]
flag_irac[use_candels] = master_catalogue['flag_candels-irac_i2'][use_candels]

m_irac = np.full(len(master_catalogue), np.nan)
m_irac[use_servs] = master_catalogue['m_servs_irac2'][use_servs]
m_irac[use_swire] = master_catalogue['m_swire_irac2'][use_swire]
m_irac[use_candels] = master_catalogue['m_candels-irac_i2'][use_candels]

merr_irac = np.full(len(master_catalogue), np.nan)
merr_irac[use_servs] = master_catalogue['merr_servs_irac2'][use_servs]
merr_irac[use_swire] = master_catalogue['merr_swire_irac2'][use_swire]
merr_irac[use_candels] = master_catalogue['merr_candels-irac_i2'][use_candels]

# Add the merged columns, then drop the per-survey ones.
master_catalogue.add_column(Column(data=f_irac, name="f_irac_i2"))
master_catalogue.add_column(Column(data=ferr_irac, name="ferr_irac_i2"))
master_catalogue.add_column(Column(data=m_irac, name="m_irac_i2"))
master_catalogue.add_column(Column(data=merr_irac, name="merr_irac_i2"))
master_catalogue.add_column(Column(data=flag_irac, name="flag_irac_i2"))

master_catalogue.remove_columns(['f_servs_irac2', 'f_swire_irac2', 'ferr_servs_irac2',
                                 'ferr_swire_irac2', 'm_servs_irac2', 'flag_servs_irac2', 'm_swire_irac2',
                                 'merr_servs_irac2', 'merr_swire_irac2', 'flag_swire_irac2',

                                 'f_candels-irac_i2',
                                 'ferr_candels-irac_i2',
                                 'm_candels-irac_i2',
                                 'merr_candels-irac_i2',
                                 'flag_candels-irac_i2'])

# Record the survey used for every source (blank when there is no flux).
# The dtype must hold 7 characters: with '<U5' the value "CANDELS" would be
# silently truncated to "CANDE".
origin = np.full(len(master_catalogue), '     ', dtype='<U7')
origin[use_servs] = "SERVS"
origin[use_swire] = "SWIRE"
origin[use_candels] = "CANDELS"
irac_origin.add_column(Column(data=origin, name="IRAC2_total"))

In [None]:
# IRAC3 and IRAC4: the existing (SWIRE) values are kept and CANDELS is only
# used to fill sources that have no SWIRE flux in the band.
has_candels_irac_i3 = ~np.isnan(master_catalogue['f_candels-irac_i3'])
has_candels_irac_i4 = ~np.isnan(master_catalogue['f_candels-irac_i4'])
has_swire_irac_i3 = ~np.isnan(master_catalogue['f_irac3'])
has_swire_irac_i4 = ~np.isnan(master_catalogue['f_irac4'])

use_candels_irac_i3 = has_candels_irac_i3 & ~has_swire_irac_i3
use_candels_irac_i4 = has_candels_irac_i4 & ~has_swire_irac_i4

# Rename the *_irac3 / *_irac4 columns to the *_irac_i3 / *_irac_i4 convention.
for col in master_catalogue.colnames:
    if '_irac3' in col:
        master_catalogue[col].name = col.replace('_irac3', '_irac_i3')
    if '_irac4' in col:
        master_catalogue[col].name = col.replace('_irac4', '_irac_i4')

master_catalogue['f_irac_i3'][use_candels_irac_i3] = master_catalogue['f_candels-irac_i3'][use_candels_irac_i3]
master_catalogue['ferr_irac_i3'][use_candels_irac_i3] = master_catalogue['ferr_candels-irac_i3'][use_candels_irac_i3]
master_catalogue['m_irac_i3'][use_candels_irac_i3] = master_catalogue['m_candels-irac_i3'][use_candels_irac_i3]
master_catalogue['merr_irac_i3'][use_candels_irac_i3] = master_catalogue['merr_candels-irac_i3'][use_candels_irac_i3]
master_catalogue['flag_irac_i3'][use_candels_irac_i3] = master_catalogue['flag_candels-irac_i3'][use_candels_irac_i3]

# The IRAC4 values must be selected with the IRAC4 mask (not the IRAC3 one),
# so that the filled rows agree with the IRAC4_total origin recorded below.
master_catalogue['f_irac_i4'][use_candels_irac_i4] = master_catalogue['f_candels-irac_i4'][use_candels_irac_i4]
master_catalogue['ferr_irac_i4'][use_candels_irac_i4] = master_catalogue['ferr_candels-irac_i4'][use_candels_irac_i4]
master_catalogue['m_irac_i4'][use_candels_irac_i4] = master_catalogue['m_candels-irac_i4'][use_candels_irac_i4]
master_catalogue['merr_irac_i4'][use_candels_irac_i4] = master_catalogue['merr_candels-irac_i4'][use_candels_irac_i4]
master_catalogue['flag_irac_i4'][use_candels_irac_i4] = master_catalogue['flag_candels-irac_i4'][use_candels_irac_i4]

# Record the survey used for every source (blank when there is no flux).
# The dtype must hold 7 characters: with '<U5' the value "CANDELS" would be
# silently truncated to "CANDE".
origin = np.full(len(master_catalogue), '     ', dtype='<U7')
origin[has_swire_irac_i3] = "SWIRE"
origin[use_candels_irac_i3] = "CANDELS"
irac_origin.add_column(Column(data=origin, name="IRAC3_total"))

origin = np.full(len(master_catalogue), '     ', dtype='<U7')
origin[has_swire_irac_i4] = "SWIRE"
origin[use_candels_irac_i4] = "CANDELS"
irac_origin.add_column(Column(data=origin, name="IRAC4_total"))

master_catalogue.remove_columns(['f_candels-irac_i3',    'f_candels-irac_i4',
                                 'ferr_candels-irac_i3', 'ferr_candels-irac_i4',
                                 'm_candels-irac_i3',    'm_candels-irac_i4',
                                 'merr_candels-irac_i3', 'merr_candels-irac_i4',
                                 'flag_candels-irac_i3', 'flag_candels-irac_i4'])

In [None]:
# Save the IRAC flux origins next to the other outputs, replacing any
# previous file of the same name.
irac_origin_path = "{}/cdfs-swire_irac_fluxes_origins{}.fits".format(OUT_DIR, SUFFIX)
irac_origin.write(irac_origin_path, overwrite=True)

### VII.b VIDEO vs VHS
VIDEO is deeper than VHS so we take VIDEO flux for any source that has both.

In [None]:
# Table recording which survey each VISTA flux was taken from, keyed by help_id.
vista_origin = Table()
help_id_column = master_catalogue['help_id']
vista_origin.add_column(help_id_column)

In [None]:
vista_bands = ['y','j','h','k'] # Lowercase naming convention (k is Ks)
for band in vista_bands:
    print('For VISTA band ' + band + ':')
    # VISTA total flux
    # A source has a measurement in a survey when its flux there is not NaN.
    has_video = ~np.isnan(master_catalogue['f_video_' + band])
    has_vhs = ~np.isnan(master_catalogue['f_vhs_' + band])
    has_both = has_video & has_vhs

    print("{} sources with VIDEO flux".format(np.sum(has_video)))
    print("{} sources with VHS flux".format(np.sum(has_vhs)))
    print("{} sources with VIDEO and VHS flux".format(np.sum(has_both)))

    # VIDEO is used whenever available; VHS only fills the remaining sources.
    use_video = has_video
    use_vhs = has_vhs & ~has_both

    print("{} sources for which we use VIDEO".format(np.sum(use_video)))
    print("{} sources for which we use VHS".format(np.sum(use_vhs)))

    f_vista = np.full(len(master_catalogue), np.nan)
    f_vista[use_video] = master_catalogue['f_video_' + band][use_video]
    f_vista[use_vhs] = master_catalogue['f_vhs_' + band][use_vhs]

    ferr_vista = np.full(len(master_catalogue), np.nan)
    ferr_vista[use_video] = master_catalogue['ferr_video_' + band][use_video]
    ferr_vista[use_vhs] = master_catalogue['ferr_vhs_' + band][use_vhs]

    m_vista = np.full(len(master_catalogue), np.nan)
    m_vista[use_video] = master_catalogue['m_video_' + band][use_video]
    m_vista[use_vhs] = master_catalogue['m_vhs_' + band][use_vhs]

    merr_vista = np.full(len(master_catalogue), np.nan)
    merr_vista[use_video] = master_catalogue['merr_video_' + band][use_video]
    merr_vista[use_vhs] = master_catalogue['merr_vhs_' + band][use_vhs]

    flag_vista = np.full(len(master_catalogue), False, dtype=bool)
    flag_vista[use_video] = master_catalogue['flag_video_' + band][use_video]
    flag_vista[use_vhs] = master_catalogue['flag_vhs_' + band][use_vhs]

    # Add the merged columns, then drop the per-survey ones.
    master_catalogue.add_column(Column(data=f_vista, name="f_vista_" + band))
    master_catalogue.add_column(Column(data=ferr_vista, name="ferr_vista_" + band))
    master_catalogue.add_column(Column(data=m_vista, name="m_vista_" + band))
    master_catalogue.add_column(Column(data=merr_vista, name="merr_vista_" + band))
    master_catalogue.add_column(Column(data=flag_vista, name="flag_vista_" + band))

    master_catalogue.remove_columns(['f_video_' + band,
                                     'f_vhs_' + band,
                                     'ferr_video_' + band,
                                     'ferr_vhs_' + band,
                                     'm_video_' + band,
                                     'm_vhs_' + band,
                                     'merr_video_' + band,
                                     'merr_vhs_' + band,
                                     'flag_video_' + band,
                                     'flag_vhs_' + band])

    # Record the survey used for every source (blank when there is no flux).
    origin = np.full(len(master_catalogue), '     ', dtype='<U5')
    origin[use_video] = "VIDEO"
    origin[use_vhs] = "VHS"

    vista_origin.add_column(Column(data=origin, name= 'f_vista_' + band ))



    # VISTA Aperture flux
    has_ap_video = ~np.isnan(master_catalogue['f_ap_video_' + band])
    has_ap_vhs = ~np.isnan(master_catalogue['f_ap_vhs_' + band])
    has_ap_both = has_ap_video & has_ap_vhs

    print("{} sources with VIDEO aperture flux".format(np.sum(has_ap_video)))
    print("{} sources with VHS aperture flux".format(np.sum(has_ap_vhs)))
    print("{} sources with VIDEO and VHS aperture flux".format(np.sum(has_ap_both)))

    # Same rule as for the total fluxes: VIDEO first, VHS as a fallback.
    use_ap_video = has_ap_video
    use_ap_vhs = has_ap_vhs & ~has_ap_both

    print("{} sources for which we use VIDEO aperture fluxes".format(np.sum(use_ap_video)))
    print("{} sources for which we use VHS aperture fluxes".format(np.sum(use_ap_vhs)))

    f_ap_vista = np.full(len(master_catalogue), np.nan)
    f_ap_vista[use_ap_video] = master_catalogue['f_ap_video_' + band][use_ap_video]
    f_ap_vista[use_ap_vhs] = master_catalogue['f_ap_vhs_' + band][use_ap_vhs]

    ferr_ap_vista = np.full(len(master_catalogue), np.nan)
    ferr_ap_vista[use_ap_video] = master_catalogue['ferr_ap_video_' + band][use_ap_video]
    ferr_ap_vista[use_ap_vhs] = master_catalogue['ferr_ap_vhs_' + band][use_ap_vhs]

    m_ap_vista = np.full(len(master_catalogue), np.nan)
    m_ap_vista[use_ap_video] = master_catalogue['m_ap_video_' + band][use_ap_video]
    m_ap_vista[use_ap_vhs] = master_catalogue['m_ap_vhs_' + band][use_ap_vhs]

    merr_ap_vista = np.full(len(master_catalogue), np.nan)
    merr_ap_vista[use_ap_video] = master_catalogue['merr_ap_video_' + band][use_ap_video]
    merr_ap_vista[use_ap_vhs] = master_catalogue['merr_ap_vhs_' + band][use_ap_vhs]


    master_catalogue.add_column(Column(data=f_ap_vista, name="f_ap_vista_" + band))
    master_catalogue.add_column(Column(data=ferr_ap_vista, name="ferr_ap_vista_" + band))
    master_catalogue.add_column(Column(data=m_ap_vista, name="m_ap_vista_" + band))
    # The aperture magnitude error column is built from the aperture errors
    # (merr_ap_vista), not from the total magnitude errors (merr_vista).
    master_catalogue.add_column(Column(data=merr_ap_vista, name="merr_ap_vista_" + band))


    master_catalogue.remove_columns(['f_ap_video_' + band,
                                     'f_ap_vhs_' + band,
                                     'ferr_ap_video_' + band,
                                     'ferr_ap_vhs_' + band,
                                     'm_ap_video_' + band,
                                     'm_ap_vhs_' + band,
                                     'merr_ap_video_' + band,
                                     'merr_ap_vhs_' + band])

    origin_ap = np.full(len(master_catalogue), '     ', dtype='<U5')
    origin_ap[use_ap_video] = "VIDEO"
    origin_ap[use_ap_vhs] = "VHS"

    vista_origin.add_column(Column(data=origin_ap, name= 'f_ap_vista_' + band ))
                  

In [None]:
      
# The Z band is only available in VIDEO, so its columns are simply renamed
# from "video" to "vista" for every measured quantity.
for quantity in ('f_ap', 'ferr_ap', 'f', 'ferr', 'm_ap', 'merr_ap', 'm', 'merr', 'flag'):
    master_catalogue[quantity + '_video_z'].name = quantity + '_vista_z'

In [None]:
# Save the table recording the origin of each merged VISTA column.
vista_origin_path = "{}/cdfs-swire_vista_fluxes_origins{}.fits".format(OUT_DIR, SUFFIX)
vista_origin.write(vista_origin_path, overwrite=True)

## VIII.a Wavelength domain coverage

We add a binary `flag_optnir_obs` indicating that a source was observed in a given wavelength domain:

- 1 for observation in optical;
- 2 for observation in near-infrared;
- 4 for observation in mid-infrared (IRAC).

It's an integer binary flag, so a source observed both in optical and near-infrared but not in mid-infrared would have this flag at 1 + 2 = 3.

*Note 1: The observation flag is based on the creation of multi-order coverage maps from the catalogues, this may not be accurate, especially on the edges of the coverage.*

*Note 2: Being on the observation coverage does not mean having fluxes in that wavelength domain. For sources observed in one domain but having no flux in it, one must take into consideration the different depths in the catalogue we are using.*

In [None]:
# Load the multi-order coverage map (MOC) of each pristine catalogue.
# The target names and the file paths below are listed in the same order.
(
    atlas_moc,
    combo_moc,
    # fireworks_moc,
    ps1_moc,
    servs_moc,
    swire_moc,
    video_moc,
    vhs_moc,
    des_moc,
    candels_moc,
) = (
    MOC(filename=moc_path)
    for moc_path in (
        "../../dmu0/dmu0_ATLAS/data/ATLAS_CDFS-SWIRE_MOC.fits",
        "../../dmu0/dmu0_COMBO-17/data/table3_MOC.fits",
        # "../../dmu0/dmu0_Fireworks/data/Fireworks_MOC.fits",
        "../../dmu0/dmu0_PanSTARRS1-3SS/data/PanSTARRS1-3SS_CDFS-SWIRE_MOC.fits",
        "../../dmu0/dmu0_DataFusion-Spitzer/data/DF-SERVS_CDFS-SWIRE_MOC.fits",
        "../../dmu0/dmu0_DataFusion-Spitzer/data/DF-SWIRE_CDFS-SWIRE_MOC.fits",
        "../../dmu0/dmu0_VISTA-VIDEO-private/data/VIDEO-all_2017-02-12_fullcat_errfix_CDFS-SWIRE_MOC.fits",
        "../../dmu0/dmu0_VISTA-VHS/data/VHS_CDFS-SWIRE_MOC.fits",
        "../../dmu0/dmu0_DES/data/DES-DR1_CDFS-SWIRE_MOC.fits",
        "../../dmu0/dmu0_CANDELS-GOODS-S/data/hlsp_candels_hst_wfc3_goodss-tot-multiband_f160w_v1_MOC.fits",
    )
)

In [None]:
# For each wavelength domain, test every source position against the union
# of the coverage maps of the catalogues belonging to that domain.
was_observed_optical, was_observed_nir, was_observed_mir = (
    inMoc(master_catalogue['ra'], master_catalogue['dec'], domain_moc)
    for domain_moc in (
        # optical (+ fireworks_moc, no longer included)
        ps1_moc + atlas_moc + combo_moc + des_moc + candels_moc,
        # near-infrared
        vhs_moc + video_moc,
        # mid-infrared
        servs_moc + swire_moc,
    )
)

In [None]:
# Combine the three coverage tests into one bit flag:
# 1 = optical, 2 = near-infrared, 4 = mid-infrared.
master_catalogue.add_column(Column(
    data=(1 * was_observed_optical
          + 2 * was_observed_nir
          + 4 * was_observed_mir),
    name="flag_optnir_obs"
))

## VIII.b Wavelength domain detection

We add a binary `flag_optnir_det` indicating that a source was detected in a given wavelength domain:

- 1 for detection in optical;
- 2 for detection in near-infrared;
- 4 for detection in mid-infrared (IRAC).

It's an integer binary flag, so a source detected both in optical and near-infrared but not in mid-infrared would have this flag at 1 + 2 = 3.

*Note 1: We use the total flux columns to determine whether a source has flux; in some catalogues, a source may have an aperture flux but no total flux.*

To get rid of artefacts (chip edges, star flares, etc.) we consider that a source is detected in one wavelength domain when it has a flux value in **at least two bands**. That means that good sources will be excluded from this flag when they are on the coverage of only one band.

In [None]:
# For every source, count the bands of each wavelength domain in which the
# total flux column holds a value (i.e. is not NaN).
# TODO (carried over from the original note): check whether the catalogue
# flags (e.g. from PanSTARRS) should be used here.
# NOTE(review): COMBO and CANDELS are part of the optical coverage but their
# fluxes are not counted below - confirm this is intended.
nb_optical_flux = sum(
    1 * ~np.isnan(master_catalogue[flux_col])
    for flux_col in (
        # PanSTARRS
        'f_ps1_g', 'f_ps1_r', 'f_ps1_i', 'f_ps1_z', 'f_ps1_y',
        # ATLAS
        'f_atlas_u', 'f_atlas_g', 'f_atlas_r', 'f_atlas_i', 'f_atlas_z',
        # DES
        'f_decam_g', 'f_decam_r', 'f_decam_i', 'f_decam_z', 'f_decam_y',
    )
)

nb_nir_flux = sum(
    1 * ~np.isnan(master_catalogue[flux_col])
    for flux_col in ('f_vista_y', 'f_vista_h', 'f_vista_j', 'f_vista_k')
)

nb_mir_flux = sum(
    1 * ~np.isnan(master_catalogue[flux_col])
    for flux_col in ('f_irac_i1', 'f_irac_i2', 'f_irac_i3', 'f_irac_i4')
)

In [None]:
# A source counts as detected in a domain when it has a flux in at least two
# bands of that domain.
has_optical_flux, has_nir_flux, has_mir_flux = (
    nb_flux >= 2 for nb_flux in (nb_optical_flux, nb_nir_flux, nb_mir_flux)
)

# Bit flag: 1 = optical, 2 = near-infrared, 4 = mid-infrared.
master_catalogue.add_column(Column(
    data=1 * has_optical_flux + 2 * has_nir_flux + 4 * has_mir_flux,
    name="flag_optnir_det"
))

## IX - Cross-identification table
We are producing a table associating to each HELP identifier, the identifiers of the sources in the pristine catalogues. This can be used to easily get additional information from them.

In [None]:
# Identifier columns of the pristine catalogues: they are written next to
# help_id in the cross-identification table, then dropped from the master
# catalogue.
pristine_id_columns = [
    'atlas_id',
    'combo_id',
    # 'fireworks_id',
    'ps1_id',
    'servs_intid',
    'swire_intid',
    'video_id',
    'vhs_id',
    'specz_id',
    'des_id',
    'candels_id',
]

cross_ident_path = "{}/master_list_cross_ident_cdfs-swire{}.fits".format(OUT_DIR, SUFFIX)
master_catalogue[['help_id'] + pristine_id_columns].write(cross_ident_path, overwrite=True)
master_catalogue.remove_columns(pristine_id_columns)

## X - Adding HEALPix index

We are adding a column with a HEALPix index at order 13 associated with each source.

In [None]:
# HEALPix index (order 13) computed from each source position.
master_catalogue.add_column(
    Column(
        name="hp_idx",
        data=coords_to_hpidx(master_catalogue['ra'], master_catalogue['dec'], order=13),
    )
)

## XI - Renaming columns

We rename some columns to follow the instrument_filter standard.

In [None]:
# PanSTARRS: column names must carry the instrument name gpc1 instead of ps1.
for col in [name for name in master_catalogue.colnames if 'ps1' in name]:
    master_catalogue[col].name = col.replace("ps1", "gpc1")

In [None]:
# COMBO-17: the instrument name is wfi. Map each COMBO band label to the
# corresponding wfi filter name.
new_name = {
    'combo_420': "wfi_416nm",
    'combo_462': "wfi_461nm",
    'combo_485': "wfi_485nm",
    'combo_518': "wfi_518nm",
    'combo_571': "wfi_571nm",
    'combo_604': "wfi_604nm",
    'combo_646': "wfi_646nm",
    'combo_696': "wfi_696nm",
    'combo_753': "wfi_753nm",
    'combo_815': "wfi_815nm",
    'combo_856': "wfi_856nm",
    'combo_914': "wfi_914nm",
    'combo_b': "wfi_b",
    'combo_i': "wfi_i",
    'combo_r': "wfi_r",
    'combo_u': "wfi_u",
    'combo_v': "wfi_v",
}

# Rename every COMBO column whose name contains one of the band labels above.
for col in master_catalogue.colnames:
    if 'combo' not in col:
        continue
    for old_name, wfi_name in new_name.items():
        if old_name in col:
            master_catalogue[col].name = col.replace(old_name, wfi_name)

In [None]:
# ATLAS: the instrument is omegacam.
# The catalogue contains a "UL" band whose only description is "Bandpass UL
# comes from CASU created list driven measurement"; all of its columns are
# removed before renaming.
master_catalogue.remove_columns([
    quantity + '_atlas_ul'
    for quantity in ('m_ap', 'merr_ap', 'm', 'merr', 'f_ap', 'ferr_ap', 'f', 'ferr', 'flag')
])

for col in master_catalogue.colnames:
    if 'atlas' not in col:
        continue
    master_catalogue[col].name = col.replace("atlas", "omegacam")

In [None]:
# VISTA: the instrument is vircam, but "vista" is used in the filter names.
# The K band is actually a Ks band, so the columns are relabelled.
for col in master_catalogue.colnames:
    if 'vista_k' not in col:
        continue
    master_catalogue[col].name = col.replace("vista_k", "vista_ks")

## XII - Saving the catalogue

In [None]:
# Build the ordered list of columns saved in the final master catalogue.
columns = ["help_id", "field", "ra", "dec", "hp_idx"]

# Bands with aperture measurements: the band name is what follows the
# 5-character "f_ap_" prefix.
bands = [column[5:] for column in master_catalogue.colnames if 'f_ap' in column]

# Bands with total measurements only: every band that has a flag column but
# no aperture flux. The excluded names are what is left of the non-band flag
# columns after dropping their first 5 characters (e.g. "flag_cleaned" ->
# "cleaned", "zspec_association_flag" -> "_association_flag").
bands_no_ap = (
    {column[5:] for column in master_catalogue.colnames if 'flag' in column}
    - set(bands)
    - {'cleaned', 'gaia', 'merged', 'optnir_det', 'optnir_obs', '_association_flag'}
)

# The per-band flag columns are not saved (left commented out below).
for band in bands:
    columns += ["f_ap_{}".format(band), "ferr_ap_{}".format(band),
                "m_ap_{}".format(band), "merr_ap_{}".format(band),
                "f_{}".format(band), "ferr_{}".format(band),
                "m_{}".format(band), "merr_{}".format(band),
                #"flag_{}".format(band)
               ]

# Iterate in sorted order: the iteration order of a set of strings can change
# from one interpreter run to the next, which would make the column order of
# the saved catalogue non-reproducible.
for band in sorted(bands_no_ap):
    columns += ["f_{}".format(band), "ferr_{}".format(band),
                "m_{}".format(band), "merr_{}".format(band),
                #"flag_{}".format(band)
               ]

columns += ["stellarity", "flag_cleaned", "flag_merged", "flag_gaia", "flag_optnir_obs", "flag_optnir_det",
            "zspec", "zspec_qual", "zspec_association_flag", "ebv"]

In [None]:
# Report the master catalogue columns that are not in the list of columns
# saved to disk.
print("Missing columns: {}".format(set(master_catalogue.colnames).difference(columns)))

In [None]:
master_catalogue[columns].write("{}/master_catalogue_cdfs-swire{}.fits".format(OUT_DIR, SUFFIX), overwrite=True)