# Lockman-SWIRE check HELP IDs

Because the photo-zs were computed from an earlier version of the masterlist, the HELP IDs they carry may no longer match those of the current masterlist. In this notebook we check for differences and, where required, replace the old IDs with the current ones using a positional cross-match.

In [None]:
from herschelhelp_internal import git_version

# Record which revision of the internal library produced this run.
version_banner = "This notebook was run with herschelhelp_internal version: \n{}"
print(version_banner.format(git_version()))

In [None]:
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))

import os
import time

from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import join, Column, Table
import numpy as np
from pymoc import MOC

from collections import Counter

from herschelhelp_internal.masterlist import merge_catalogues, nb_merge_dist_plot, specz_merge
from herschelhelp_internal.utils import coords_to_hpidx, ebv, gen_help_id, inMoc

In [None]:

# Output directory and date suffix; both can be overridden from the environment.
OUT_DIR = os.environ.get('OUT_DIR', "./data")
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))

# Date stamps of the two masterlist versions being compared: the one that
# appears in the photo-z file name and the one read as the current masterlist.
OLD_SUFFIX = "20170710"
NEW_SUFFIX = "20180219"

# Folder holding the Lockman-SWIRE masterlist products.
ML_FOLDER = "../../dmu1/dmu1_ml_Lockman-SWIRE/"

# Create the output directory if needed.  exist_ok=True tolerates an existing
# directory but still raises if the path exists and is not a directory.
os.makedirs(OUT_DIR, exist_ok=True)

## I - Reading the photo-z catalogue and the current masterlist

In [None]:
# Photo-z catalogue; its file name carries the date of the masterlist version
# it was built from (OLD_SUFFIX).
photoz = Table.read(
    "./data/master_catalogue_Lockman-SWIRE_{}_photoz_20170802_r_and_irac1_optimised.fits".format(
        OLD_SUFFIX))

# Current masterlist (NEW_SUFFIX); only the identifier and the position are kept.
master_catalogue = Table.read(
    "{}data/master_catalogue_lockman-swire_{}.fits".format(ML_FOLDER, NEW_SUFFIX)
)["help_id", "ra", "dec"]


In [None]:
# Reset the table-level metadata (presumably so it cannot interfere with the
# joins below -- TODO confirm).
photoz.meta = None

# All photo-z columns are kept; only the position columns are renamed.
for old_name, new_name in (('RA', 'photoz_ra'), ('DEC', 'photoz_dec')):
    photoz[old_name].name = new_name

# -99 used for missing values: turn every negative redshift into NaN.
negative_redshift = photoz['z1_median'] < 0
photoz['z1_median'][negative_redshift] = np.nan

In [None]:
# Left join: every masterlist row is kept, with photo-z columns where the
# tables match.
left_joined = join(master_catalogue, photoz, join_type='left')

# Rows without a photo-z get NaN as redshift once the table is filled.
left_joined['z1_median'].fill_value = np.nan
merged_table = left_joined.filled()

In [None]:
# Row counts before and after the join on the original help_ids.
merge_summary = (
    ("Master catalogue length: {}", len(master_catalogue)),
    ("Photoz length: {}", len(photoz)),
    ("Merged length: {}", len(merged_table)),
    ("Merged photozs length: {}", np.sum(~np.isnan(merged_table['z1_median']))),
    ("Nonnan photozs length: {}", np.sum(~np.isnan(photoz['z1_median']))),
)
for template, value in merge_summary:
    print(template.format(value))

### Check positions

In [None]:
# The photo-z coordinates are declared to be in degrees before building
# the sky coordinates.
for coord_name in ('photoz_ra', 'photoz_dec'):
    photoz[coord_name].unit = u.deg

photoz_coords = SkyCoord(photoz['photoz_ra'], photoz['photoz_dec'])
masterlist_coords = SkyCoord(master_catalogue['ra'], master_catalogue['dec'])

# Plot the separations between the photo-z and masterlist positions.
nb_merge_dist_plot(photoz_coords, masterlist_coords)

In [None]:
def helpid_merge(catalogue, RA_COL, DEC_COL, master_catalogue, radius=0.4*u.arcsec):
    """Add a column with up-to-date HELP IDs to a catalogue.

    Occasionally a catalogue was produced from an out-of-date masterlist,
    leading to subtle differences in help_ids and breaking joins and anything
    else that relies on help_ids for matching.

    This function performs a positional cross-match between the catalogue and
    the latest HELP masterlist and copies the masterlist help_id onto the
    matched catalogue rows.  The association is one-to-one:

    * each masterlist source is associated with the closest catalogue row
      within ``radius``;
    * if several masterlist sources pick the same catalogue row, that row
      receives the help_id of the closest of them.

    No flag column is added for ambiguous associations.

    Parameters
    ----------
    catalogue: astropy.table.Table
        The table containing the catalogue. It must not contain a 'help_id'
        column.  The input table is not modified; a copy is returned.
    RA_COL: str
        The name of the right ascension column in the catalogue.  Its values
        are interpreted as degrees.
    DEC_COL: str
        The name of the declination column in the catalogue.  Its values are
        interpreted as degrees.
    master_catalogue: astropy.table.Table
        The latest HELP masterlist.  It must provide the 'help_id', 'ra' and
        'dec' columns ('ra' and 'dec' are interpreted as degrees).
    radius: astropy.units.quantity.Quantity
        The maximum separation allowed between a catalogue row and its
        masterlist counterpart.

    Return
    ------
    astropy.table.Table
        A copy of the catalogue with a 'help_id' column added.  Rows without
        a counterpart within ``radius`` hold an empty string.

    """
    catalogue = catalogue.copy()
    cat_coords = SkyCoord(catalogue[RA_COL].data * u.deg,
                          catalogue[DEC_COL].data * u.deg)
    master_coords = SkyCoord(master_catalogue['ra'].data * u.deg,
                             master_catalogue['dec'].data * u.deg)

    # search_around_sky returns the indices into its argument first and the
    # indices into the object it is called on second.
    idx_master, idx_cat, d2d, _ = cat_coords.search_around_sky(
        master_coords, radius)

    # Order the candidate pairs by increasing separation.  A stable sort makes
    # the outcome reproducible when two pairs have the same separation.
    sort_idx = np.argsort(d2d, kind='mergesort')
    idx_cat = idx_cat[sort_idx]
    idx_master = idx_master[sort_idx]

    # Keep only the closest catalogue row for each masterlist source.
    # np.unique returns the position of the first (i.e. closest) occurrence;
    # sorting those positions restores the increasing-separation order.
    _, first_idx = np.unique(idx_master, return_index=True)
    first_idx.sort()
    idx_cat = idx_cat[first_idx]
    idx_master = idx_master[first_idx]

    # A catalogue row may still have been picked by several masterlist
    # sources.  Keep only the closest one: assigning through an index array
    # containing duplicates would otherwise store an arbitrary one of them.
    _, first_idx = np.unique(idx_cat, return_index=True)
    idx_cat = idx_cat[first_idx]
    idx_master = idx_master[first_idx]

    # Add the new help_id column; unmatched rows keep the empty string.
    catalogue.add_column(
        Column(data=np.full(len(catalogue), '', dtype='<U33'),
               name="help_id"))
    catalogue['help_id'][idx_cat] = master_catalogue['help_id'][idx_master]

    return catalogue

In [None]:
# Keep the IDs the photo-zs were delivered with under a separate name:
# helpid_merge requires a catalogue without a 'help_id' column and adds a
# fresh one itself.
photoz['help_id'].name = 'help_id_old'

In [None]:
# Cross-match the photo-z positions against the current masterlist.  The
# 0.8 arcsec radius passed here overrides the 0.4 arcsec default of helpid_merge.
photoz_new = helpid_merge(photoz, "photoz_ra", "photoz_dec", master_catalogue, radius=0.8*u.arcsec)

In [None]:
# Left join of the masterlist with the re-identified photo-zs.  No explicit
# keys are given, so the join uses the column names shared by both tables
# (expected to be 'help_id' only -- TODO confirm).
merged_table_new = join(master_catalogue, photoz_new, join_type='left')

In [None]:
# Same row counts as before, now for the join on the cross-matched help_ids.
merge_summary_new = (
    ("Master catalogue length: {}", len(master_catalogue)),
    ("Photoz length: {}", len(photoz)),
    ("Merged length: {}", len(merged_table_new)),
    ("Merged photozs length: {}", np.sum(~np.isnan(merged_table_new['z1_median']))),
    ("Nonnan photozs length: {}", np.sum(~np.isnan(photoz_new['z1_median']))),
)
for template, value in merge_summary_new:
    print(template.format(value))

In [None]:
# Preview the first ten rows of the re-identified photo-z catalogue.
photoz_new[:10].show_in_notebook()

In [None]:
# Make sure missing IDs are represented by the empty string.
photoz_new['help_id'].fill_value = ''
photoz_new = photoz_new.filled()

new_ids = photoz_new['help_id']
has_no_new_id = np.sum(new_ids == '')
has_same_id = np.sum(new_ids == photoz_new['help_id_old'])

for message, count in (
        ("Objects not given up to date ids: {}", has_no_new_id),
        ("Objects with the same id before and after: {}", has_same_id)):
    print(message.format(count))

In [None]:
# Column order for the output file: the two ID columns first, then every
# other column in its existing order.
id_columns = ['help_id', 'help_id_old']
other_columns = list(photoz_new.colnames)
for id_column in id_columns:
    other_columns.remove(id_column)
cols = id_columns + other_columns
cols

In [None]:
# Show the rows whose help_id differs from the one they were delivered with.
id_changed = photoz_new['help_id'] != photoz_new['help_id_old']
photoz_new['help_id', 'help_id_old'][id_changed]

In [None]:
# The new HELP IDs must be unique among the rows that received one.
mask = (photoz_new['help_id'] != '')
n_with_id = len(photoz_new[mask])
n_distinct_ids = len(np.unique(photoz_new['help_id'][mask]))
print("OK!" if n_with_id == n_distinct_ids else "The HELP IDs are not unique!!!")

In [None]:
# Keep only the rows that received a new help_id, so that later joins do not
# match on empty IDs.  `mask` comes from the uniqueness check in the previous
# cell.
# NOTE(review): this overwrites photoz_new, so `mask` must be recomputed
# before this cell is run again.
photoz_new = photoz_new[mask]

## II - Saving the catalogue

In [None]:
# Write the re-identified catalogue into OUT_DIR (created in the configuration
# cell; "./data" unless overridden through the environment).  The file name
# carries both the old and the new masterlist date stamps.
photoz_new[cols].write(
    os.path.join(
        OUT_DIR,
        "master_catalogue_Lockman-SWIRE_{}_photoz_20170802_r_and_irac1_optimised_UPDATED_IDs_{}.fits".format(
            OLD_SUFFIX, NEW_SUFFIX)),
    overwrite=True)