# Compare tangential shear profiles from the extragalactic and object catalogs for DC2 Run 2.1i

This notebook can be run at NERSC or CC-IN2P3 where the DESC DC2 products are stored. You need to be a DESC member to be able to access those. 

This was put together using:
- the DC2 analysis tutorials (in particular `matching_fof.ipynb` and `object_gcr_2_lensing_cuts.ipynb`)
- the CLMM usage examples


In [None]:
# General imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from astropy.table import Table

# DC2 catalog-related imports
import FoFCatalogMatching
import GCRCatalogs
from GCR import GCRQuery

#CLMM imports
try:
    import clmm
except ImportError:
    # CLMM is not importable in this kernel: install it through the
    # `notebook_install` helper module, then retry the import.
    # Only ImportError is caught so that unrelated failures (e.g. a
    # KeyboardInterrupt) are not silently turned into a re-install.
    import notebook_install
    notebook_install.install_clmm_pipeline(upgrade=False)
    import clmm
import clmm.utils as u

### 1. Load the catalogs
- DC2 object catalog
- DC2 extragalactic catalog (cosmoDC2)

In [None]:
# Load the catalog registered in GCRCatalogs under the name 'dc2_object_run2.1i_dr1'
# (the DC2 object catalog used throughout this notebook).
object_cat = GCRCatalogs.load_catalog('dc2_object_run2.1i_dr1')

In [None]:
# Load the catalog registered in GCRCatalogs under the name 'cosmoDC2_v1.1.4_small'
# (the extragalactic catalog used throughout this notebook).
extragalactic_cat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_small')

### 2. Identify one halo in the extragalactic catalog
Choosing the most massive one below z = 0.4

In [None]:
# Thresholds for the halo search
mmin = 5.e14  # lower bound applied to halo_mass
zmax = 0.4    # upper bound applied to redshift

# Keep entries flagged as central, above the mass threshold and below the redshift limit
halo_quantities = ['halo_mass', 'hostHaloMass', 'redshift', 'ra', 'dec']
halo_filters = [
    f'halo_mass > {mmin}',
    'is_central==True',
    f'redshift<{zmax}',
]
massive_halos = extragalactic_cat.get_quantities(halo_quantities, filters=halo_filters)

N_cl = len(massive_halos['halo_mass'])
print(f'There are {N_cl} clusters available')

In [None]:
# Pick the most massive halo (the first one if several share the maximum mass)
i_most_massive = np.argmax(massive_halos['halo_mass'])
ra_cl, dec_cl, z_cl, mass_cl = (
    massive_halos[key][i_most_massive] for key in ('ra', 'dec', 'redshift', 'halo_mass')
)
print(f'The most massive cluster is in ra = {ra_cl:.2f} deg, dec = {dec_cl:.2f} deg, z = {z_cl:.2f}, with mass = {mass_cl:.2e} Msun')

### 3. Selection of background galaxies around the cluster
- Define cuts on the cosmoDC2 and object catalogs. We also add some WL quality cuts for the object catalog.
- The two catalogs will then be matched to end up with the same selection of galaxies.

In [None]:
# This cell only *defines* the selections; they are applied when the catalogs are queried.

# Coordinate filter to be applied to both the extragalactic and object catalogs:
# a box of +/- 0.35 (same unit as ra/dec, i.e. degrees) around the cluster position.
# NOTE(review): the RA half-width is not scaled by 1/cos(dec) — confirm this is intended.
ra_min, ra_max = ra_cl-0.35, ra_cl+0.35
dec_min, dec_max = dec_cl-0.35, dec_cl+0.35

coord_filters = [
    f'ra >= {ra_min}',
    f'ra < {ra_max}',
    f'dec >= {dec_min}',
    f'dec < {dec_max}',
]

# Redshift cut to be applied to the extragalactic catalog. The object catalog does not have redshift information.
# Only galaxies at least 0.1 in redshift behind the cluster are kept.
z_min = z_cl + 0.1
redshift_filters = [
    (np.isfinite, 'redshift'),
    f'redshift > {z_min}',
]

# Magnitude cut for the extragalactic catalog query (the object catalog selection
# relies on the `mag_i_cModel < 24.5` cut defined further down instead).
mag_filters = [
    (np.isfinite, 'mag_i'),
    'mag_i < 24.5',
]


# Following DC2 tutorials, basic cuts to be applied to the object catalog
object_basic_cuts = [
    GCRQuery('extendedness > 0'),     # Extended objects
    GCRQuery((np.isfinite, 'mag_i')), # Select objects that have i-band magnitudes
    GCRQuery('clean'), # The source has no flagged pixels (interpolated, saturated, edge, clipped...)
                       # and was not skipped by the deblender
    GCRQuery('xy_flag == 0'),                                      # Flag for bad centroid measurement
    GCRQuery('ext_shapeHSM_HsmShapeRegauss_flag == 0'),            # Error code returned by shape measurement code
    GCRQuery((np.isfinite, 'ext_shapeHSM_HsmShapeRegauss_sigma')), # Shape measurement uncertainty should not be NaN
]

# Adding the total ellipticity quantity to the object catalog:
# etot = hypot(e1, e2), registered under the name used by the cut below.
object_cat.add_quantity_modifier('shape_hsm_regauss_etot', 
                                 (np.hypot, 'ext_shapeHSM_HsmShapeRegauss_e1', 'ext_shapeHSM_HsmShapeRegauss_e2'), 
                                 overwrite=True)


# Following DC2 tutorials, additional WL quality cuts to be applied to the object catalog
object_properties_cuts = [
    GCRQuery('snr_i_cModel > 10'),                              # SNR > 10
    GCRQuery('mag_i_cModel < 24.5'),                            # cModel imag brighter than 24.5
    GCRQuery('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3'), # Sufficiently resolved galaxies compared to PSF
    GCRQuery('shape_hsm_regauss_etot < 2'),                     # Total distortion in reasonable range
    GCRQuery('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4'),      # Shape measurement errors reasonable
    # New cut on blendedness:
    GCRQuery('blendedness < 10**(-0.375)')                      # Avoid spurious detections and those contaminated by blends
]

In [None]:
# Load quantities from the cosmoDC2 catalog, restricted by the position,
# magnitude and redshift filters defined above.
extragal_columns = [
    'ra', 'dec',
    'shear_1', 'shear_2',
    'ellipticity_1_true', 'ellipticity_2_true',
    'redshift', 'convergence', 'galaxy_id',
]
extragal_filters = coord_filters + mag_filters + redshift_filters
extragal_data = extragalactic_cat.get_quantities(extragal_columns, filters=extragal_filters)

In [None]:
# Load quantities from the object catalog, restricted by the position filter
# and the basic + WL quality cuts defined above.
# The field under scrutiny falls in tract 3448 of the object catalog.
# Specifying that tract using native_filters speeds up the process but is not required.
object_columns = [
    'ra', 'dec',
    'ext_shapeHSM_HsmShapeRegauss_e1', 'ext_shapeHSM_HsmShapeRegauss_e2',
    'id',
]
object_filters = coord_filters + object_basic_cuts + object_properties_cuts
object_data = object_cat.get_quantities(object_columns,
                                        native_filters=['tract == 3448'],
                                        filters=object_filters)

### 4. Match the 2 catalogs

Using the `FoFCatalogMatching` method, as exemplified in the DC2 analysis tutorial. As mentioned in the tutorial, *`FoFCatalogMatching.match` takes a dictionary of catalogs to match and a friends-of-friends linking length. 
Because the "catalog" is not an astropy table or pandas dataframe, `len(truth_coord)` won't give the actual length of the table so we need to specify `catalog_len_getter` so that the code knows how to get the length of the catalog.*


#### Perform the matching

In [None]:
# Friends-of-friends match of the two selections. Each input is a dict of columns,
# so its length is read from the 'ra' column.
# NOTE(review): linking_lengths=1. is presumably in arcseconds — confirm against the FoFCatalogMatching docs.
results = FoFCatalogMatching.match(
    catalog_dict={'extragal': extragal_data, 'object': object_data},
    linking_lengths=1.,
    catalog_len_getter=lambda x: len(x['ra']),
)

# first we need to know which rows are from the extragalactic catalog and which are from the object catalog
extragal_mask = results['catalog_key'] == 'extragal'
object_mask = ~extragal_mask

# then np.bincount will give us the number of occurrences of each group id
# in each catalog (like a histogram but with integer input)
n_groups = results['group_id'].max() + 1
n_extragal = np.bincount(results['group_id'][extragal_mask], minlength=n_groups)
n_object = np.bincount(results['group_id'][object_mask], minlength=n_groups)

#### Identify one-to-one extragal/object matches

In [None]:
# Groups that contain exactly one extragalactic entry and one object entry
one_to_one_group_ids = np.flatnonzero((n_extragal == 1) & (n_object == 1))

# Rows of the match table that belong to those groups.
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
one_to_one_group_mask = np.isin(results['group_id'], one_to_one_group_ids)

# and then we can find the row indices in the *original* extragal/object catalogs for those 1-to-1 groups
extragal_idx = results['row_index'][one_to_one_group_mask & extragal_mask]
object_idx = results['row_index'][one_to_one_group_mask & object_mask]
print(f'Number of 1-to-1 matched objects: {len(extragal_idx)}, {len(object_idx)}')

### 5. Compute the reduced tangential shear profiles from both datasets, using CLMM

#### First, dealing with the cosmoDC2 data.
To measure a reduced tangential shear profile, the shape measurements must follow the $\epsilon$ (reduced shear $g$) definition. So first, we convert the cosmoDC2 `shear_1` and `shear_2` quantities to reduced shear using the `convergence`. These become the `e1` and `e2` fields of the CLMM cluster galaxy catalog.

In [None]:
# Shear and convergence of the matched cosmoDC2 galaxies
matched_shear_1 = extragal_data['shear_1'][extragal_idx]
matched_shear_2 = extragal_data['shear_2'][extragal_idx]
matched_kappa = extragal_data['convergence'][extragal_idx]

# Convert (shear, convergence) to the epsilon shape definition
e1, e2 = clmm.utils.convert_shapes_to_epsilon(
    matched_shear_1,
    matched_shear_2,
    shape_definition='shear',
    kappa=matched_kappa,
)

# Background galaxy catalog, as an astropy table
dat = Table(
    [
        extragal_data['ra'][extragal_idx],
        extragal_data['dec'][extragal_idx],
        e1,
        e2,
        extragal_data['redshift'][extragal_idx],
        extragal_data['galaxy_id'][extragal_idx],
    ],
    names=('ra','dec', 'e1', 'e2', 'z','id'),
)

# CLMM cluster object built from that table, written to disk for later use
cl_from_cosmoDC2 = clmm.GalaxyCluster('CL', ra_cl, dec_cl, z_cl, dat)
cl_from_cosmoDC2.save('cosmoDC2_GC.pkl')

#### Second, doing the same for the DC2 object catalog
In the object catalog, shapes are measured by `shapeHSM`, which returns ellipticities according to the $\chi$ definition. We need to convert them to the $\epsilon$ definition, once again using the conversion helper function from CLMM. 

In [None]:
# Convert the matched HSM shapes from the chi definition to the epsilon definition
e1, e2 = clmm.utils.convert_shapes_to_epsilon(object_data['ext_shapeHSM_HsmShapeRegauss_e1'][object_idx],
                                              object_data['ext_shapeHSM_HsmShapeRegauss_e2'][object_idx],
                                              shape_definition='chi')
# The conversion may create NaN values: keep only the galaxies for which
# *both* ellipticity components are finite.
mask = np.isfinite(e1) & np.isfinite(e2)

The object catalog has no redshift information so we'll use the redshift of the matched galaxies in cosmoDC2 to create the GalaxyCluster object.

In [None]:
# Create the background galaxy catalog as astropy table, keeping only the galaxies
# that pass `mask`.
# The redshift column comes from the matched cosmoDC2 galaxies; this assumes
# extragal_idx and object_idx list the 1-to-1 groups in the same order — TODO confirm.
dat = Table([object_data['ra'][object_idx][mask],object_data['dec'][object_idx][mask],
             e1[mask],
             e2[mask],
             extragal_data['redshift'][extragal_idx][mask],
             object_data['id'][object_idx][mask]], 
            names=('ra','dec', 'e1', 'e2', 'z','id'), masked=True)


# Instantiate a CLMM cluster object from that table and save it for later use
cl_from_objectDC2 = clmm.GalaxyCluster('CL', ra_cl, dec_cl, z_cl, dat)  
cl_from_objectDC2.save('objectDC2_GC.pkl')

#### Build the reduced tangential shear profile from both datasets

In [None]:
# Reload the two cluster objects from the files written by the `.save(...)` calls earlier in this notebook.
# NOTE(review): the '.pkl' extension suggests pickle files — only load files you produced yourself.
cl_from_objectDC2 = clmm.load_cluster('objectDC2_GC.pkl')
cl_from_cosmoDC2 = clmm.load_cluster('cosmoDC2_GC.pkl')

In [None]:
# Cosmology attached to the extragalactic catalog, used for both profiles
cosmo = extragalactic_cat.cosmology

# Radial binning shared by both profiles
# (presumably rmin=0.15, rmax=4, 10 bins, in the profile unit "Mpc" — confirm against the CLMM docs)
bin_edges = clmm.polaraveraging.make_bins(0.15, 4, 10, method='evenlog10width')

# Compute the shear components of every galaxy, for each cluster object
for cluster in (cl_from_cosmoDC2, cl_from_objectDC2):
    cluster.compute_shear(geometry="flat")

# Bin them into radial profiles
profile_from_cosmoDC2 = cl_from_cosmoDC2.make_shear_profile(
    "radians", "Mpc", bins=bin_edges, cosmo=cosmo
)
profile_from_objectDC2 = cl_from_objectDC2.make_shear_profile(
    "radians", "Mpc", bins=bin_edges, cosmo=cosmo
)


#### Taking into account intrinsic ellipticities from cosmoDC2

So far, we've used the `shear_1` and `shear_2` fields of cosmoDC2, i.e., we neglected the intrinsic ellipticities of the galaxies. To account for shape noise from intrinsic ellipticities, we can use the shears and unlensed ellipticities available in the cosmoDC2 catalog to build lensed ellipticities. The latter can then be used to build a CLMM cluster object. The resulting tangential shear profile will then include shape noise.

In [None]:
def calc_lensed_ellipticity(es1, es2, gamma1, gamma2, kappa):
    """Combine intrinsic ellipticities with shear and convergence.

    The two components of each quantity are packed into complex numbers and
    the lensed ellipticity is computed as ``e = (es + g) / (1 + conj(g) * es)``,
    with the reduced shear ``g = gamma / (1 - kappa)``.

    Parameters
    ----------
    es1, es2 : float or array-like
        Components of the intrinsic (unlensed) ellipticity.
    gamma1, gamma2 : float or array-like
        Components of the shear.
    kappa : float or array-like
        Convergence.

    Returns
    -------
    e1, e2 : float or numpy.ndarray
        Real and imaginary parts of the lensed ellipticity.

    Notes
    -----
    Inputs are passed through ``np.asanyarray`` so that plain Python
    sequences are accepted in addition to scalars and numpy arrays.
    NOTE(review): only this single expression is implemented; confirm it is
    appropriate if sources with ``|g| > 1`` can occur in the selection.
    """
    gamma = np.asanyarray(gamma1) + 1j * np.asanyarray(gamma2)  # shear (as a complex number)
    es = np.asanyarray(es1) + 1j * np.asanyarray(es2)  # intrinsic ellipticity (as a complex number)
    g = gamma / (1.0 - np.asanyarray(kappa))  # reduced shear
    e = (es + g) / (1.0 + np.conj(g) * es)  # lensed ellipticity
    return np.real(e), np.imag(e)

In [None]:
# Unlensed ellipticities, shear and convergence of the selected cosmoDC2 galaxies
es1 = extragal_data['ellipticity_1_true']
es2 = extragal_data['ellipticity_2_true']
gamma1 = extragal_data['shear_1']
gamma2 = extragal_data['shear_2']
kappa = extragal_data['convergence']

# Evaluate the lensing formula once and store both components as new columns
# (the previous version ran the full computation twice, once per component).
extragal_data['ellipticity_1'], extragal_data['ellipticity_2'] = calc_lensed_ellipticity(
    es1, es2, gamma1, gamma2, kappa
)

Make a new CLMM cluster object

In [None]:
# Columns of the matched cosmoDC2 galaxies, this time with the lensed ellipticities as shapes
matched_columns = [
    extragal_data[key][extragal_idx]
    for key in ('ra', 'dec', 'ellipticity_1', 'ellipticity_2', 'redshift', 'galaxy_id')
]
dat = Table(matched_columns, names=('ra','dec', 'e1', 'e2', 'z','id'))

# CLMM cluster object built from that table
cl_from_cosmoDC2_with_e1e2 = clmm.GalaxyCluster('CL', ra_cl, dec_cl, z_cl, dat)

Compute the reduced shear profile

In [None]:
# Same two steps and same arguments as for the other cluster objects in this notebook,
# so that the resulting profile shares the radial binning (`bin_edges`) and cosmology (`cosmo`).
cl_from_cosmoDC2_with_e1e2.compute_shear(geometry="flat")
profile_from_cosmoDC2_with_e1e2 = cl_from_cosmoDC2_with_e1e2.make_shear_profile("radians", "Mpc", bins=bin_edges,cosmo=cosmo)

#### Visualize the results for the three profiles, obtained from the same galaxies in the two catalogs
- from cosmoDC2, neglecting shape noise (blue points)
- from cosmoDC2, including shape noise (orange)
- from the DC2 object catalog (green, where the galaxy redshifts are taken from cosmoDC2)

In [None]:
# Overlay the three reduced tangential shear profiles, each with its own error bars.
plt.errorbar(profile_from_cosmoDC2['radius'], profile_from_cosmoDC2['gt'], profile_from_cosmoDC2['gt_err'],
             marker='o', label='from cosmoDC2 g1g2')
# Fix: this profile was previously drawn with the 'gt_err' of the noiseless profile.
plt.errorbar(profile_from_cosmoDC2_with_e1e2['radius'], profile_from_cosmoDC2_with_e1e2['gt'],
             profile_from_cosmoDC2_with_e1e2['gt_err'], label='from cosmoDC2 e1e2')
plt.errorbar(profile_from_objectDC2['radius'], profile_from_objectDC2['gt'], profile_from_objectDC2['gt_err'],
             label='from DC2 objects e1e2')
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.xlabel('R (Mpc)')
plt.ylabel(r'$\langle g_t \rangle$')
plt.ylim([2.e-3,0.3]);

From cosmoDC2 (orange and blue profiles above), we see the impact of shape noise at low radii (orange/blue = with/without intrinsic ellipticities), where the number of galaxies per bin is small (see below).

In [None]:
# Number of source galaxies in each radial bin of the cosmoDC2 profile
plt.scatter(profile_from_cosmoDC2['radius'], profile_from_cosmoDC2['n_src'], marker='o')

# Mark the lower edge of every bin with a full-height dotted line.
# A plain loop avoids building (and echoing) a throwaway list of Line2D objects, and the
# default ymin/ymax (0 and 1, in axes fraction) already span the whole axes height.
for r in profile_from_cosmoDC2['radius_min']:
    plt.axvline(x=r, color='k', linestyle=':')

plt.ylabel('Ngal in the bin')
plt.xlabel('R (Mpc)')
plt.xscale('log')
plt.yscale('log');
