## De-reddening magnitudes in DC2 to remove Galactic dust<br>
This notebook is a quick demo of how to remove the effect of Galactic dust extinction (reddening) from the magnitudes in the DC2 catalogs.<br>

In DC2, the CCM dust model (Cardelli, Clayton & Mathis 1989, ApJ 345, 245) was assumed when calculating the dust extinction. We will use the lsst.sims.catUtils dust package to compute the E(B-V) values for the model dust and the lsst.sims.photUtils package to compute the effective wavelengths of the filters. We will then set up the CCM model and calculate $A_\lambda/E(B-V)$ for each of the LSST filters.<br>

As an example, we will compute de-reddened magnitudes for a single tract (tract 4850) of Run2.1i, perform a friends-of-friends match to the extragalactic truth catalog, and make histograms of the magnitude residuals. If we have done things properly, the peak of the residuals should be centered on zero after de-reddening.

In [None]:
import os
import GCRCatalogs
from GCR import GCRQuery
import h5py
import pandas as pd
import numpy as np
import scipy.interpolate
#grab the dust model
from lsst.sims.catUtils.dust import EBVbase
from lsst.sims import photUtils
from lsst.sims.photUtils import BandpassSet
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Single shared E(B-V) calculator from lsst.sims.catUtils.dust; compute_ebv() reads this module-level instance
ebv_gen = EBVbase()

In [None]:
def compute_ebv(ra,dec):
    """
    Look up E(B-V) at a set of sky positions with the module-level ``ebv_gen``.

    The positions are taken in degrees and converted to radians here, because
    calculateEbv expects radians.

    NOTE: interp=True was used for DC2, so it is always passed here. interp
    defaults to False, in which case every position inside a dust-map pixel
    gets the same E(B-V) value instead of a smoothly interpolated one.

    inputs: ra, dec:
      vectors of sky coords in degrees
    returns:
    ebv:
      vector of E(B-V) values from the dust model, one per input position
    """
    # Stack as [ra, dec] (both in radians), the layout handed to calculateEbv
    coords_rad = np.array([np.radians(ra), np.radians(dec)])
    return ebv_gen.calculateEbv(equatorialCoordinates=coords_rad, interp=True)

In [None]:
def compute_alambda_over_ebv(filterset=['u','g','r','i','z','y']):
    """
    Compute the effective wavelengths and alambda/E(B-V) values for a set of filters.

    The flat SED from the SIMS SED library supplies the wavelength grid on which
    the CCM dust model (the one assumed for DC2) is set up. For every requested
    filter, the baseline LSST total throughput is read, its effective wavelength
    is computed, and the CCM curve is evaluated at that wavelength.

    inputs: filterset:
      iterable of filter names (limited to the ugrizy filters present in the
      baseline LSST throughputs directory); defaults to all six.
      The default list is never modified.
    returns:
    lam_eff_list:
      np 1d array of filter effective wavelengths, in the order of filterset
    alam_over_ebv_list:
      np 1d array of alam_over_ebv values, in the order of filterset
    raises:
      KeyError if SIMS_SED_LIBRARY_DIR or LSST_THROUGHPUTS_BASELINE is not set
      ValueError if an effective wavelength falls outside the SED wavelength grid
      (the interpolator is built with bounds_error=True)
    """
    # CCM a(x), b(x) coefficients on the wavelength grid of the flat SED
    sed_file = os.path.join(os.environ['SIMS_SED_LIBRARY_DIR'],'flatSED','sed_flat.txt.gz')
    sed = photUtils.Sed()
    sed.readSED_flambda(sed_file)
    ax,bx = sed.setupCCM_ab()
    # NOTE(review): 3.1 is presumably R_V, making this A_lambda/E(B-V) = R_V*a + b -- confirm
    ccm_model = 3.1*ax+bx
    ccm_spline = scipy.interpolate.interp1d(sed.wavelen,ccm_model,bounds_error=True)

    throughput_dir = os.environ['LSST_THROUGHPUTS_BASELINE']
    lam_eff_list = []
    alam_over_ebv_list = []
    # Honor the caller's filterset (it used to be overwritten with all six bands here)
    for band in filterset:
        bandpass = photUtils.Bandpass()
        bandpass.readThroughput(os.path.join(throughput_dir,f'total_{band}.dat'))
        # calcEffWavelen returns two values; only the second one is used
        _,leff = bandpass.calcEffWavelen()
        lam_eff_list.append(leff)
        alam_over_ebv_list.append(ccm_spline(leff))
    return np.array(lam_eff_list),np.array(alam_over_ebv_list)

Let's do a quick check that we are getting the results that we expect.  For DC2 we should get the following for the effective wavelengths and A_lam/E(B-V) values:<br>
u 367.07 nm A_lambda/EBV = 4.812<br>
g 482.69 nm A_lambda/EBV = 3.642<br>
r 622.32 nm A_lambda/EBV = 2.699<br>
i 754.60 nm A_lambda/EBV = 2.062<br>
z 869.01 nm A_lambda/EBV = 1.578<br>
y 971.03 nm A_lambda/EBV = 1.313<br>
If these values do not match those in the next cell, check that the Baseline filter definitions have not changed!

In [None]:
# Recompute the per-band values so they can be compared with the expected DC2 numbers
filterlist = ['u','g','r','i','z','y']
leff_list,alamebv_list = compute_alambda_over_ebv(filterlist)
per_band = zip(filterlist,leff_list,alamebv_list)
for filt,leff,alamebv in per_band:
    print(f"filter {filt} lam_eff: {leff:.2f}nm   alam/E(B-V): {alamebv:.3f}")


In [None]:
def make_catalog(gc,tract):
    """
    Make a pandas dataframe with some basic info for one tract, for all six bands.

    Reads position, extendedness, blendedness, tract, patch, objectId and the
    cModel magnitude and flux in every band, adds the E(B-V) value at each
    object's position, and adds a de-reddened cModel magnitude per band.

    inputs:
    gc: catalog reader
    tract: int; tract number
    returns:
    Pandas dataframe with the requested columns, plus:
      ebv: E(B-V) at each object's (ra, dec)
      cModel_{band}_dered: mag_{band}_cModel - ebv * A_lambda/E(B-V) for that band
    """
    bands = ['u','g','r','i','z','y']
    columns = ['ra','dec','extendedness','blendedness','tract','patch','objectId']
    for band in bands:
        columns.append(f'mag_{band}_cModel')
        columns.append(f'cModelFlux_{band}')

    # Restrict the read to the requested tract
    data = gc.get_quantities(columns,native_filters=[f'tract=={tract}'])
    df = pd.DataFrame(data)
    ebv_vec = compute_ebv(df['ra'],df['dec'])
    df['ebv'] = ebv_vec

    # A_lambda/E(B-V) per band, from the CCM model evaluated at the filter effective wavelengths
    _,band_a_ebv = compute_alambda_over_ebv(bands)
    for band,a_over_ebv in zip(bands,band_a_ebv):
        # Extinction in this band is E(B-V) * A_lambda/E(B-V); subtract it to de-redden
        df[f"cModel_{band}_dered"] = df[f"mag_{band}_cModel"] - ebv_vec*a_over_ebv

    return df

In [None]:
# Object catalog to read (by its GCRCatalogs name) and the single tract to de-redden
catalog_name='dc2_object_run2.1i_dr1b'
tract = 4850
gc = GCRCatalogs.load_catalog(catalog_name)
# Per-object dataframe with E(B-V) and de-reddened cModel magnitudes for that tract
df = make_catalog(gc,tract)

Let's take a look at what we have stored in our dataframe

In [None]:
# Summary of the columns, dtypes and non-null counts in the object dataframe
df.info()

Now, let's do a friends-of-friends match to the extragalactic truth catalog with some code copied from the matching_fof.ipynb tutorial notebook

In [None]:
# Bounding box of the object dataframe on the sky, used to cut down the truth catalog
minra,maxra = np.amin(df['ra']),np.amax(df['ra'])
mindec,maxdec = np.amin(df['dec']),np.amax(df['dec'])

In [None]:
# Extragalactic truth catalog that the objects will be matched against
truthcat = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_image')

In [None]:
# Truth-catalog filters: stay inside the object bounding box (lower edges inclusive,
# upper edges exclusive) and keep only galaxies with a finite i-band magnitude below 25
radec_mask = [
    f'ra>={minra}',
    f'ra<{maxra}',
    f'dec>={mindec}',
    f'dec<{maxdec}',
    (np.isfinite,'mag_i_lsst'),
    'mag_i_lsst<25.0',
]

In [None]:
# Position, redshift, the six LSST truth magnitudes and the galaxy id, in that order
truth_columns = ['ra','dec','redshift']
truth_columns += [f'mag_{band}_lsst' for band in ('u','g','r','i','z','y')]
truth_columns += ['galaxy_id']
truth_data = truthcat.get_quantities(truth_columns,filters=radec_mask)

In [None]:
# Number of truth galaxies that passed the position and magnitude cuts
print(len(truth_data['ra']))

In [None]:
import FoFCatalogMatching

In [None]:
# Friends-of-friends match between the truth galaxies ('truth') and the observed objects ('obs')
results = FoFCatalogMatching.match(
    catalog_dict={'truth':truth_data,'obs':df},
    linking_lengths=1.0,  # presumably arcseconds -- confirm against the FoFCatalogMatching docs
    catalog_len_getter=lambda x: len(x['ra']),  # both inputs are sized by their 'ra' column
)

In [None]:
# Next: keep only the 1:1 matches, i.e. FoF groups containing exactly one truth entry and one object entry

In [None]:
group_id = results['group_id']

# Split the match rows by which input catalog they came from
truth_mask = results['catalog_key'] == 'truth'
object_mask = ~truth_mask

# Count truth and object members in every FoF group
n_groups = group_id.max() + 1
n_truth = np.bincount(group_id[truth_mask], minlength=n_groups)
n_object = np.bincount(group_id[object_mask], minlength=n_groups)

# Groups with exactly one truth member and exactly one object member
one_to_one_groups = np.flatnonzero((n_truth == 1) & (n_object == 1))
one_to_one_group_mask = np.in1d(group_id, one_to_one_groups)

# Row indices into the original truth / object catalogs for those groups
truth_idx = results['row_index'][one_to_one_group_mask & truth_mask]
object_idx = results['row_index'][one_to_one_group_mask & object_mask]

In [None]:
# Truth rows of the 1:1 groups; the index is reset so the frame can be merged on index
truthdf = pd.DataFrame(truth_data).iloc[truth_idx].reset_index(drop=True)

In [None]:
# Object rows of the 1:1 groups; df is already a DataFrame, so it is indexed directly
objdf = df.iloc[object_idx].reset_index(drop=True)

In [None]:
# Join truth and object rows side by side on their (reset) index; columns present in
# both frames, such as ra and dec, get the _truth / _obj suffixes.
# NOTE(review): assumes truth_idx and object_idx list the 1:1 groups in the same order -- confirm
bigdf = truthdf.merge(
    objdf,
    left_index=True,
    right_index=True,
    suffixes=('_truth','_obj'),
)

We now have a dataframe with both the "truth" and "object" quantities:

In [None]:
# Summary of the merged truth + object columns
bigdf.info()

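Let's select the bright, non-blended objects with the highest extinction values: this is the cleanest sample in which to see the effect of the dust. (This subsample is used for the u-band comparison below; the g, r and i comparisons use the full matched sample.)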

In [None]:
# Bright (i cModel < 22), barely blended (blendedness < 0.1), higher-extinction (E(B-V) > 0.013) objects
brightmask = (
    (bigdf['mag_i_cModel']<22.)
    & (bigdf['blendedness']<.1)
    & (bigdf['ebv']>.013)
)
brightdf = bigdf[brightmask]

Let's make some histograms of the residuals (measured cModel magnitude minus truth magnitude), before and after de-reddening, to check that we did things properly.

In [None]:
# u-band residuals (measured cModel minus truth) for the bright subsample,
# before and after de-reddening, followed by their NaN-ignoring medians
truth_u = brightdf['mag_u_lsst']
delu = brightdf['mag_u_cModel'] - truth_u
delu_dered = brightdf['cModel_u_dered'] - truth_u
for resid in (delu, delu_dered):
    print(np.nanmedian(resid))

In [None]:
# Histogram bin edges; the later residual plots reuse this same `bins` array
bins = np.arange(-2.5,2.5,0.01)
# Top panel: u-band residuals before de-reddening; bottom panel: after
fig,(ax,bx) = plt.subplots(2,1,figsize=(15,7))
ax.hist(delu,bins=bins)
ax.set_xlim(-.75,.75)
ax.plot([0,0],[0,150],'k--')   # dashed line at zero residual
bx.hist(delu_dered,bins=bins)
bx.set_xlim(-.75,.75)
bx.plot([0,0],[0,150],'k--')

In [None]:
# g-band residuals (measured cModel minus truth) for the full matched sample,
# before and after de-reddening, followed by their NaN-ignoring medians
truth_g = bigdf['mag_g_lsst']
delg = bigdf['mag_g_cModel'] - truth_g
delg_dered = bigdf['cModel_g_dered'] - truth_g
for resid in (delg, delg_dered):
    print(np.nanmedian(resid))

In [None]:
# Top panel: g-band residuals before de-reddening; bottom panel: after
fig,(ax,bx) = plt.subplots(2,1,figsize=(15,7))
ax.hist(delg,bins=bins)
ax.set_xlim(-.75,.75)
ax.plot([0,0],[0,3150],'k--')   # dashed line at zero residual
bx.hist(delg_dered,bins=bins)
bx.set_xlim(-.75,.75)
bx.plot([0,0],[0,3150],'k--')

In [None]:
# r-band residuals (measured cModel minus truth) for the full matched sample,
# before and after de-reddening, followed by their NaN-ignoring medians
truth_r = bigdf['mag_r_lsst']
delr = bigdf['mag_r_cModel'] - truth_r
delr_dered = bigdf['cModel_r_dered'] - truth_r
for resid in (delr, delr_dered):
    print(np.nanmedian(resid))

In [None]:
# Top panel: r-band residuals before de-reddening; bottom panel: after
fig,(ax,bx) = plt.subplots(2,1,figsize=(15,7))
ax.hist(delr,bins=bins)
ax.set_xlim(-.75,.75)
ax.plot([0,0],[0,5150],'k--')   # dashed line at zero residual
bx.hist(delr_dered,bins=bins)
bx.set_xlim(-.75,.75)
bx.plot([0,0],[0,5150],'k--')

In [None]:
# i-band residuals (measured cModel minus truth) for the full matched sample,
# before and after de-reddening
deli = bigdf['mag_i_cModel']-bigdf['mag_i_lsst']
deli_dered = bigdf['cModel_i_dered']-bigdf['mag_i_lsst']
# NaN-ignoring medians of the i-band residuals (not the r-band ones)
print(np.nanmedian(deli))
print(np.nanmedian(deli_dered))

In [None]:
# Top panel: i-band residuals before de-reddening; bottom panel: after
fig,(ax,bx) = plt.subplots(2,1,figsize=(15,7))
ax.hist(deli,bins=bins)
ax.set_xlim(-.75,.75)
ax.plot([0,0],[0,5150],'k--')   # dashed line at zero residual
bx.hist(deli_dered,bins=bins)
bx.set_xlim(-.75,.75)
bx.plot([0,0],[0,5150],'k--')

In [None]:
# Sky map of the matched objects colored by E(B-V); every 5th row is plotted to thin the points
subsample = bigdf.iloc[::5]
fig,ax = plt.subplots(figsize=(10,10))
points = ax.scatter(subsample['ra_obj'],subsample['dec_obj'],s=5,c=subsample['ebv'],cmap='jet')
fig.colorbar(points,ax=ax)