# Validation tests on DC2 `calexps` and `src` catalogs

Owner: Javier Sanchez (@fjaviersanchez on Slack and GitHub)
Date Last Run: 11-19-2018

This notebook is intended to document part of the ongoing validation work for the DC2 simulations. For more details please check [here](https://confluence.slac.stanford.edu/display/LSSTDESC/DC2+Data+Product+Overview). The code here can be adapted for other DC2 runs.

In [8]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [9]:
import lsst.daf.persistence

In [10]:
from sklearn.neighbors import KDTree
import pandas as pd
import astropy.io.fits as fits
from astropy.wcs import WCS
import os
from scipy.stats import binned_statistic

In [11]:
import glob

In [12]:
import matplotlib
matplotlib.rcParams.update({'font.size': 14})

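We are going to read some `calexp` images and query the `OpSim` database to select interesting visits (here: `r`-band visits taken at high altitude and with seeing inside a chosen range).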

In [13]:
data_imsim = '/global/cscratch1/sd/desc/DC2/data/Run1.2p/w_2018_30/rerun/210918/'

In [14]:
butler_i = lsst.daf.persistence.Butler(data_imsim)

In [15]:
datarefs_i = butler_i.subset('calexp')

In [None]:
db_file = '/global/projecta/projectdirs/lsst/groups/SSim/DC2/minion_1016_desc_dithered_v4.db'

In [None]:
import sqlite3
from sqlite3 import Error
 
def create_connection(db_file):
    """Open a SQLite connection to ``db_file``.

    Best-effort helper: if ``sqlite3.connect`` raises a ``sqlite3.Error`` the
    error is printed and ``None`` is returned instead of propagating it, so
    callers have to check the result before using it.

    Args:
    ----
        db_file: path of the SQLite database file to open.

    Returns:
    --------
        The ``sqlite3.Connection`` on success, ``None`` on failure.
    """
    connection = None
    try:
        connection = sqlite3.connect(db_file)
    except Error as err:
        print(err)
    return connection

In [None]:
conn = create_connection(db_file)

In [None]:
def get_seeing_altitude(conn,visit):
    """Look up the seeing and altitude stored in ``ObsHistory`` for one visit.

    Args:
    ----
        conn: open ``sqlite3`` connection to a database with an
            ``ObsHistory`` table.
        visit: visit number, compared with ``ObsHistory.obsHistID``. Anything
            accepted by ``int()`` works (Python or NumPy integers).

    Returns:
    --------
        Tuple ``(finSeeing, altitude)`` taken from the first matching row.
        Values are returned as stored in the database; the rest of this
        notebook treats ``altitude`` as radians.

    Raises:
    -------
        IndexError: if no row has the requested ``obsHistID``.
    """
    # Bind the visit as a query parameter rather than formatting it into the
    # SQL text. The explicit int() is needed because sqlite3 does not accept
    # NumPy integer types as bound parameters.
    visit_id = int(visit)
    cur = conn.cursor()
    cur.execute("SELECT finSeeing, altitude FROM ObsHistory WHERE obsHistID = ?", (visit_id,))
    row = cur.fetchone()
    if row is None:
        raise IndexError("no ObsHistory entry with obsHistID=%d" % visit_id)
    return tuple(row)

In [None]:
def get_visits(conn,min_alt=80,min_seeing=0.60,max_seeing=0.8,band='r'):
    """Select the visits of one filter that pass altitude and seeing cuts.

    Args:
    ----
        conn: open ``sqlite3`` connection to a database with an
            ``ObsHistory`` table.
        min_alt: lower bound (exclusive) on the altitude, in degrees. It is
            converted to radians before the comparison with
            ``ObsHistory.altitude``.
        min_seeing: lower bound (exclusive) on ``finSeeing``.
        max_seeing: upper bound (exclusive) on ``finSeeing``.
        band: value of the ``filter`` column to select (default ``'r'``).

    Returns:
    --------
        List of ``(obsHistID, filter)`` rows that satisfy every cut.
    """
    query = ("SELECT obsHistID, filter FROM ObsHistory "
             "WHERE altitude > ? AND finSeeing > ? AND finSeeing < ? AND filter = ?")
    # Bound parameters keep the thresholds at full floating-point precision
    # ("%f" string formatting would round them to six decimals) and avoid
    # assembling SQL by string interpolation.
    params = (float(np.radians(min_alt)), float(min_seeing), float(max_seeing), band)
    cur = conn.cursor()
    cur.execute(query, params)
    return cur.fetchall()

In [None]:
import fitsio

In [None]:
good_visits = np.array(get_visits(conn))[:,0].astype(int)

In [None]:
len(good_visits)

In [16]:
import GCRCatalogs

In [None]:
#gc = GCRCatalogs.load_catalog('dc2_truth_run1.2_static')

In [17]:
gc = GCRCatalogs.load_catalog('dc2_reference_run1.2')
data = gc.get_quantities(['ra','dec','mag_u','mag_g','mag_r','mag_i','mag_z','mag_y','object_id','is_resolved','is_agn'])

  data = np.genfromtxt(f, self._data_dtype, delimiter=',', max_rows=self._nlines)


In [None]:
from astropy.visualization import ZScaleInterval, ImageNormalize, SqrtStretch
def plot_ref_image(xmin, xmax, ymin, ymax,reference,x,y,x2,y2,mag_true,mag_meas,savename, vmin=-5, vmax=5, show_marker=True):
    """Plot a cut-out of an image, optionally overlaying two sets of positions,
    and save the figure to disk.

    Args:
    ----
        xmin: `int` minimum X position in the chip to be shown
        xmax: `int` maximum X position in the chip to be shown
        ymin: `int` minimum Y position in the chip to be shown
        ymax: `int` maximum Y position in the chip to be shown
            (the four limits are also used to slice ``reference``, so they
            must be valid slice indices)
        reference: 2-D image array, indexed as ``reference[y, x]``
        x, y: arrays of X/Y positions drawn as blue 'x' markers
        x2, y2: arrays of X/Y positions drawn as red '+' markers
        mag_true: unused; kept so existing calls keep working
        mag_meas: unused; kept so existing calls keep working
        savename: path passed to ``fig.savefig``
        vmin: `float` minimum of the color scale
        vmax: `float` maximum of the color scale
        show_marker: if False, only the image is drawn (no markers, no legend)
    """
    fig, ax = plt.subplots(ncols=1,figsize=(14,14))
    plt.xlim(xmin,xmax)
    plt.ylim(ymin,ymax)
    if show_marker:
        # Positions are shifted by one pixel before plotting.
        # NOTE(review): presumably a 0-based -> 1-based pixel convention shift; confirm.
        ax.scatter(x+1,y+1,c='b',label='imSim KNL',s=90,marker='x')
        ax.scatter(x2+1,y2+1,c='r',label='imSim Haswell',s=90,marker='+')
        # The legend is only built when labelled markers exist; calling it on
        # an axes without labelled artists just triggers a warning.
        plt.legend(loc='best')
    ax.grid()
    im = ax.imshow(reference[ymin:ymax,xmin:xmax],extent=[xmin,xmax,ymin,ymax],cmap='gray', origin="lower",vmin=vmin,vmax=vmax, interpolation='none')
    fig.colorbar(im, ax=ax, shrink=1,label='Pixel counts [ADU]')
    fig.savefig(savename)

In [None]:
# Unique visit numbers that are both present in the butler subset and
# selected by the OpSim cuts (`good_visits`).
visitno = np.unique([dataid['visit'] for dataid in datarefs_i.cache
                     if dataid['visit'] in good_visits])

In [None]:
# Read the `src` catalog of every dataId in the butler subset whose filter is
# `band` and whose visit is in `visitno`, and collect a set of columns.
# Each list below receives one array per dataId that was read.
band = 'r'
ra_imsim = []
dec_imsim = []
nchild_imsim = []
mag_k_imsim = []
mag_k_err_imsim = []
flux_k_imsim = []
flux_k_err_imsim = []
mag_sdss = []
mag_sdss_err = []
mag_aper = []
mag_psf = []
extendedness= []
isphoto = []
ixx_i = []
iyy_i = []
ixy_i = []
ipsf_xx_i = []
ipsf_yy_i = []
ipsf_xy_i = []
e1_i = []
e2_i = []
# Number of dataIds that were actually read.
icount_i=0
for i, visitId in enumerate(datarefs_i.cache):
    # `&` acts on two plain booleans here, so it behaves like `and`.
    if (visitId['filter']==band) & (visitId['visit'] in visitno):
        print('using', visitId)
        src_cat = butler_i.get('src',visitId)
        calexp = butler_i.get('calexp',visitId)
        # The calibration of the calexp is used to turn fluxes into magnitudes.
        calib = calexp.getCalib()
        # NOTE(review): presumably makes getMagnitude return NaN instead of
        # raising for non-positive fluxes; confirm against the LSST stack docs.
        calib.setThrowOnNegativeFlux(False)
        nchild_imsim.append(src_cat.get('deblend_nChild'))
        # Coordinates are converted to degrees with np.degrees.
        ra_imsim.append(np.degrees(src_cat.get('coord_ra')))
        dec_imsim.append(np.degrees(src_cat.get('coord_dec')))
        mag_k_imsim.append(calib.getMagnitude(src_cat.get('ext_photometryKron_KronFlux_flux')))
        # NOTE(review): this passes the flux *uncertainty* column through
        # getMagnitude; the result is probably not a magnitude uncertainty.
        # Same pattern for mag_sdss_err below. Confirm the intent.
        mag_k_err_imsim.append(calib.getMagnitude(src_cat.get('ext_photometryKron_KronFlux_fluxSigma')))
        # NOTE(review): the `flux_k_*` lists are filled from the base_PsfFlux
        # columns, not from the Kron columns used for `mag_k_*`; verify.
        flux_k_imsim.append(src_cat.get('base_PsfFlux_flux'))
        flux_k_err_imsim.append(src_cat.get('base_PsfFlux_fluxSigma'))
        mag_sdss.append(calib.getMagnitude(src_cat.get('base_SdssShape_flux')))
        mag_sdss_err.append(calib.getMagnitude(src_cat.get('base_SdssShape_fluxSigma')))
        mag_aper.append(calib.getMagnitude(src_cat.get('base_CircularApertureFlux_12_0_flux')))
        mag_psf.append(calib.getMagnitude(src_cat.get('base_PsfFlux_flux')))
        extendedness.append(src_cat.get('base_ClassificationExtendedness_value'))
        isphoto.append(src_cat.get('calib_photometry_used'))
        e1_i.append(src_cat.get('ext_shapeHSM_HsmShapeRegauss_e1'))
        e2_i.append(src_cat.get('ext_shapeHSM_HsmShapeRegauss_e2'))
        # Second moments of the source (ixx/iyy/ixy) and of the PSF model
        # (ipsf_*), both read from the base_SdssShape columns.
        ixx_i.append(src_cat.get('base_SdssShape_xx'))
        iyy_i.append(src_cat.get('base_SdssShape_yy'))
        ixy_i.append(src_cat.get('base_SdssShape_xy'))
        ipsf_xx_i.append(src_cat.get('base_SdssShape_psf_xx'))
        ipsf_yy_i.append(src_cat.get('base_SdssShape_psf_yy'))
        ipsf_xy_i.append(src_cat.get('base_SdssShape_psf_xy'))
        icount_i=icount_i+1
    else:
        pass

In [None]:
# get_seeing_altitude takes a single visit number, so query OpSim once per
# selected visit. `seeing` and `alt` are arrays aligned with `visitno`
# (empty arrays when no visit was selected).
seeing, alt = np.array([get_seeing_altitude(conn, int(visit)) for visit in visitno], dtype=float).reshape(-1, 2).T

In [None]:
seeing, np.degrees(alt)

In [None]:
# Merge the per-dataId arrays into a single flat array per quantity.
# The lists are handed to np.concatenate directly: wrapping them in np.array
# first builds a ragged array whenever the catalogs have different lengths,
# which recent NumPy versions reject.
ra_imsim = np.concatenate(ra_imsim)
dec_imsim = np.concatenate(dec_imsim)
nchild_imsim = np.concatenate(nchild_imsim)
mag_k_imsim = np.concatenate(mag_k_imsim)
mag_k_err_imsim = np.concatenate(mag_k_err_imsim)
flux_k_imsim = np.concatenate(flux_k_imsim)
flux_k_err_imsim = np.concatenate(flux_k_err_imsim)
mag_sdss = np.concatenate(mag_sdss)
mag_sdss_err = np.concatenate(mag_sdss_err)
mag_aper = np.concatenate(mag_aper)
mag_psf = np.concatenate(mag_psf)
extendedness = np.concatenate(extendedness)
isphoto = np.concatenate(isphoto)
ixx_i = np.concatenate(ixx_i)
iyy_i = np.concatenate(iyy_i)
ixy_i = np.concatenate(ixy_i)
ipsf_xx_i = np.concatenate(ipsf_xx_i)
ipsf_yy_i = np.concatenate(ipsf_yy_i)
ipsf_xy_i = np.concatenate(ipsf_xy_i)
e1_i = np.concatenate(e1_i)
e2_i = np.concatenate(e2_i)

We define a couple of routines to match input (true) and output (measured) sources. These routines can be swapped for any other matching algorithm.

In [None]:
def spatial_closest(ra_data,dec_data,ra_true,dec_true,true_id):
    """
    Find, for every measured object, the nearest object of the true catalog.

    The search is a k=1 nearest-neighbour query on a KD-tree built from the
    (ra, dec) pairs of the true catalog. No search radius and no magnitude
    cut are applied, so every measured object gets a match.

    ***Caveats***: distances are plain Euclidean distances in the (ra, dec)
    plane, i.e. the sky is treated as flat and RA separations are not scaled
    by cos(dec). This method does not use any weighting.

    Args:
    -----

    ra_data: Right ascension of the measured objects (degrees).
    dec_data: Declination of the measured objects (degrees).
    ra_true: Right ascension of the true catalog (degrees).
    dec_true: Declination of the true catalog (degrees).
    true_id: Array of IDs in the true catalog.

    Returns:
    --------

    dist: Distance to the closest neighbor in the true catalog, multiplied by
    3600 (arcseconds when the inputs are in degrees).
    true_id: ID in the true catalog for the closest match.
    matched: Boolean array, True for every entry.
    """
    truth_xy = np.empty((len(ra_true),2))
    truth_xy[:,0], truth_xy[:,1] = ra_true, dec_true
    data_xy = np.empty((len(ra_data),2))
    data_xy[:,0], data_xy[:,1] = ra_data, dec_data
    nn_dist, nn_index = KDTree(truth_xy,metric='euclidean').query(data_xy,k=1)
    flat_index = np.asarray(nn_index).ravel()
    all_matched = np.ones(flat_index.size,dtype=bool)
    return nn_dist.ravel()*3600., true_id[flat_index], all_matched

In [None]:
def spatial_closest_mag_1band(ra_data,dec_data,mag_data,
                              ra_true,dec_true,mag_true,true_id,
                              rmax=3,max_deltamag=1.,pixel_scale=0.2):
    """
    Function to return the closest match in magnitude within a user-defined radius within certain
    magnitude difference.

    For every measured object, all true objects inside the search radius are
    candidates; the candidate whose magnitude is closest to the measured one
    is selected, and the match is flagged as good only if that magnitude
    difference is below ``max_deltamag``.

    ***Caveats***: distances are plain Euclidean distances in the (ra, dec)
    plane, i.e. the sky is treated as flat and RA separations are not scaled
    by cos(dec). This method does not use any weighting.

    Args:
    -----

    ra_data: Right ascension of the measured objects (degrees).
    dec_data: Declination of the measured objects (degrees).
    mag_data: Measured magnitude of the objects.
    ra_true: Right ascension of the true catalog (degrees).
    dec_true: Declination of the true catalog (degrees).
    mag_true: True magnitude of the true catalog.
    true_id: Array of IDs in the true catalog.
    rmax: Maximum distance in number of pixels to perform the query.
    max_deltamag: Maximum magnitude difference for the match to be good.
    pixel_scale: Pixel size in arcseconds used to convert ``rmax`` to an
    angle (default 0.2).

    Returns:
    --------

    dist: Distance to the selected candidate, multiplied by 3600 (arcseconds
    when the inputs are in degrees). Objects without any candidate in the
    search radius carry the sentinel -99 scaled by the same factor
    (-99*3600), so test for ``dist < 0`` rather than for an exact value.
    true_id: ID in the true catalog of the selected candidate, or -99 when
    there is no candidate in the search radius.
    matched: True if matched, False if not matched.
    """
    # Work on plain arrays so that the index lists returned by the tree are
    # always interpreted positionally.
    mag_data = np.asarray(mag_data)
    mag_true = np.asarray(mag_true)
    true_id = np.asarray(true_id)
    X = np.zeros((len(ra_true),2))
    X[:,0] = ra_true
    X[:,1] = dec_true
    tree = KDTree(X,metric='euclidean')
    Y = np.zeros((len(ra_data),2))
    Y[:,0] = ra_data
    Y[:,1] = dec_data
    # Search radius in degrees: pixels * (arcsec / pixel) / (3600 arcsec / degree).
    radius_deg = rmax*pixel_scale/3600
    ind,dist= tree.query_radius(Y,r=radius_deg,return_distance=True)
    matched = np.zeros(len(ind),dtype=bool)
    ids = np.zeros(len(ind),dtype=true_id.dtype)
    dist_out = np.zeros(len(ind))
    for i, ilist in enumerate(ind):
        if len(ilist)>0:
            dmag = np.fabs(mag_true[ilist]-mag_data[i])
            good_ind = np.argmin(dmag)
            ids[i]=true_id[ilist[good_ind]]
            dist_out[i]=dist[i][good_ind]
            # The ID and distance of the best candidate are recorded even
            # when it fails the magnitude cut; only the flag differs.
            # A NaN difference compares False, so it is never a match.
            matched[i]=np.min(dmag)<max_deltamag
        else:
            ids[i]=-99
            matched[i]=False
            dist_out[i]=-99.
    return dist_out*3600., ids,matched

In [None]:
ra_true = data['ra']
dec_true = data['dec']
mag_true = data['mag_%s' %band]

In [None]:
# Match the sources with deblend_nChild == 0 to the reference objects with
# true magnitude < 26 in `band`, comparing the measured Kron magnitude with
# the true magnitude (search radius of 5 pixels, |delta mag| < 1).
# The IDs handed to the matcher are positions inside the magnitude-cut
# reference arrays, so `ind_mag` has to be applied to `...[mag_true<26]`,
# never to the full reference catalog. Entries without any candidate hold -99
# and must be removed with `matched` before they are used as indices.
dd, ind_mag, matched = spatial_closest_mag_1band(ra_imsim[nchild_imsim==0],dec_imsim[nchild_imsim==0],mag_k_imsim[nchild_imsim==0],
                              ra_true[mag_true<26],dec_true[mag_true<26],mag_true[mag_true<26],np.arange(np.count_nonzero(mag_true<26)),
                              rmax=5,max_deltamag=1)

We check what the magnitude distribution looks like

In [None]:
# PSF magnitude distribution of the sources with deblend_nChild == 0.
# The histogram carries a label so that plt.legend has an entry to show.
plt.hist(mag_psf[nchild_imsim==0],range=(0,30),bins=120,histtype='step',label='mag$_{PSF}$, deblend_nChild = 0')
plt.xlabel('%s' %band)
plt.legend(loc=2)
plt.xlim(15,26);

In [None]:
mask_mag = (np.isnan(mag_k_imsim[nchild_imsim==0])==False) & (matched)

In [None]:
# Kron-minus-true magnitude residual of the matched sources, binned in
# measured PSF magnitude. The inputs are built once and shared by the three
# statistics; the same mask (`mask_mag`) is applied to both the measured and
# the reference side so that the two arrays are guaranteed to line up.
psf_mag_matched = mag_psf[nchild_imsim==0][mask_mag]
delta_mag_matched = mag_k_imsim[nchild_imsim==0][mask_mag]-mag_true[mag_true<26][ind_mag][mask_mag]
# NOTE: despite its name, `mean_im` holds the per-bin median.
mean_im, be, _ = binned_statistic(psf_mag_matched,delta_mag_matched,range=(10,30),bins=30, statistic='median')
std_im, be, _ = binned_statistic(psf_mag_matched,delta_mag_matched,range=(10,30),bins=30, statistic='std')
n_im, be, _ = binned_statistic(psf_mag_matched,delta_mag_matched,range=(10,30),bins=30, statistic='count')

In [None]:
mean_im

And check for photometric and astrometric biases

In [None]:
# Photometric bias: per-bin median residual (red points, error bar =
# std/sqrt(N)) on top of the 2-D density of the matched sources.
# The axis labels name what is actually plotted: measured PSF magnitude on
# the x axis, measured Kron magnitude minus reference (true) magnitude on y.
plt.errorbar(0.5*be[1:]+0.5*be[:-1],mean_im,std_im/np.sqrt(n_im),fmt='o',color='red')
plt.hexbin(mag_psf[nchild_imsim==0][mask_mag],mag_k_imsim[nchild_imsim==0][mask_mag]-mag_true[mag_true<26][ind_mag][matched],gridsize=200,extent=[14,26,-0.5,0.5])
plt.xlabel('mag$_{PSF}$',fontsize=16)
plt.ylabel('mag$_{Kron}$-mag$_{true}$',fontsize=16)
plt.colorbar(label='Objects/bin')
plt.grid()
plt.ylim(-0.1,0.1)
plt.xlim(16,24);

In [None]:
# Astrometric residuals (measured - reference) of the matched sources, in
# milliarcseconds (degrees * 3600 * 1000).
ra_meas = ra_imsim[nchild_imsim==0][mask_mag]
dec_meas = dec_imsim[nchild_imsim==0][mask_mag]
ra_ref = ra_true[mag_true<26][ind_mag][mask_mag]
dec_ref = dec_true[mag_true<26][ind_mag][mask_mag]
# An RA coordinate difference is scaled by cos(Dec) to turn it into an
# on-sky angle that can be compared directly with the Dec residual.
delta_ra_mas = 3600000*(ra_meas-ra_ref)*np.cos(np.radians(dec_ref))
delta_dec_mas = 3600000*(dec_meas-dec_ref)
plt.hist(delta_ra_mas,range=(-1000,1000),bins=500,histtype='step',label=r'RA $\cos$(DEC)')
plt.hist(delta_dec_mas,range=(-1000,1000),bins=500,histtype='step',label='DEC')
plt.xlabel(r'$\Delta X$ [mas]')
plt.xlim(-100,100)
plt.legend(loc='best')
plt.savefig('test_astrometry_imSim_knl_vs_phoSim.png')

In [None]:
# Distribution of the Kron-minus-true magnitude residual of the matched
# sources; the reference magnitudes come from the reference catalog.
plt.hist(mag_k_imsim[nchild_imsim==0][mask_mag]-mag_true[mag_true<26][ind_mag][matched],bins=100, histtype='step',range=(-1,1));
plt.xlabel('mag$_{Kron}$-mag$_{true}$',fontsize=16)
plt.xlim(-0.25,0.25);

In [None]:
def asymQ(ixx,iyy,ixy):
    """Return sqrt((ixx - iyy)**2 + (2*ixy)**2) for the given second moments."""
    asymQx = ixx - iyy
    asymQy = 2*ixy
    return np.sqrt(asymQx**2 + asymQy**2)
def trQ(ixx,iyy):
    """Return the trace ixx + iyy of the second-moment matrix."""
    return ixx+iyy
def get_a(ixx,iyy,ixy):
    """Return a = sqrt((trQ + asymQ)/2), the larger of the two axis lengths."""
    return np.sqrt(0.5*(trQ(ixx,iyy)+asymQ(ixx,iyy,ixy)))
def get_b(ixx,iyy,ixy):
    """Return b = sqrt((trQ - asymQ)/2), the smaller of the two axis lengths."""
    return np.sqrt(0.5*(trQ(ixx,iyy)-asymQ(ixx,iyy,ixy)))
def get_e(ixx,iyy,ixy):
    """Return the ellipticity e = (a**2 - b**2)/(a**2 + b**2).

    Args:
    ----
        ixx, iyy, ixy: second moments (scalars or arrays of equal shape).

    Returns:
    --------
        ``asymQ/trQ``, which equals (1 - q**2)/(1 + q**2) with q = b/a.
        The result is NaN where the trace is zero.
    """
    # a**2 - b**2 == asymQ and a**2 + b**2 == trQ, so the ratio is formed
    # directly. Taking square roots first and squaring them again only adds
    # round-off and gives NaN when trQ - asymQ rounds slightly below zero.
    return asymQ(ixx,iyy,ixy)/trQ(ixx,iyy)

In [None]:
e_psf_i = get_e(ipsf_xx_i,ipsf_yy_i,ipsf_xy_i)[nchild_imsim==0][mask_mag]

We consider an object to be a star if its best match in the reference catalog is a star (i.e., `is_resolved == False`).

In [None]:
star = (data['is_resolved'][mag_true<26][ind_mag][matched]==False)

Show the distribution of the PSF ellipticity at the positions of the stars, together with its median and 95th percentile. There are certain criteria for these quantities described in [LPM-17](https://docushare.lsst.org/docushare/dsweb/Get/LPM-17)

In [None]:
# Normalised distribution of the PSF ellipticity at the positions of stars,
# with vertical lines at its median (black) and 95th percentile (red).
# `density=True` replaces the `normed` keyword, which newer matplotlib
# releases no longer accept.
plt.hist(np.abs(e_psf_i[star]),histtype='step',label=r'run 1.2p',range=(0,0.2),bins=100,density=True)
plt.plot(np.median(e_psf_i[star])*np.ones(3),np.linspace(0,70,3),'k--',label='median')
plt.plot(np.percentile(e_psf_i[star],95)*np.ones(3),np.linspace(0,70,3),'r--',label='95th percentile')
plt.xlabel(r'$|e|=(1-q^{2})/(1+q^{2})$',fontsize=16)
plt.ylabel(r'$P|e|$',fontsize=16)
plt.legend(loc='best')
plt.ylim(0,15);