# Verify Test DIA Source and DIA Object catalogs for Run 1.2p 
Michael Wood-Vasey
Last Verified to Run: 2019-06-20

Perform sanity checking on the DIA Source and DIA Object tables from the sample run of one patch.

In [None]:
# Inject gcr-catalogs that supports DIA source into path.
import os
import sys

# Resolve the home directory with expanduser rather than os.getenv('HOME'):
# getenv returns None when HOME is unset, which makes os.path.join raise
# TypeError.  expanduser still honours HOME when it is set.
gcr_catalogs_dir = os.path.join(os.path.expanduser('~'), 'local', 'lsst', 'gcr-catalogs')
# Insert at the front of sys.path so this checkout is searched first.
sys.path.insert(0, gcr_catalogs_dir)

In [None]:
import os

import GCRCatalogs

In [None]:
import pandas as pd

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt

import math
import numpy as np

In [None]:
# Load the DIA Source catalog first, then the DIA Object catalog, for the
# Run 1.2p test sample.
diaSrc, diaObject = (
    GCRCatalogs.load_catalog(catalog_name)
    for catalog_name in ('dc2_dia_source_run1.2p_test', 'dc2_dia_object_run1.2p_test')
)

Questions for this very simple test verification:
1. How many DIA Sources are there?
2. What's the distribution in RA, Dec?
3. What does the mag vs. mag_err plot look like?
4. Can we get out the filter information?

Note that we don't yet have a DIA Object table to match this to.

In [None]:
# Report the row counts of both catalogs.
n_dia_sources, n_dia_objects = len(diaSrc), len(diaObject)
print(f'There are {n_dia_sources} DIA Sources and {n_dia_objects} DIA Objects in this test sample')

In [None]:
def scatter_radec(cat, ax=None):
    """Scatter-plot the sky positions held in a catalog.

    Parameters
    ----------
    cat : mapping
        Anything indexable by ``'ra'`` and ``'dec'``.  The coordinates are
        assumed to be in degrees (the DC2 GCRCatalogs convention) -- confirm
        before passing a different kind of catalog.
    ax : matplotlib axes, optional
        Axes to draw on.  Defaults to the current pyplot axes.
    """
    if ax is None:
        ax = plt.gca()
    ax.scatter(cat['ra'], cat['dec'], marker='.')
    ax.set_xlabel('RA')
    ax.set_ylabel('Dec')

    # While we're doing a rectangular plot of the local tangent, we can at
    # least get the local scale right.  math.cos works in radians, so the
    # median declination has to be converted from degrees first.
    median_dec = np.median(cat['dec'])
    ax.set_aspect(aspect=abs(1/math.cos(math.radians(median_dec))))

# Side-by-side sky scatter plots: DIA Sources on the left, DIA Objects on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
scatter_radec(cat=diaSrc, ax=ax1)
scatter_radec(cat=diaObject, ax=ax2)

In [None]:
def hexbin_radec(cat, ax=None):
    """Draw a hexagonally binned density map of a catalog's sky positions.

    Parameters
    ----------
    cat : mapping
        Anything indexable by ``'ra'`` and ``'dec'``.  The coordinates are
        assumed to be in degrees (the DC2 GCRCatalogs convention) -- confirm
        before passing a different kind of catalog.
    ax : matplotlib axes, optional
        Axes to draw on.  Defaults to the current pyplot axes.
    """
    if ax is None:
        ax = plt.gca()
    hexbins = ax.hexbin(cat['ra'], cat['dec'])
    # Axes have no colorbar method; the colorbar is created by the figure
    # that owns the axes and attached to this particular axes.
    ax.figure.colorbar(hexbins, ax=ax)
    ax.set_xlabel('RA')
    ax.set_ylabel('Dec')

    # While we're doing a rectangular plot of the local tangent, we can at
    # least get the local scale right.  math.cos works in radians, so the
    # median declination has to be converted from degrees first.
    median_dec = np.median(cat['dec'])
    ax.set_aspect(aspect=abs(1/math.cos(math.radians(median_dec))))
    
# Side-by-side sky density maps: DIA Sources on the left, DIA Objects on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
hexbin_radec(cat=diaSrc, ax=ax1)
hexbin_radec(cat=diaObject, ax=ax2)

In [None]:
# r-band DIA sources with magnitude error below 0.1.
r_band_columns = ['ra', 'dec', 'mag', 'mag_err', 'psFlux', 'psFluxErr']
r_band_cuts = [(lambda x: x == 'r', 'filter'), 'mag_err < 0.1']
diaSrc_r = diaSrc.get_quantities(r_band_columns, filters=r_band_cuts)

In [None]:
# Sky density of the r-band subset; with no axes passed, the plot goes on the current axes.
hexbin_radec(diaSrc_r)

In [None]:
def scatter_mag(cat, ax=None):
    """Scatter-plot magnitude error against magnitude.

    ``cat`` must be indexable by ``'mag'`` and ``'mag_err'``.  The points are
    drawn on ``ax``, or on the current pyplot axes when ``ax`` is None.
    """
    target = plt.gca() if ax is None else ax
    target.scatter(cat['mag'], cat['mag_err'], marker='.')
    target.set_xlabel('Mag')
    target.set_ylabel('Mag Err')

# Magnitude error vs. magnitude for the r-band DIA Source subset.
scatter_mag(diaSrc_r)
# There is no 'mag' quantity for diaObject yet, so the equivalent call stays disabled:
# scatter_mag(diaObject)

In [None]:
# Build one DIA Source sub-catalog per band, keeping only well-measured
# sources (mag_err < 0.1).
cat_by_filter = {}
filter_names = ['u', 'g', 'r', 'i', 'z', 'y']
columns = ['ra', 'dec', 'mag', 'mag_err', 'psFlux', 'psFluxErr', 'visit']


def _filter_equals(band):
    """Return a predicate that is true where the filter value equals *band*.

    Binding the band through a function argument gives each predicate its own
    value.  A lambda written directly in the loop would instead look up the
    global ``f`` at call time, and ``f`` is reassigned later in this notebook.
    """
    return lambda x: x == band


for f in filter_names:
    cat_by_filter[f] = diaSrc.get_quantities(columns,
                                          filters=[(_filter_equals(f), 'filter'), 'mag_err < 0.1'])

In [None]:
# Overlay mag_err vs. mag for every band, one legend entry per band.
for f in cat_by_filter:
    this_cat = cat_by_filter[f]
    plt.scatter(this_cat['mag'], this_cat['mag_err'], label=f, marker='.')

plt.xlabel('mag')
plt.legend()
plt.ylabel('mag_err')

In [None]:
# List the quantities available in the DIA Source catalog.
diaSrc.list_all_quantities()

In [None]:
# List the quantities available in the DIA Object catalog.
diaObject.list_all_quantities()

In [None]:
# Density of DIA Sources in the catalog's x, y coordinates, with equal axis scaling.
plt.hexbin(diaSrc['x'], diaSrc['y'])
plt.colorbar()
plt.gca().set(xlabel='x', ylabel='y', aspect=1)
plt.title('x, y on patch');

In [None]:
# Distribution of the 'fluxmag0' quantity (presumably the photometric zero-point flux -- TODO confirm).
plt.hist(diaSrc['fluxmag0'])

In [None]:
# Double-check calibration: recompute the magnitude from psFlux for one band.
f = 'r'
this_cat = cat_by_filter[f]

# AB zero point: 8.90 mag for a flux in Jy; converting to nJy (1e-9 Jy)
# adds 2.5 * 9 mag.  psFlux is taken to be in nJy, as the lightcurve axis
# label later in this notebook states.
AB_mag_zp_wrt_Jansky = 8.90
AB_mag_zp_wrt_nanoJansky = AB_mag_zp_wrt_Jansky + 2.5 * 9

snr = abs(this_cat['psFlux']) / this_cat['psFluxErr']
mag_from_psFlux = AB_mag_zp_wrt_nanoJansky - 2.5 * np.log10(this_cat['psFlux'])

In [None]:
# Residual between the magnitude recomputed from psFlux and the catalog
# magnitude, colored by visit.  The y label names the plotted difference and
# the colorbar makes the visit coloring readable.
plt.scatter(this_cat['mag'], mag_from_psFlux - this_cat['mag'], c=this_cat['visit'])
plt.colorbar(label='visit')
plt.xlabel('mag [{}]'.format(f))
plt.ylabel('mag_from_psFlux - mag [{}]'.format(f))

In [None]:
# Check mag_err vs. flux SNR
# Should be 2.5/ln(10)/SNR = mag_err, so the plotted residual should sit at zero.
mag_err_residual = (2.5/math.log(10)) / snr - this_cat['mag_err']
# Label the y axes with the quantity actually plotted (the residual, not 1/SNR).
mag_err_residual_label = '2.5/ln(10)/SNR - mag_err [{}]'.format(f)

fig, ax = plt.subplots(1, 2, figsize=(12,6))
ax[0].scatter(this_cat['mag'], mag_err_residual)
ax[0].set_xlabel('mag [{}]'.format(f))
ax[0].set_ylabel(mag_err_residual_label)

ax[1].scatter(this_cat['mag_err'], mag_err_residual)
ax[1].set_xlabel('mag_err [{}]'.format(f))
ax[1].set_ylabel(mag_err_residual_label)

In [None]:
# Distribution of |psFlux| / psFluxErr for the selected band.
plt.hist(snr);

## DIAObject statistics

In [None]:
# Number of DIA Sources per DIA Object, with log-spaced bins and log axes.
plt.hist(diaObject['nobs'], bins=[0, 1, 2, 5, 10, 20, 50, 100, 200], log=True)
plt.xscale('log')
plt.xlabel('Number of DIA Source Observations in DIA Object')
plt.ylabel('DIA Objects per bin');

In [None]:
# Reduced chi^2 of the r-band psFlux: chi^2 divided by (number of data points - 1).
dof_r = diaObject['psFluxNdata_r'] - 1
reduced_chi2_r = diaObject['psFluxChi2_r'] / dof_r
log10_reduced_chi2_r = np.log10(reduced_chi2_r)

In [None]:
# Histogram of log10(chi^2/dof) in 20 half-unit bins centered on 0, 0.5, ..., 10.
# The explicit edges alone define the binning; a separate `range` argument
# has no effect when `bins` is a sequence, so none is passed.
plt.hist(log10_reduced_chi2_r, bins=np.linspace(-0.25, 10.25, 21));
plt.xlabel(r'$\log_{10}(\chi^2/{\rm dof})$')
plt.ylabel('#/bin');

In [None]:
# log10 reduced chi^2 against mean r magnitude, colored by log10(nobs).
plt.scatter(diaObject['magMean_r'], log10_reduced_chi2_r,
           c=np.log10(diaObject['nobs']))

plt.xlabel('<r> [mag]')
# The mathtext needs its closing '$' (and balanced grouping) to render as math.
plt.ylabel(r'$\log_{10}(\chi^2/{\rm dof})$')
plt.colorbar(label='log10(nobs)');

In [None]:
# Number of observations against mean r magnitude, colored by log10 reduced chi^2.
plt.scatter(x=diaObject['magMean_r'], y=diaObject['nobs'], c=log10_reduced_chi2_r)
plt.ylabel('nobs')
plt.xlabel('<r> [mag]')
plt.colorbar();

In [None]:
# Scatter of the r-band magnitude against the mean r-band magnitude.
plt.scatter(x=diaObject['magMean_r'], y=diaObject['magMeanStd_r'])
plt.ylabel('std(r) [mag]')
plt.xlabel('<r> [mag]');

## A Lightcurve

Let's pick an object with lots of observations and $\chi^2/{\rm dof}$ significantly greater than one.

In [None]:
# Select objects with more than 100 observations and 2 < log10(chi^2/dof) < 3.
many_observations = diaObject['nobs'] > 100
chi2_in_window = (log10_reduced_chi2_r > 2) & (log10_reduced_chi2_r < 3)
w, = np.where(many_observations & chi2_in_window)
objectIds = diaObject['diaObjectId'][w]

In [None]:
# Show the diaObjectIds that passed the selection.
print(objectIds)

In [None]:
# Take the first selected object and collect all of its DIA Sources into a DataFrame.
this_objectId = objectIds[0]
lightcurve_columns = ['diaObjectId', 'filter', 'mjd', 'mag', 'mag_err', 'psFlux', 'psFluxErr']
this_object_diaSrc = pd.DataFrame(
    diaSrc.get_quantities(
        lightcurve_columns,
        filters=[(lambda x: x == this_objectId, 'diaObjectId')],
    )
)

In [None]:
# Display the DIA Source rows belonging to the chosen object.
this_object_diaSrc

In [None]:
def plot_lightcurve(df, plot='mag'):
    """Plot a lightcurve from a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per DIA Source of a single object.  Must contain the columns
        'diaObjectId', 'filter' and 'mjd', plus 'psFlux'/'psFluxErr' when
        ``plot == 'flux'`` or 'mag'/'mag_err' otherwise.
    plot : str, optional
        'flux' plots psFlux; any other value plots magnitudes with the
        y axis flipped so that brighter is up.

    The points are drawn on the current pyplot axes, one errorbar series
    per filter.
    """
    # In lexicographical order, if not wavelength order.
    # Read the filters from the DataFrame that was passed in, not from a global.
    filters = np.unique(df['filter'])

    if plot == 'flux':
        flux_col = 'psFlux'
        flux_err_col = 'psFluxErr'
    else:
        flux_col = 'mag'
        flux_err_col = 'mag_err'

    for filt in filters:
        # A boolean mask avoids building a query string from the filter name.
        this_filter = df[df['filter'] == filt]
        plt.errorbar(this_filter['mjd'], this_filter[flux_col], this_filter[flux_err_col],
                     linestyle='none', marker='o',
                     label=filt)
    plt.xlabel('MJD')

    if plot == 'flux':
        plt.ylabel('psFlux [nJy]')
    else:
        # Swap the current y limits so smaller (brighter) magnitudes are at the top.
        plt.ylim(plt.ylim()[::-1])
        plt.ylabel('mag')

    # .iloc[0] is positional, so it works whatever the DataFrame's index labels
    # are; skip the title when there are no rows to take an id from.
    if len(df) > 0:
        plt.title(f'diaObjectId: {df["diaObjectId"].iloc[0]}')
    plt.legend()

In [None]:
# Lightcurve of the chosen object in magnitudes, on a fresh 12x8 figure.
plt.figure(figsize=(12, 8))
plot_lightcurve(df=this_object_diaSrc, plot='mag')

In [None]:
# Lightcurve of the chosen object in flux, on a fresh 12x8 figure.
plt.figure(figsize=(12, 8))
plot_lightcurve(df=this_object_diaSrc, plot='flux')