## Extract the efficiency of the matching between the shear catalog and the object catalog


#### Standard import

In [None]:
import tables_io
import numpy as np
import hpmcm
import matplotlib.pyplot as plt

#### Re-build the skymap used to define the tracts

In [None]:
from lsst.skymap.ringsSkyMap import RingsSkyMap, RingsSkyMapConfig

# Spell out the tessellation parameters one per line, then build the skymap
# from that configuration.
# NOTE(review): these values presumably have to match the skymap that was
# used when the catalogs were produced -- confirm before changing any of them.
skymap_config = RingsSkyMapConfig(
    projection='TAN',
    numRings=120,
    tractOverlap=1/60,
    pixelScale=0.2,
)
skymap = RingsSkyMap(skymap_config)

#### Set up the configuration

In [None]:
# Analysis configuration: which table, catalog flavour and tract to load.
keys = ['_cluster_stats']  # tables to read from the match file
st_ = 'pgauss'             # catalog type, used to build the file name
tract = 10463              # tract to study, used to build the file name

# Read only the requested tables and keep the cluster statistics table.
match_file = f"test_data/obj_{st_}_match_{tract}.pq"
dd = tables_io.read(match_file, keys=keys)
data = dd[keys[0]]

#### Make masks for the different types of matches

In [None]:
# Boolean masks that classify every row of `data` by match outcome.
# NOTE(review): nSrc is presumably the number of sources in a cluster and
# nUnique the number of distinct input catalogs contributing to it -- confirm
# against the hpmcm output schema.

# Clean one-to-one match: exactly two sources and two unique entries.
good_mask = (data.nSrc == 2) & (data.nUnique == 2)

# Keep only clusters whose position falls in the tract being studied. Use the
# configured `tract` rather than a literal id so that changing the
# configuration cell cannot silently select a different tract than the one
# whose file was loaded.
in_tract = skymap.findTractIdArray(data.ra.values, data.dec.values, degrees=True) == tract

# Not a clean match, split by whether the reference catalog is present.
missing_md = ~good_mask & data.hasRefCat
missing_ref = ~good_mask & ~data.hasRefCat

# More than two sources ended up in the same cluster.
extra = data.nSrc > 2

#### Make a histogram of the different match types

In [None]:
# All four histograms share the same logarithmic SNR binning.
snr_hist_bins = np.logspace(0, 4, 101)

# Bin width in dex, derived from the edges so the y-axis label cannot drift
# out of sync with the binning (100 bins over 4 decades is 0.04 dex, not the
# 0.05 dex the label used to claim).
snr_hist_dex = np.log10(snr_hist_bins[1] / snr_hist_bins[0])

# One histogram per match category, restricted to the tract under study.
# The plotting order is kept so the colour assignment does not change.
for mask, label in (
    (good_mask, "Good"),
    (missing_md, "Object only"),
    (missing_ref, "Shear only"),
    (extra, "Confusion"),
):
    plt.hist(data[mask*in_tract].SNR, bins=snr_hist_bins, alpha=0.5, label=label)

plt.xscale('log')
plt.yscale('log')
plt.legend()

plt.xlabel("Signal-to-noise [r-band]")
_ = plt.ylabel(f"Objects [per {snr_hist_dex:.2f} dex]")

#### Make a scatter plot of positions of missing matches, to make sure we haven't messed up the sky overlap

In [None]:
# Sky positions of the good matches first, then of the clusters that lack a
# reference-catalog entry, both restricted to the tract under study.
for selection in (good_mask*in_tract, missing_ref*in_tract):
    _ = plt.scatter(data.ra[selection], data.dec[selection], s=1)

#### Estimate the good match efficiency as a function of SNR

In [None]:
# Logarithmic SNR binning shared by every histogram in this cell.
snr_edges = np.logspace(0, 5, 101)


def _snr_counts(selection):
    """Return the per-bin SNR counts for the rows of `data` picked by a boolean array."""
    return np.histogram(data.iloc[selection].SNR, bins=snr_edges)[0]


# Everything in the tract, then each match category within the tract.
hist_all = _snr_counts(in_tract)
hist_in_ref = _snr_counts(data.hasRefCat.values*in_tract)
hist_good = _snr_counts(good_mask.values*in_tract)
hist_missing_md = _snr_counts(missing_md.values*in_tract)
hist_missing_ref = _snr_counts(missing_ref.values*in_tract)
hist_extra = _snr_counts(extra.values*in_tract)

# Good matches plus the clusters without a reference-catalog entry.
hist_in_md = hist_good + hist_missing_ref

In [None]:
def _fraction_with_error(n_selected, n_total):
    """Return (fraction, binomial error) per bin for two arrays of counts.

    The fraction is n_selected / n_total and the error is
    sqrt(p * (1 - p) / n_total). Bins with n_total == 0 evaluate to NaN; the
    floating-point warnings for those bins are silenced because empty bins
    are expected at the extremes of the SNR range.
    """
    with np.errstate(divide="ignore", invalid="ignore"):
        fraction = n_selected / n_total
        error = np.sqrt(fraction * (1 - fraction) / n_total)
    return fraction, error


# Fractions of all in-tract clusters falling in each failure category.
ineffic_missing_md, npq_missing_md = _fraction_with_error(hist_missing_md, hist_all)
ineffic_missing_ref, npq_missing_ref = _fraction_with_error(hist_missing_ref, hist_all)
ineffic_extra, npq_missing_extra = _fraction_with_error(hist_extra, hist_all)

# Same failure count as ineffic_missing_ref, but normalised to hist_in_md
# instead of to everything in the tract.
ineffic_ref_in_md, npq_ref_in_md = _fraction_with_error(hist_missing_ref, hist_in_md)

# Geometric bin centres, i.e. the midpoints of the bins in log space.
bin_edges = np.logspace(0, 5, 101)
bin_centers = np.sqrt(bin_edges[0:-1] * bin_edges[1:])

#### Plot the match inefficiency as a function of SNR

In [None]:
# Per-bin inefficiencies relative to all in-tract clusters, split by whether
# the reference catalog is present. Empty bins are NaN and are not drawn.
plt.errorbar(bin_centers, ineffic_missing_md, yerr=npq_missing_md, label="Has Ref", ls="", marker='.')
plt.errorbar(bin_centers, ineffic_missing_ref, yerr=npq_missing_ref, label="No Ref", ls="", marker='.')
plt.xscale('log')
plt.yscale('log')

# The series labels above are only shown if a legend is requested; label the
# axes the same way as the other plots in this notebook.
plt.legend()
plt.xlabel("Signal-to-noise [r-band]")
_ = plt.ylabel("Inefficiency w.r.t. all objects")

#### Plot the match inefficiency w.r.t. the metadetect catalog as a function of SNR

In [None]:
# Draw on the current axes through the object-oriented interface.
ax = plt.gca()

# Per-bin inefficiency normalised to hist_in_md, with binomial error bars.
ax.errorbar(bin_centers, ineffic_ref_in_md, yerr=npq_ref_in_md, label="Has Ref", ls="", marker='.')

# Both quantities span orders of magnitude, so use log-log axes.
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("Signal-to-noise [r-band]")
_ = ax.set_ylabel("Inefficiency w.r.t. MD objects")

#### Estimate the good match efficiency w.r.t. the metadetect catalog

In [None]:
# Only bins from index 20 upwards are summed. With the 0.05 dex binning of
# np.logspace(0, 5, 101) used for these histograms, that bin starts at SNR = 10.
first_bin = 20

# Integrated counts above the cut: all entries in hist_in_md, and the subset
# that has no reference-catalog counterpart.
nAll = hist_in_md[first_bin:].sum()
nMissing = hist_missing_ref[first_bin:].sum()

# Matched fraction and its binomial uncertainty.
nMatched = nAll - nMissing
effic = nMatched/nAll
effic_err = np.sqrt(effic*(1-effic)/nAll)

In [None]:
# Report the efficiency and its uncertainty with the same fixed number of
# decimals; the value previously used the general ".5" format, which prints a
# different number of digits than the ".5f" used for the error.
print(f"Effic: {effic:.5f} +- {effic_err:.5f}")