# Inject Sources, Subtract and Analyze for DIA Improvement

Michael Wood-Vasey and Shu Liu

Based heavily on
https://github.com/lsst/source_injection/blob/tickets/DM-34253/examples/si_demo_dc2_visit.ipynb
and Shu's work on building host galaxy catalogs from CosmoDC2.

This Notebook uses a custom Jupyter kernel to load a version of `source_injection` package with a

setup -j -r ${HOME}/local/lsst/source_injection

Loading this package is most convenient to do in the `eups` world (and thus before the Notebook starts) rather than in the Jupyter notebook.

### Butler

This Notebook uses the Butler to store data products, and loads new ones for each section.

### Import Modules

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.ndimage import gaussian_filter
import sqlite3

from astropy.coordinates import SkyCoord
from astropy.table import Table
import astropy.units as u

In [None]:
from lsst.ap.association import TransformDiaSourceCatalogConfig, TransformDiaSourceCatalogTask

In [None]:
from lsst.daf.butler import Butler, DimensionUniverse, DatasetType, CollectionType
from lsst.daf.butler.registry import MissingCollectionError
import lsst.afw.display as afwDisplay
from lsst.geom import SpherePoint, degrees
from lsst.ip.diffim import AlardLuptonSubtractConfig, AlardLuptonSubtractTask
from lsst.ip.diffim import GetTemplateConfig, GetTemplateTask
from lsst.ip.diffim import DetectAndMeasureConfig, DetectAndMeasureTask

# Route every afwDisplay.Display created below through the matplotlib backend.
afwDisplay.setDefaultBackend("matplotlib")

In [None]:
from lsst.source.injection import (ExposureInjectConfig, ExposureInjectTask)
from lsst.source.injection.utils import ingest_injection_catalog

### Load Butler and Pick Image for this Example

Need to have a `~/.lsst/db-auth.yaml` file with the db URL, username, and password to load the Butler:

In [None]:
# Location of the Butler repository used throughout this notebook.
repo = "/global/cfs/cdirs/lsst/production/gen3/DC2/Run2.2i/repo"
# Opened writeable because later cells register collections/dataset types and put datasets.
butler = Butler(repo, writeable=True)

In [None]:
# Unique collection names known to the registry, sorted for readable display.
collections = sorted(set(butler.registry.queryCollections()))

In [None]:
# Show the sorted list of collection names.
display(collections)

In [None]:
# Collection that every input dataset (calexp, src, deepCoadd) is read from below.
input_collection = "u/descdm/coadds_Y1_4639"

In [None]:
# Collect every calexp in one band that overlaps the chosen tract/patch.
tract = 4639
patch = 8
band = "r"
calexp_where = (
    f"instrument='LSSTCam-imSim' AND skymap='DC2' "
    f"AND tract={tract} AND patch={patch} AND band='{band}'"
)
# De-duplicate and sort so that indexing into the result is reproducible.
calexp_DatasetRefs = sorted(set(
    butler.registry.queryDatasets(
        "calexp",
        collections=input_collection,
        where=calexp_where,
    )
))

In [None]:
print(f"Identified {len(calexp_DatasetRefs)} calexp DatasetRefs")

# Work with a single calexp for the rest of the notebook (the sixth ref in sorted order).
chosen_calexp_ref = calexp_DatasetRefs[5]
display(chosen_calexp_ref)

# Its data ID is reused for every get/put below.
data_id = chosen_calexp_ref.dataId

print(f"{data_id = }")

In [None]:
# Load the calexp and the matching `src` catalog for the chosen data ID, then show the calexp.
calexp = butler.get("calexp", dataId=data_id, collections=input_collection)
src = butler.get("src", dataId=data_id, collections=input_collection)
display(calexp)

## Create Synthetic Source Input Catalogue

We now have a calexp image that we want to inject into.

Use Shu's curated host galaxy catalog to determine locations to inject point sources.

### Read Catalog

In [None]:
# SQLite file (one per tract) and table holding the injection coordinates.
injection_db_path = f"../data/table/gal_{tract}/injection_coord.sqlite"
injection_table_name = "injection_coord"

In [None]:
# Bands the injection catalog is ingested for, and the candidate magnitudes.
bands_to_simulate = ["r"]
mags_to_simulate = [20, 21, 22, 23, 24]
# Only the first magnitude is used in this run of the notebook.
injection_mag = mags_to_simulate[0]

In [None]:
# Open the injection-coordinate database.
# NOTE(review): no cell shown here closes this connection.
conn = sqlite3.connect(injection_db_path)

In [None]:
# NOTE(review): in SQL, DISTINCT applies to the whole (visit, ra, dec) row, not just `visit`,
# so the same (ra, dec) can still come back once per visit.
query = f"SELECT DISTINCT(visit), ra, dec FROM {injection_table_name}"

In [None]:
from contextlib import closing

# Read the coordinates through a connection that is closed as soon as the query
# finishes, so the SQLite file handle is not held for the life of the kernel.
with closing(sqlite3.connect(injection_db_path)) as conn:
    injection_cat = pd.read_sql_query(query, conn)

# `visit` is only needed by the query; the injection catalog is per sky position.
injection_cat = injection_cat.drop(columns=["visit"])
# Every source is injected at the same magnitude, as a point source.
injection_cat["mag"] = injection_mag
injection_cat["source_type"] = "DeltaFunction"
# Downstream cells expect an astropy Table.
injection_cat = Table.from_pandas(injection_cat)

In [None]:
# Row count before de-duplicating repeated sky positions.
print(f"{len(injection_cat)} in raw catalog")

But this catalog was organized by "visit, ra, dec" so it has duplicates for every visit that covered that RA, Dec.  The RA, Dec values are exactly the same, however, so we can do float comparison.  I'm going to do this through formatting as a string explicitly, just to make it easier to see if things don't work.

In [None]:
# Key each row on its position formatted to 7 decimal places, keep the first row
# seen for every distinct key, and subset the catalog to those rows.
position_keys = [f"{ra:0.7f} {dec:0.7f}" for ra, dec in injection_cat["ra", "dec"]]
_, uniq_idx = np.unique(position_keys, return_index=True)
injection_cat = injection_cat[uniq_idx]

In [None]:
# Row count once repeated sky positions have been removed.
print(f"{len(injection_cat)} after de-duplication")

In [None]:
# Sky positions of the sources to inject.
plt.scatter(injection_cat["ra"], injection_cat["dec"], marker=".")
# Swap the current x-limits so the RA axis is reversed.
ra_axis_start, ra_axis_end = plt.xlim()
plt.xlim((ra_axis_end, ra_axis_start))
plt.xlabel("RA")
plt.ylabel("Dec")

### Register the source injection collection

The input `injection_cat` will be ingested into a RUN collection in the `repo`. Here we register this collection for subsequent use below.

> Warning: take care when working with a writeable butler, as data on-disk has the potential to be permanently removed or corrupted.

We ensure our chosen RUN collection doesn't already exist.

Finally, the source injection collection is registered in the `repo`.

In [None]:
def _magnitude_label(mag):
    """Return *mag* as ``"<whole>p<tenth>"``, e.g. 20 -> "20p0" and 20.5 -> "20p5".

    The magnitude is rounded to the nearest tenth.  Integer and fractional
    digits are derived from a single integer so that non-integer magnitudes
    format cleanly instead of raising on the ``d`` format code.
    """
    whole, tenths = divmod(int(round(mag * 10)), 10)
    return f"{whole:02d}p{tenths:1d}"


# Write a separate RUN collection for each fake magnitude.
injection_input_collection = f"u/wmwv/injection_demo_{_magnitude_label(injection_mag)}"

# When True, any existing RUN collection of the same name is removed first.
OVERWRITE = True
if OVERWRITE:
    try:
        butler.removeRuns([injection_input_collection])
    except MissingCollectionError:
        # Nothing to remove: this is the first run for this magnitude.
        print("Writing into a new RUN collection")
    else:
        print("Prior RUN collection located and successfully removed")

# Register the collection
_ = butler.registry.registerCollection(injection_input_collection, type=CollectionType.RUN)
print(f"Registered collection: {injection_input_collection}")

### Ingest the input catalogue into the repo

Finally, we ingest the input catalogue into the `repo`.
We use the `lsst.source.injection.utils.ingest_injection_catalog` for a little convenience, to make sure we use the same dataset type as in `source_injection`, and to set up how a larger pipetask would do this.  The injection catalogs are arranged sharded by HTM7, so this routine handles that too.

This function also ensures that the `injection_catalog` DatasetType exists in the Butler registry, and creates it if it doesn't exist.

In [None]:
# Ingest the same catalog once per simulated band into the injection RUN collection.
# NOTE(review): the loop variable rebinds the notebook-level name `band`.
for band in bands_to_simulate:
    ingest_injection_catalog(butler, injection_cat, band=band, output_collection=injection_input_collection)

### Instantiate the injection class

At this stage, we have an input image and we have a fully ingested synthetic source input catalogue. We're now ready to inject synthetic sources into the image using the tools available in the `source_injection` repo.

We'll build up from `BaseInject`->`VisitInject`->`ExposureInject` and use the last as our Task

`BaseInject` is the basic process of what we're doing.  This Task takes an input catalog and an image, runs the injection, and returns an image with the added sources.  We can't actually use the class directly because it assumes the existence of a declared Connections attribute `input_exposure`, but that's not actually initialized in `BaseInject`.  So we use `ExposureInject`, just to use the metadata implied by the Connections attached to the object.

In [None]:
# Default injection configuration; displayed so the settings in effect are visible.
inject_config = ExposureInjectConfig()

display(inject_config)

# Task instance whose run() method is called directly below.
inject_task = ExposureInjectTask(config=inject_config)

### Run the source injection task

Finally, we run the run method of the inject task.

As an input, the run method needs:

the input injection catalogue  
the input exposure  
the WCS information  
the photometric calibration information  
the skyMap dataset type.

The skyMap is easily loaded using butler.get. All other inputs are already ready for use at this stage.

As an output, the inject task provides:

the output exposure with sources injected  
the output source injection catalogue  

In [None]:
# Inject the catalog into the calexp; PSF, WCS and photometric calibration
# are all taken from that same calexp.
inject_output = inject_task.run(
    injection_catalogs=injection_cat,
    input_exposure=calexp,
    psf=calexp.getPsf(),
    wcs=calexp.getWcs(),
    photo_calib=calexp.getPhotoCalib(),
)

In [None]:
# Unpack the two products of the injection run.
injected_exposure, injected_catalog = (
    inject_output.output_exposure,
    inject_output.output_catalog,
)

In [None]:
# Show the configured prefix and the (still templated) output dataset type names.
inject_connections = inject_task.config.connections
for connection_name in ("injected_prefix", "output_exposure", "output_catalog"):
    print(getattr(inject_connections, connection_name))

In [None]:
# The connection names are format templates; fill them in from the
# connections' own values to get concrete dataset type names.
inject_connections = inject_task.config.connections
output_exposure_dataset_type = inject_connections.output_exposure.format(**inject_connections.toDict())
output_catalog_dataset_type = inject_connections.output_catalog.format(**inject_connections.toDict())
for resolved_name in (output_exposure_dataset_type, output_catalog_dataset_type):
    print(resolved_name)

Put the outputs into the "injected_postISRCCD{|_catalog}" datasets.  We use the same data_id that we used to load the calexp.

In [None]:
# Echo the data ID that the injected products will be stored under.
data_id

Create the dataset types that we need.  This only needs to be run once for a given butler, so by virtue of me having already run this demo, these next two steps aren't needed any more.

In [None]:
# One-time setup: register the dataset types for the injected exposure and
# injected catalog.  Left False once they exist in the repo.
CREATE_DATASET_TYPES = False
if CREATE_DATASET_TYPES:
    injected_calexp_dataset_type = DatasetType(
        name=output_exposure_dataset_type,
        storageClass="ExposureF",
        dimensions=("instrument", "visit", "detector"),
        universe=DimensionUniverse(),
    )
    butler.registry.registerDatasetType(injected_calexp_dataset_type)

    # Registered once (the original cell repeated this definition verbatim).
    injected_catalog_dataset_type = DatasetType(
        name=output_catalog_dataset_type,
        storageClass="ArrowAstropy",
        dimensions=("instrument", "visit", "detector"),
        universe=DimensionUniverse(),
    )
    butler.registry.registerDatasetType(injected_catalog_dataset_type)
    

In [None]:
# One-time setup: register the difference-imaging dataset types, both plain
# and with the "injected_" prefix.  Left False once they exist in the repo.
CREATE_DIFF_DATASET_TYPES = False
if CREATE_DIFF_DATASET_TYPES:
    coaddName = "deepCoadd"
    # (dataset type suffix, storage class) for each product, in registration order.
    diff_products = (
        ("differenceTempExp", "ExposureF"),
        ("matchedExp", "ExposureF"),
        ("scoreExp", "ExposureF"),
        ("differenceExp", "ExposureF"),
        ("diaSrc", "SourceCatalog"),
    )
    for fakesType in ("", "injected_"):
        for suffix, storage_class in diff_products:
            diff_dataset_type = DatasetType(
                name=f"{fakesType}{coaddName}Diff_{suffix}",
                storageClass=storage_class,
                dimensions=("instrument", "visit", "detector"),
                universe=DimensionUniverse(),
            )
            butler.registry.registerDatasetType(diff_dataset_type)

        

In [None]:
# Register the dataset type that will hold the fakes-to-diaSrc match table.
CREATE_MATCHED_DATASET_TYPES = True
if CREATE_MATCHED_DATASET_TYPES:
    coaddName = "deepCoadd"
    fakesType = "injected_"
    match_dataset_type_name = f"{fakesType}{coaddName}Diff_matchDiaSrc"
    match_diaSrc_dataset_type = DatasetType(
        name=match_dataset_type_name,
        storageClass="DataFrame",
        dimensions=("instrument", "visit", "detector"),
        universe=DimensionUniverse(),
    )
    butler.registry.registerDatasetType(match_diaSrc_dataset_type)


In [None]:
# Refresh the registry after the dataset-type registrations above.
butler.registry.refresh()

In [None]:
# Store the injected exposure under the calexp's data ID in the injection RUN collection.
butler.put(injected_exposure, output_exposure_dataset_type, dataId=data_id, run=injection_input_collection)

In [None]:
# Store the injected-source catalog alongside the injected exposure.
butler.put(injected_catalog, output_catalog_dataset_type, dataId=data_id, run=injection_input_collection)

In [None]:
# Peek at the first five rows of the injected catalog.
display(injected_catalog[:5])

Clear the data products.  Future steps will load from the Butler

In [None]:
# Drop the in-memory copies; the following sections reload what they need from the Butler.
del injected_exposure, injected_catalog, calexp, src

## Plot the output injected_exposure

As before, let's display an image of our newly constructed `injected_exposure`.

We similarly smooth the new image first, and then display the `exposure` alongside the `injected_exposure` using `afwDisplay`.

### Plot the full images

In [None]:
# Reload the original calexp plus the injected exposure and catalog written above.
exposure = butler.get("calexp", dataId=data_id, collections=input_collection)
injected_exposure = butler.get(output_exposure_dataset_type, dataId=data_id, collections=injection_input_collection)
injected_catalog = butler.get(output_catalog_dataset_type, dataId=data_id, collections=injection_input_collection)

In [None]:
# The injected catalog only stores sky coordinates, so recover pixel
# positions by pushing each (ra, dec) through the exposure's WCS.
sky = [
    SpherePoint(ra, dec, degrees)
    for ra, dec in zip(injected_catalog["ra"], injected_catalog["dec"])
]
xy = exposure.wcs.skyToPixel(sky)

# Split the pixel points into separate coordinate lists for plotting.
x = [pixel.x for pixel in xy]
y = [pixel.y for pixel in xy]

In [None]:
# Q is passed to every asinh stretch below and also sets the display min/max.
Q = 10

# Display-only copies whose image planes are Gaussian-smoothed (sigma=3).
plot_exposure = exposure.clone()
plot_exposure.image.array = gaussian_filter(exposure.image.array, sigma=3)

plot_injected_exposure = injected_exposure.clone()
plot_injected_exposure.image.array = gaussian_filter(injected_exposure.image.array, sigma=3)

fig, ax = plt.subplots(1, 2, figsize=(8, 6), dpi=150)

# Left panel: the original exposure.  plt.sca selects the axes before the display is created.
plt.sca(ax[0])
display0 = afwDisplay.Display(frame=fig)
display0.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
display0.mtv(plot_exposure.image)
plt.title("original exposure")

# Right panel: the exposure with injected sources, same stretch.
plt.sca(ax[1])
display1 = afwDisplay.Display(frame=fig)
display1.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
display1.mtv(plot_injected_exposure.image)
plt.title("injected exposure")

plt.suptitle(str(data_id), y=0.8)
plt.tight_layout()
plt.show()

### Plot a zoomed-in view of the injected_exposure

Here is a zoomed in section of the above.

In [None]:
# Same two smoothed images as the full-frame figure, restricted to one pixel window.
fig, ax = plt.subplots(1, 2, figsize=(8, 6), dpi=150)

# Left panel: original exposure.
plt.sca(ax[0])
display0 = afwDisplay.Display(frame=fig)
display0.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
display0.mtv(plot_exposure.image)
plt.title("exposure image (zoom)")
plt.xlim(1000, 2500)
plt.ylim(300, 1800)

# Right panel: injected exposure, with the injected positions circled in orange.
plt.sca(ax[1])
display1 = afwDisplay.Display(frame=fig)
display1.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
display1.mtv(plot_injected_exposure.image)
plt.title("injected_exposure image (zoom)")
plt.scatter(x, y, marker="o", s=50, fc="none", ec="orange", lw=1.5)
plt.xlim(1000, 2500)
plt.ylim(300, 1800)

plt.suptitle(str(data_id), y=0.8)
plt.tight_layout()
plt.show()

### Plot the differences between the images

It is reassuring to look at a difference image to see the sources we injected.

In [None]:
# Pixel-wise difference (injected - original) of the unsmoothed images, for display.
plot_diff_exposure = exposure.clone()
plot_diff_exposure.image.array = injected_exposure.image.array - exposure.image.array

fig, ax = plt.subplots(1, 2, figsize=(8, 6), dpi=150)

# Left panel: the difference on its own.
plt.sca(ax[0])
display0 = afwDisplay.Display(frame=fig)
display0.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
display0.mtv(plot_diff_exposure.image)
plt.title("diff image (injected_exposure - exposure)")

# Right panel: the same difference with the injected positions circled.
plt.sca(ax[1])
display1 = afwDisplay.Display(frame=fig)
display1.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
display1.mtv(plot_diff_exposure.image)
plt.title("diff image with markers")
plt.scatter(x, y, marker="o", s=50, fc="none", ec="orange", lw=1.5)

plt.suptitle(str(data_id), y=0.8)
plt.tight_layout()
plt.show()

In [None]:
# Drop the display copies and the exposures; the next section reloads from the Butler.
del plot_diff_exposure, plot_exposure, plot_injected_exposure
del exposure, injected_exposure

## Run Subtractions

We're going to clear earlier data and load afresh from the Butler products

In [None]:
# Original science image and its source catalog.
exposure = butler.get("calexp", dataId=data_id, collections=input_collection)
src = butler.get("src", dataId=data_id, collections=input_collection)

# Injected image and the catalog of what was injected.
injected_exposure = butler.get(output_exposure_dataset_type, dataId=data_id, collections=injection_input_collection)
injected_catalog = butler.get(output_catalog_dataset_type, dataId=data_id, collections=injection_input_collection)

### Make a Template Image

In [None]:
# Template-building task with its default configuration.
get_template_task_config = GetTemplateConfig()
get_template_task = GetTemplateTask(config=get_template_task_config)

In [None]:
# Patches of the tract to draw coadds from, and the band of the science image.
patches = (0, 1, 2, 7, 8, 9, 14, 15, 16, 17)
band = exposure.getFilter().bandLabel
# The tuple is interpolated as "(0, 1, ...)" to form the IN clause.
coadd_where = (
    f"instrument='LSSTCam-imSim' AND skymap='DC2' "
    f"AND tract={tract} AND patch IN {patches} AND band='{band}'"
)
coadd_exposure_deferred_dataset_handles = butler.registry.queryDatasets(
    "deepCoadd",
    collections=input_collection,
    where=coadd_where,
)

In [None]:
# Materialize the query result to see which coadds were found.
list(coadd_exposure_deferred_dataset_handles)

In [None]:
# Load every coadd returned by the query into memory.
coadd_exposures = [
    butler.get(coadd_ref) for coadd_ref in coadd_exposure_deferred_dataset_handles
]

In [None]:
# Take the data IDs from the same query result the coadds were loaded from,
# so each ID describes the coadd at the same position.  Building them from the
# hard-coded `patches` tuple assumes the query returned every patch, in that order.
coadd_data_ids = [coadd_ref.dataId for coadd_ref in coadd_exposure_deferred_dataset_handles]
if len(coadd_data_ids) != len(coadd_exposures):
    raise RuntimeError(
        f"Have {len(coadd_exposures)} coadd exposures but {len(coadd_data_ids)} data IDs"
    )

# Build a template covering the science exposure's bounding box, on its WCS.
deep_coadd_template = get_template_task.run(coaddExposures=coadd_exposures,
                                            bbox=exposure.getBBox(),
                                            wcs=exposure.getWcs(),
                                            dataIds=coadd_data_ids)

In [None]:
# The individual coadds are no longer needed once the template is built.
del coadd_exposures

In [None]:
# Show the template image with the same asinh stretch used for the exposures.
displayc = afwDisplay.Display()
displayc.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
displayc.mtv(deep_coadd_template.template.image)

### Run Original Subtraction

Now we can run a subtraction for each of the original image and for the injected image:

We'll use the source catalog from the original image.  Subtraction is relatively fast (1 min).  The detection and measurement steps take minutes (5-10?).

In [None]:
# Image subtraction task with its default configuration; reused for both subtractions.
subtraction_config = AlardLuptonSubtractConfig()
subtraction_task = AlardLuptonSubtractTask(config=subtraction_config)

In [None]:
# Detection/measurement task with sky sources enabled; the analysis plots at
# the end of the notebook select on the "sky_source" flag.
detect_and_measure_config = DetectAndMeasureConfig(doSkySources=True)
detect_and_measure_task = DetectAndMeasureTask(config=detect_and_measure_config)

In [None]:
# Subtract the template from the original (un-injected) exposure.
subtraction = subtraction_task.run(deep_coadd_template.template, exposure, src)

In [None]:
# Detect and measure sources on the difference image of the original exposure.
detections = detect_and_measure_task.run(exposure,
                                         subtraction.matchedTemplate,
                                         subtraction.difference)

#### Save to butler

In [None]:
# Inspect the dataset type name templates configured on the subtraction task.
subtraction_task.config.connections

In [None]:
# Inspect the dataset type name templates configured on the detection task.
detect_and_measure_task.config.connections

In [None]:
# We could use the values in the task configs, but we didn't actually set different ones.
# In part to keep things simple, and in part because I don't completely agree.
# E.g., I think we want to use the regular `src` not the `injected_src` to pass to subtraction.
# But clearly there are times when one will want to do the one vs. the other.
label_kwargs = {"fakesType": "", "coaddName": "deepCoadd"}

# Resolve each templated connection name into a concrete dataset type name.
subtract_connections = subtraction_task.config.connections
detect_connections = detect_and_measure_task.config.connections
output_difference_dataset_type = subtract_connections.difference.format(**label_kwargs)
output_matchedTemplate_dataset_type = subtract_connections.matchedTemplate.format(**label_kwargs)
output_diaSrc_dataset_type = detect_connections.diaSources.format(**label_kwargs)
output_measuredExposure_dataset_type = detect_connections.subtractedMeasuredExposure.format(**label_kwargs)

The difference between the `subtraction.difference` and `detect_and_measure.subtractedMeasuredExposure` is that the latter adds the footprints recorded in the mask plane for sources measured in the difference image.  So we only save the second one.

In [None]:
# Persist the products of the original-image subtraction, in the same order as before.
original_products = (
    (subtraction.difference, output_difference_dataset_type),
    (subtraction.matchedTemplate, output_matchedTemplate_dataset_type),
    (detections.diaSources, output_diaSrc_dataset_type),
    (detections.subtractedMeasuredExposure, output_measuredExposure_dataset_type),
)
for product, product_dataset_type in original_products:
    butler.put(product, product_dataset_type, dataId=data_id, run=injection_input_collection)

In [None]:
# Free the original-image products; note that `exposure` is no longer defined after this cell.
del detections, exposure, subtraction

### Run Injected Subtraction

In [None]:
# Subtract the same template from the injected exposure, still using the original `src` catalog.
injected_subtraction = subtraction_task.run(deep_coadd_template.template, injected_exposure, src)

In [None]:
# Detect and measure on the injected difference image.  The science image must
# be the injected exposure: `exposure` was deleted above, and it is not the
# image this difference was made from.
injected_detections = detect_and_measure_task.run(injected_exposure,
                                                  injected_subtraction.matchedTemplate,
                                                  injected_subtraction.difference)

#### Save Injected Subtraction results

In [None]:
# Same connection templates as for the original subtraction, now resolved
# with the "injected_" prefix.
label_kwargs = {"fakesType": "injected_", "coaddName": "deepCoadd"}

subtract_connections = subtraction_task.config.connections
detect_connections = detect_and_measure_task.config.connections
output_difference_dataset_type = subtract_connections.difference.format(**label_kwargs)
output_matchedTemplate_dataset_type = subtract_connections.matchedTemplate.format(**label_kwargs)
output_diaSrc_dataset_type = detect_connections.diaSources.format(**label_kwargs)
output_measuredExposure_dataset_type = detect_connections.subtractedMeasuredExposure.format(**label_kwargs)
output_diaSrc_schema_dataset_type = detect_connections.outputSchema.format(**label_kwargs)

In [None]:
# Persist the products of the injected-image subtraction.
butler.put(injected_subtraction.difference, output_difference_dataset_type, dataId=data_id, run=injection_input_collection)
butler.put(injected_subtraction.matchedTemplate, output_matchedTemplate_dataset_type, dataId=data_id, run=injection_input_collection)
butler.put(injected_detections.diaSources, output_diaSrc_dataset_type, dataId=data_id, run=injection_input_collection)
# The schema catalog is an init-output held on the task itself; the Struct
# returned by run() does not carry an `outputSchema` attribute.
butler.put(detect_and_measure_task.outputSchema, output_diaSrc_schema_dataset_type, dataId=data_id, run=injection_input_collection)
butler.put(injected_detections.subtractedMeasuredExposure, output_measuredExposure_dataset_type, dataId=data_id, run=injection_input_collection);

In [None]:
# Set up the task that transforms the raw diaSrc catalog into a diaSrcTable.
# The config must be an instance, not the class itself.
transform_dia_source_config = TransformDiaSourceCatalogConfig()
# The task reads the input schema from its `diaSourceSchema` init-input; the
# schema catalog lives on the detection task, not on the run() result.
initInputs = {"diaSourceSchema": detect_and_measure_task.outputSchema}
transform_dia_source_task = TransformDiaSourceCatalogTask(initInputs, config=transform_dia_source_config)

In [None]:
# Visit ID recorded on the injected difference image (`difference` itself is
# not loaded until the matching section below).
injected_subtraction.difference.getInfo().getVisitInfo().id

In [None]:
# Scratch handle on the exposure metadata; `exposure` was deleted after the
# original subtraction, so read it from the injected exposure instead.
foo = injected_exposure.getInfo()

In [None]:
# Transform the injected diaSrc catalog.  Band and visit ID come from the
# injected difference image; the name `difference` is not defined until the
# matching section below.
injected_difference = injected_subtraction.difference
transformed_diaSrcTable = transform_dia_source_task.run(
    injected_detections.diaSources,
    injected_difference,
    band=injected_difference.getFilter().bandLabel,
    ccdVisitId=injected_difference.getInfo().getVisitInfo().id,
)

In [None]:
transform_dia_source_dataset_type = transform_dia_source_task.config.connections.diaSourceTable.format(**label_kwargs)
# The task's run() returns a result Struct; the table to persist is its
# `diaSourceTable` attribute.  Fall back to the object itself if it is already a bare table.
dia_source_table = getattr(transformed_diaSrcTable, "diaSourceTable", transformed_diaSrcTable)
butler.put(dia_source_table, transform_dia_source_dataset_type, dataId=data_id, run=injection_input_collection)

In [None]:
# Free the template and the injected products; the matching section reloads from the Butler.
del deep_coadd_template, injected_exposure, injected_detections, injected_subtraction

## Match DIA Detections to Injected Sources

In [None]:
# Reload the injected catalog under the same resolved dataset type name it was
# written with, rather than a hard-coded copy of that name.
injected_catalog = butler.get(output_catalog_dataset_type, dataId=data_id, collections=injection_input_collection)

In [None]:
# Prefix/coadd labels used to build the dataset type names below.
fakesType = "injected_"
coaddName = "deepCoadd"

# `output_difference_dataset_type` was last resolved with the "injected_" prefix,
# so this is the injected difference image.
difference = butler.get(output_difference_dataset_type, dataId=data_id, collections=injection_input_collection)
# DIA sources from the original and from the injected subtraction.
diaSrc = butler.get(f"{coaddName}Diff_diaSrc", dataId=data_id, collections=injection_input_collection)
injected_diaSrc = butler.get(f"{fakesType}{coaddName}Diff_diaSrc", dataId=data_id, collections=injection_input_collection)
# Converted to an astropy table so columns can be added below.
injected_diaSrc = injected_diaSrc.asAstropy()

In [None]:
from lsst.pipe.tasks.matchFakes import MatchFakesConfig, MatchFakesTask

In [None]:
# Fakes-matching task with its default configuration.
match_fakes_config = MatchFakesConfig()
match_fakes_task = MatchFakesTask(config=match_fakes_config)

In [None]:
# Add a few columns as a hack to SDM-ify the diaSrc catalog.
injected_diaSrc["ra"] = np.rad2deg(injected_diaSrc["coord_ra"])
injected_diaSrc["dec"] = np.rad2deg(injected_diaSrc["coord_dec"])
# 2023-10-19: diaSrc doesn't have id actually set to anything, so assign a simple running index.
# Start at 1, not 0: MatchFakesTask appears to write diaSourceId == 0 for
# injected sources with no match (TODO confirm against pipe_tasks), so a real
# source with ID 0 would be merged onto every unmatched fake.
injected_diaSrc["diaSourceId"] = list(range(1, len(injected_diaSrc) + 1))

In [None]:
# Pandas copy of the injected catalog with both sky coordinates converted from degrees to radians.
df = injected_catalog.to_pandas()
for angle_column in ("ra", "dec"):
    df[angle_column] = np.deg2rad(df[angle_column])

Clean injected sky coordinates to match diffim bbox

In [None]:
# Add pixel coordinates for the difference image, then trim the catalog to that image.
# NOTE(review): both calls use underscore-prefixed (private) task methods.
df = match_fakes_task._addPixCoords(fakeCat=df, image=difference)
trimmed_df = match_fakes_task._trimFakeCat(fakeCat=df, image=difference)

In [None]:
# Compare the trimmed count against the full injected catalog.
print(len(trimmed_df))
print(len(injected_catalog))

In [None]:
# Match the injected sources against the DIA sources.
# NOTE(review): this passes the untrimmed `df`; `trimmed_df` computed above is not used here — confirm that is intended.
matched_fakes = match_fakes_task._processFakes(fakeCat=df, diffIm=difference, associatedDiaSources=injected_diaSrc.to_pandas())

In [None]:
# The match table is the only product used from the task result.
matched_injected_catalog = matched_fakes.matchedDiaSources

In [None]:
# Pandas views of both catalogs, indexed by their ID column (which is also kept
# as a regular column) so rows can be selected by ID with .loc.
df_injected_diaSrc = injected_diaSrc.to_pandas().set_index("diaSourceId", drop=False)
df_injected_catalog = injected_catalog.to_pandas().set_index("injection_id", drop=False)

In [None]:
# IDs present in each input catalog but absent from the match table.
all_injection_ids = set(injected_catalog["injection_id"])
matched_injection_ids = set(matched_injected_catalog["injection_id"])
unmatched_injected_idx = list(all_injection_ids - matched_injection_ids)

all_dia_source_ids = set(injected_diaSrc["diaSourceId"])
matched_dia_source_ids = set(matched_injected_catalog["diaSourceId"])
unmatched_recovered_idx = list(all_dia_source_ids - matched_dia_source_ids)

# Pull the corresponding rows out of the ID-indexed frames.
unmatched_recovered_catalog = df_injected_diaSrc.loc[unmatched_recovered_idx]
unmatched_injected_catalog = df_injected_catalog.loc[unmatched_injected_idx]

In [None]:
# NOTE(review): the "Recovered" figure is the row count of the match table — confirm
# that table contains only injected sources that actually found a DIA source.
print(f"Recovered {len(matched_injected_catalog)} / {len(injected_catalog)} injected sources")
print(f"Found {len(unmatched_recovered_idx)} / {len(injected_diaSrc)} objects in diaSrc that don't match injected catalog")

Note that we haven't checked if any injected catalog objects match several diaSources.  So the numbers above don't have to quite add up.

In [None]:
# Overlay the two sets of coordinates carried by the match table.
_, axes = plt.subplots(1, 1, figsize=(8, 4))

ax = axes
# ax.scatter(unmatched_injected_catalog["ra"], unmatched_injected_catalog["dec"], marker=".", color="orange")
# The "_y" columns are plotted as-is; the "_x" columns are converted from radians first.
ax.scatter(matched_injected_catalog["ra_y"], matched_injected_catalog["dec_y"], marker=".", color="blue")
ax.scatter(np.rad2deg(matched_injected_catalog["ra_x"]), np.rad2deg(matched_injected_catalog["dec_x"]), marker=".", color="red")

# ax.scatter(injected_catalog["ra"], injected_catalog["dec"], marker="o", color="green")
ax.set_xlabel("RA")
ax.set_ylabel("Dec")
# Reverse the RA axis.
ax.set_xlim(ax.get_xlim()[::-1]);

There's an error in the matching: one source in the recovered_diaSrc, at (57.135862, -30.797897), got matched even though it's clearly not in the injected catalog.

In [None]:
# Inspect the "_y" coordinates in the match table.
# np.rad2deg(matched_injected_catalog[["ra_x", "dec_x"]])
matched_injected_catalog[["ra_y", "dec_y"]]

#### Save matched catalog

In [None]:
# Persist the match table under the dataset type registered earlier in the notebook.
butler.put(matched_injected_catalog, "injected_deepCoaddDiff_matchDiaSrc", dataId=data_id, run=injection_input_collection)

## Analyze results

We now have some data products written to our butler repo.  Let's use some `analysis_tools` metrics to analyze.

Example here taken from https://github.com/lsst-sitcom/notebooks_dia/blob/main/explore_auxtel_latiss_dia.ipynb

In [None]:
from lsst.analysis.tools.actions.vector import ConvertUnits, FlagSelector, GoodDiaSourceSelector, LoadVector
from lsst.analysis.tools.atools import SkySourceHistPlot, SkySourceSkyPlot, NumDiaSourcesAllMetric, NumDiaSourcesMetric, NumDipolesMetric

In [None]:
# Reload the calexp and both DIA source catalogs for the analysis plots.
exposure = butler.get("calexp", dataId=data_id, collections=input_collection)
diaSrc = butler.get(f"{coaddName}Diff_diaSrc", dataId=data_id, collections=injection_input_collection)
injected_diaSrc = butler.get(f"{fakesType}{coaddName}Diff_diaSrc", dataId=data_id, collections=injection_input_collection)
# NOTE(review): only the injected catalog is converted to astropy; `diaSrc` is passed to the plots as loaded.
injected_diaSrc = injected_diaSrc.asAstropy()

In [None]:
# Labels passed to every analysis_tools plot below.
plot_info_dict = {
    "bands": data_id["band"],
    "visit": data_id["visit"],
    "run": injection_input_collection,
    "tableName": "deepDiff_diaSrc",
}

In [None]:
# Histogram plot of sky sources; configured in the following cells.
plot = SkySourceHistPlot()

In [None]:
# Configure the histogram plot to work on a raw (non-DPDD) diaSrc catalog.
# Use the GoodDiaSourceSelector to get the standard pixel flag selection of good DIA sources
plot.prep.selectors.skySourceSelector = GoodDiaSourceSelector
# And then specify that we just want the sky sources
plot.prep.selectors.skySourceSelector.selectWhenTrue = ["sky_source"]

# We have to rename the columns from the SkySource assumptions of having a DPDD-ified catalog
plot.process.buildActions.hist_psf_flux.vectorKey = "slot_PsfFlux_instFlux"
plot.process.buildActions.hist_ap09_flux.vectorKey = "base_CircularApertureFlux_9_0_instFlux"
plot.process.buildActions.hist_psf_sn.fluxType = "slot_PsfFlux_instFlux"
plot.process.buildActions.hist_ap09_sn.fluxType = "base_CircularApertureFlux_9_0_instFlux"

In [None]:
# Use 25 bins in both the flux and the S/N panels.
for panel_name in ("panel_flux", "panel_sn"):
    plot.produce.plot.panels[panel_name].bins = 25

In [None]:
# Histogram for the DIA sources from the original subtraction.
plot.finalize()
results = plot(diaSrc, band=data_id["band"], plotInfo=plot_info_dict)

In [None]:
# Same histogram for the DIA sources from the injected subtraction.
plot.finalize()
results = plot(injected_diaSrc, band=data_id["band"], plotInfo=plot_info_dict)

In [None]:
# Sky-position plot of sky sources, reconfigured for a raw (non-DPDD) diaSrc catalog.
plot = SkySourceSkyPlot()

# Use the GoodDiaSourceSelector to get the standard pixel flag selection of good DIA sources
plot.prep.selectors.skySourceSelector = GoodDiaSourceSelector
# And then specify that we just want the sky sources
plot.prep.selectors.skySourceSelector.selectWhenTrue = ["sky_source"]

# We have to rename the columns from the SkySource assumptions of having a DPDD-ified catalog
plot.process.buildActions.z.vectorKey = "base_CircularApertureFlux_9_0_instFlux"
plot.process.buildActions.statMask.fluxType = "slot_PsfFlux_instFlux"

# Convert from rad to degree
# I think this is the simplest way to do it
# We have to override the existing x, y VectorActions
plot.process.buildActions.x = ConvertUnits(buildAction=LoadVector, inUnit="rad", outUnit="degree")
plot.process.buildActions.x.buildAction.vectorKey = "coord_ra"
plot.process.buildActions.y = ConvertUnits(buildAction=LoadVector, inUnit="rad", outUnit="degree")
plot.process.buildActions.y.buildAction.vectorKey = "coord_dec"

In [None]:
# Sky plot for the DIA sources from the original subtraction.
plot.finalize()
results = plot(diaSrc, band=data_id["band"], plotInfo=plot_info_dict)

In [None]:
# Same sky plot for the DIA sources from the injected subtraction.
plot.finalize()
results = plot(injected_diaSrc, band=data_id["band"], plotInfo=plot_info_dict)