# Inject Sources in v23 for DIA Improvement

Michael Wood-Vasey and Shu Liu

Based heavily on
https://github.com/lsst/source_injection/blob/tickets/DM-34253/examples/si_demo_dc2_visit.ipynb
and Shu's work on building host galaxy catalogs from CosmoDC2.

This Notebook uses a custom Jupyter kernel to load a version of the `source_injection` package, set up with:

setup -j -r ${HOME}/local/lsst/source_injection

Loading this package is most convenient to do in the `eups` world (and thus before the Notebook starts) rather than in the Jupyter notebook.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.ndimage import gaussian_filter

from astropy.table import Table

In [None]:
from lsst.daf.butler import Butler, DimensionUniverse, DatasetType, CollectionType
from lsst.daf.butler.registry import MissingCollectionError
import lsst.afw.display as afwDisplay
from lsst.geom import SpherePoint, degrees
from lsst import sphgeom

afwDisplay.setDefaultBackend("matplotlib")

In [None]:
from lsst.source.injection import (
    BaseInjectConfig,
    BaseInjectTask,
    ExposureInjectConfig,
    ExposureInjectTask,
    VisitInjectConfig,
    VisitInjectTask
)
from lsst.source.injection.utils import ingest_injection_catalog

Need to have a `~/.lsst/db-auth.yaml` file with the db URL, username, and password to load the Butler:

In [None]:
# Path to the Gen3 butler repository used throughout this notebook.
repo = "/global/cfs/cdirs/lsst/production/gen3/DC2/Run2.2i/repo"
# No `writeable` argument is passed here; a separate writeable Butler is created further down.
butler = Butler(repo)

In [None]:
# De-duplicate the collection names and sort them for display.
collections = sorted(set(butler.registry.queryCollections()))

In [None]:
display(collections)

In [None]:
# Let's pick one collection to use as the input for everything below.
input_collection = "u/descdm/coadds_Y1_4639"

In [None]:
# Find the calexp datasets matching one tract, patch and band.
tract = 4639
patch = 8
band = "r"
calexp_where = (
    "instrument='LSSTCam-imSim' AND skymap='DC2' "
    f"AND tract={tract} AND patch={patch} AND band='{band}'"
)
# De-duplicate and sort so that positional indexing into the result is well defined.
calexp_DatasetRefs = sorted(set(
    butler.registry.queryDatasets("calexp", collections=input_collection, where=calexp_where)
))

In [None]:
print(f"Identified {len(calexp_DatasetRefs)} calexp DatasetRefs")

# Let's just pick one and look at it.
# The position is arbitrary; keep it in one place and fail with a clear
# message if the query returned too few datasets.
calexp_index = 5
if len(calexp_DatasetRefs) <= calexp_index:
    raise IndexError(
        f"Need at least {calexp_index + 1} calexp DatasetRefs, found {len(calexp_DatasetRefs)}"
    )
calexp_ref = calexp_DatasetRefs[calexp_index]
display(calexp_ref)

# The data ID of the chosen calexp is reused for every later get/put.
data_id = calexp_ref.dataId

print(f"{data_id = }")

In [None]:
# Load the image and its source catalog for the chosen data ID.
calexp = butler.get("calexp", dataId=data_id, collections=input_collection)
# `src` is passed to the image-subtraction task later in the notebook.
src = butler.get("src", dataId=data_id, collections=input_collection)
display(calexp)

## Plot the input calexp

Let's generate a plot of this `calexp` and its associated `mask`.

First, we smooth the `calexp` image (for the purpose of aiding its display in this notebook).

Then, we use afwDisplay to display these data.

In [None]:
# Q value passed to the asinh stretch; later plotting cells reuse it.
Q = 10

# Smooth a copy so that the pixels of the original calexp are left untouched.
plot_calexp = calexp.clone()
plot_calexp.image.array = gaussian_filter(calexp.image.array, sigma=3)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 6), dpi=150)
image_panel, mask_panel = ax

# Left panel: the smoothed image with an asinh stretch.
plt.sca(image_panel)
display0 = afwDisplay.Display(frame=fig)
display0.scale("asinh", min=-5 / Q, max=25 / Q, Q=Q)
display0.mtv(plot_calexp.image)
plt.title("calexp image")

# Right panel: the mask plane with a linear stretch.
plt.sca(mask_panel)
display1 = afwDisplay.Display(frame=fig)
display1.scale("linear", min=1, max=2)
display1.mtv(plot_calexp.mask)
plt.title("calexp mask")

plt.suptitle(str(data_id), y=0.8)
plt.tight_layout()
plt.show()

Note that "mask" is an information mask, so the objects show up because their footprint is marked in the "mask" plane.

## Set up a synthetic source input catalogue

We now have a calexp image that we want to inject into.

Use Shu's curated host galaxy catalog to determine locations to inject point sources ("supernovae").

In [None]:
# SQLite file with the injection coordinates for this tract (relative path).
injection_db_path = f"../data/table/gal_{tract}/injection_coord.sqlite"
# Name of the table that is queried from that database.
injection_table_name = "injection_coord"

In [None]:
# Bands and magnitudes we intend to simulate.
bands_to_simulate = ["r"]
mags_to_simulate = list(range(20, 25))
# This run uses only the first magnitude in the list.
si_mag = next(iter(mags_to_simulate))

In [None]:
import sqlite3

# NOTE(review): this connection is never closed in the cells shown here;
# consider closing it once the catalogue has been read.
conn = sqlite3.connect(injection_db_path)

In [None]:
# NOTE(review): DISTINCT applies to the whole (visit, ra, dec) row, not only to
# `visit`; the parentheses have no effect. Since `visit` is dropped after the
# read, repeated (ra, dec) pairs may remain — confirm this is intended.
query = f"SELECT DISTINCT(visit), ra, dec FROM {injection_table_name}"

In [None]:
# Read the coordinates, discard the visit column, and attach the fixed
# magnitude and source type before converting to an astropy Table.
coords = pd.read_sql_query(query, conn)
si_cat = coords.drop(columns=["visit"]).assign(mag=si_mag, source_type="DeltaFunction")
si_cat = Table.from_pandas(si_cat)

display(si_cat[:5])

In [None]:
len(si_cat)

In [None]:
# Sky positions of the injection catalogue, with the RA axis reversed.
plt.scatter(si_cat["ra"], si_cat["dec"], marker=".")
ra_lo, ra_hi = plt.xlim()
plt.xlim(ra_hi, ra_lo)
plt.xlabel("RA")
plt.ylabel("Dec")

## Register the source injection collection

The input `si_cat` will be ingested into a RUN collection in the `repo`. Here we register this collection for subsequent use below.

To begin, we first instantiate a writable `butler`. Butlers are instantiated in read-only mode by default. By setting the argument `writeable` to `True`, a butler can also be made to be writeable.

> Warning: take care when working with a writeable butler, as data on-disk has the potential to be permanently removed or corrupted.

We ensure our chosen RUN collection doesn't already exist.

Finally, the source injection collection is registered in the `repo`.

In [None]:
writeable_butler = Butler(repo, writeable=True)

# Write a separate RUN collection for each fake magnitude range.
# The magnitude is encoded as "<whole>p<tenths>" (20 -> "20p0", 20.5 -> "20p5").
# Working in integer tenths keeps this valid for fractional magnitudes: a ":1d"
# format spec applied to a float fractional part raises ValueError.
mag_whole, mag_tenths = divmod(int(round(si_mag * 10)), 10)
si_input_collection = f"u/wmwv/si_demo_{mag_whole:02d}p{mag_tenths:d}"

try:
    # Start from a clean slate: drop any RUN collection left by an earlier execution.
    writeable_butler.removeRuns([si_input_collection])
except MissingCollectionError:
    print("Writing into a new RUN collection")
else:
    print("Prior RUN collection located and successfully removed")

# Register the collection
_ = writeable_butler.registry.registerCollection(si_input_collection, type=CollectionType.RUN)
print(f"Registered collection: {si_input_collection}")

## Ingest the input catalogue into the repo

Finally, we ingest the input catalogue into the `repo`.
We use the `lsst.source.injection.utils.ingest_injection_catalog` for a little convenience, to make sure we use the same dataset type as in `source_injection`, and to set up how a larger pipetask would do this.  The injection catalogs are arranged sharded by HTM7, so this routine handles that too.

This function also ensures that the `injection_catalog` DatasetType exists in the Butler registry, and creates it if it doesn't exist.

In [None]:
# Ingest the same catalogue once per simulated band, all into one RUN collection.
for band in bands_to_simulate:
    ingest_injection_catalog(
        writeable_butler,
        si_cat,
        band=band,
        output_collection=si_input_collection,
    )

## Instantiate the injection classes

At this stage, we have an input image and we have a fully ingested synthetic source input catalogue. We're now ready to inject synthetic sources into the image using the tools available in the `source_injection` repo.

We'll build up from `BaseInject`->`ExposureInject`->`VisitInject`

`BaseInject` is the basic process of what we're doing.  We take an input catalog and an image, run image injection, and get an image with the added sources.

`ExposureInject` wraps this with pulling in information from the input and output collection for a single-detector image.
`VisitInject` takes this to the level of visits.

First, we instantiate the `ExposureInjectConfig` class. The `ExposureInjectConfig` class is where configuration of the injection task occurs, allowing for modifications to be made to how the task operates.

Following this, we then instantiate the `ExposureInjectTask`, using our `ExposureInjectConfig` object as the configuration argument.

In [None]:
# Default configuration for the exposure-level injection task; no overrides are applied.
inject_config = ExposureInjectConfig()

display(inject_config)

# The task is built from that configuration and reused for the injection run below.
inject_task = ExposureInjectTask(config=inject_config)

## Run the source injection task

Finally, we run the run method of the inject task.

As an input, the run method needs:

the input injection catalogue  
the input exposure  
the WCS information  
the photometric calibration information  
the PSF model.

The PSF, WCS, and photometric calibration are all read directly from the calexp loaded earlier, so every input is already available at this stage.

As an output, the inject task provides:

the output exposure with sources injected  
the output source injection catalogue  

Note: here we use a clone of the input calexp. This is because the calexp is edited in-place, so inputting a clone allows us to continue using the original calexp later in this notebook.

In [None]:
# Inject into a clone so that the original `calexp` is left unmodified; the PSF,
# WCS and photometric calibration are all taken from that original calexp.
inject_output = inject_task.run(
    injection_catalogs=si_cat,
    input_exposure=calexp.clone(),
    psf=calexp.getPsf(),
    wcs=calexp.getWcs(),
    photo_calib=calexp.getPhotoCalib(),
)

In [None]:
# Pull the two products out of the task result for use in the rest of the notebook.
injected_exposure = inject_output.output_exposure
injected_catalog = inject_output.output_catalog

In [None]:
# Show the connection settings from which the output dataset type names are built.
inject_connections = inject_task.config.connections
for attribute in ("injected_prefix", "output_exposure", "output_catalog"):
    print(getattr(inject_connections, attribute))

In [None]:
# We can resolve the f-style string by passing a dict to the string's format method:
connection_templates = inject_task.config.connections
template_values = connection_templates.toDict()
output_exposure_dataset_type = connection_templates.output_exposure.format(**template_values)
output_catalog_dataset_type = connection_templates.output_catalog.format(**template_values)
print(output_exposure_dataset_type)
print(output_catalog_dataset_type)

Put the outputs into the "injected_postISRCCD{|_catalog}" datasets.  We use the same data_id that we used to load the calexp.

In [None]:
data_id

Create the dataset types that we need.  This only needs to be run once for a given butler repository; because I have already run this demo, the registration code below is disabled by default.

In [None]:
# Both flags are off by default; switch one on to run the corresponding registrations.
CREATE_DATASET_TYPES = False
CREATE_DIFF_DATASET_TYPES = False

# Every dataset type registered here uses the same dimensions.
VISIT_DETECTOR_DIMENSIONS = ("instrument", "visit", "detector")


def _register_dataset_type(name, storage_class):
    """Register one dataset type with the writeable butler.

    Parameters
    ----------
    name : `str`
        Dataset type name.
    storage_class : `str`
        Name of the storage class used to persist the dataset.
    """
    dataset_type = DatasetType(
        name=name,
        storageClass=storage_class,
        dimensions=VISIT_DETECTOR_DIMENSIONS,
        # Use the repository's own dimension universe rather than a freshly
        # constructed default, so the definitions match the registry.
        universe=writeable_butler.registry.dimensions,
    )
    writeable_butler.registry.registerDatasetType(dataset_type)


if CREATE_DATASET_TYPES:
    # One image and one catalogue dataset type for the injection outputs.
    _register_dataset_type(output_exposure_dataset_type, "ExposureF")
    _register_dataset_type(output_catalog_dataset_type, "ArrowAstropy")

if CREATE_DIFF_DATASET_TYPES:
    coaddName = "deepCoadd"
    # (name suffix, storage class) for each difference-imaging product.
    diff_products = (
        ("differenceTempExp", "ExposureF"),
        ("matchedExp", "ExposureF"),
        ("scoreExp", "ExposureF"),
        ("differenceExp", "ExposureF"),
        ("diaSrc", "SourceCatalog"),
    )
    # Register each product twice: without a prefix and with the "injected_" prefix.
    for fakesType in ("", "injected_"):
        for product, storage_class in diff_products:
            _register_dataset_type(f"{fakesType}{coaddName}Diff_{product}", storage_class)


In [None]:
# Store the injected image under the same data ID as the input calexp, in our RUN collection.
writeable_butler.put(injected_exposure, output_exposure_dataset_type, dataId=data_id, run=si_input_collection)

In [None]:
# Store the catalogue returned by the injection task alongside the injected image.
writeable_butler.put(injected_catalog, output_catalog_dataset_type, dataId=data_id, run=si_input_collection)

In [None]:
display(injected_catalog[:5])

## Plot the output si_calexp

As before, let's display an image of our newly constructed `si_calexp`.

We similarly smooth the new image first, and then display the `calexp` alongside the `si_calexp` using `afwDisplay`.

In [None]:
Q = 10

# Smooth a copy of the injected exposure so the injected pixels themselves are not altered.
plot_si_calexp = injected_exposure.clone()
plot_si_calexp.image.array = gaussian_filter(injected_exposure.image.array, sigma=3)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 6), dpi=150)
# Both panels share one asinh stretch so they can be compared directly.
asinh_stretch = {"min": -5 / Q, "max": 25 / Q, "Q": Q}

# Left panel: the original (smoothed) calexp.
plt.sca(ax[0])
display0 = afwDisplay.Display(frame=fig)
display0.scale("asinh", **asinh_stretch)
display0.mtv(plot_calexp.image)
plt.title("calexp image")

# Right panel: the (smoothed) injected exposure.
plt.sca(ax[1])
display1 = afwDisplay.Display(frame=fig)
display1.scale("asinh", **asinh_stretch)
display1.mtv(plot_si_calexp.image)
plt.title("injected calexp image")

plt.suptitle(str(data_id), y=0.8)
plt.tight_layout()
plt.show()

## Plot a zoomed-in view of the si_calexp

Here is a zoomed in section of the above.

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 6), dpi=150)
asinh_stretch = {"min": -5 / Q, "max": 25 / Q, "Q": Q}
# Axis limits of the zoomed region, applied to both panels.
zoom_x = (1000, 2500)
zoom_y = (300, 1800)

# Left panel: zoom on the original calexp.
plt.sca(ax[0])
display0 = afwDisplay.Display(frame=fig)
display0.scale("asinh", **asinh_stretch)
display0.mtv(plot_calexp.image)
plt.title("calexp image (zoom)")
plt.xlim(*zoom_x)
plt.ylim(*zoom_y)

# Right panel: the same region of the injected exposure.
plt.sca(ax[1])
display1 = afwDisplay.Display(frame=fig)
display1.scale("asinh", **asinh_stretch)
display1.mtv(plot_si_calexp.image)
plt.title("si_calexp image (zoom)")
plt.xlim(*zoom_x)
plt.ylim(*zoom_y)

plt.suptitle(str(data_id), y=0.8)
plt.tight_layout()
plt.show()

## Plot the differences between the images

It is reassuring to look at a difference image to see the sources we injected.

In [None]:
# Recover pixel x, y from the catalogue's ra, dec (because that's all that's saved).
# This feels a little silly.  There should surely be a one-line version of this:
sky = [
    SpherePoint(ra_deg, dec_deg, degrees)
    for ra_deg, dec_deg in zip(injected_catalog["ra"], injected_catalog["dec"])
]
# Convert with the WCS of the calexp.
xy = calexp.wcs.skyToPixel(sky)

x = [point.x for point in xy]
y = [point.y for point in xy]

In [None]:
# Pixel-wise difference between the injected exposure and the original calexp.
plot_diff_calexp = calexp.clone()
plot_diff_calexp.image.array = injected_exposure.image.array - calexp.image.array

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 6), dpi=150)
asinh_stretch = {"min": -5 / Q, "max": 25 / Q, "Q": Q}

# Left panel: the bare difference image.
plt.sca(ax[0])
display0 = afwDisplay.Display(frame=fig)
display0.scale("asinh", **asinh_stretch)
display0.mtv(plot_diff_calexp.image)
plt.title("diff image (si_calexp - calexp)")

# Right panel: the same image with the injected positions circled.
plt.sca(ax[1])
display1 = afwDisplay.Display(frame=fig)
display1.scale("asinh", **asinh_stretch)
display1.mtv(plot_diff_calexp.image)
plt.title("diff image with markers")
plt.scatter(x, y, marker="o", s=50, fc="none", ec="orange", lw=1.5)

plt.suptitle(str(data_id), y=0.8)
plt.tight_layout()
plt.show()

In [None]:
data_id

Double-check that we can load our datasets that we just saved:

In [None]:
# Query with the resolved dataset type name rather than a hard-coded copy of it,
# so this check looks for exactly the dataset type that was written above.
injected_postISRCCD_datarefs = writeable_butler.registry.queryDatasets(
    output_exposure_dataset_type,
    collections=si_input_collection,
    where=f"instrument='LSSTCam-imSim' AND visit={data_id['visit']} AND detector={data_id['detector']}",
)

In [None]:
list(injected_postISRCCD_datarefs)

## Now let's run some subtractions

### Utility functions

In [None]:
def detect(science, subtraction):
    """Detect and measure sources on a difference image.

    Sky sources are enabled so that blank regions of the image are measured too.

    Parameters
    ----------
    science
        Science exposure handed to the detection task.
    subtraction
        Subtraction result providing ``matchedTemplate`` and ``difference``.

    Returns
    -------
    The result of ``DetectAndMeasureTask.run``.
    """
    task = DetectAndMeasureTask(config=DetectAndMeasureConfig(doSkySources=True))
    return task.run(science, subtraction.matchedTemplate, subtraction.difference)

In [None]:
from lsst.ip.diffim import AlardLuptonSubtractConfig, AlardLuptonSubtractTask
from lsst.ip.diffim import GetTemplateConfig, GetTemplateTask
from lsst.ip.diffim import DetectAndMeasureConfig, DetectAndMeasureTask

### Make a Template Image

In [None]:
# NOTE(review): `sky_map` is not referenced again in the cells shown here.
sky_map = butler.get("skyMap", collections=input_collection, skymap="DC2")

In [None]:
# Template-building task with its default configuration; no overrides are applied.
get_template_task_config = GetTemplateConfig()
get_template_task = GetTemplateTask(config=get_template_task_config)

In [None]:
# Patches to query for the template, in the band of the calexp.
patches = (0, 1, 2, 7, 8, 9, 14, 15, 16, 17)
band = calexp.getFilter().bandLabel
# The tuple is interpolated directly, so it renders as "(0, 1, ...)" in the IN clause.
coadd_where = (
    "instrument='LSSTCam-imSim' AND skymap='DC2' "
    f"AND tract={tract} AND patch IN {patches} AND band='{band}'"
)
coadd_exposure_deferred_dataset_handles = butler.registry.queryDatasets(
    "deepCoadd", collections=input_collection, where=coadd_where
)

In [None]:
list(coadd_exposure_deferred_dataset_handles)

In [None]:
# Load every matching coadd; the list is deleted again once the template has been built.
coadd_exposures = [butler.get(dR) for dR in coadd_exposure_deferred_dataset_handles]

In [None]:
# Take the data IDs from the same query results the exposures were loaded from,
# so that each exposure is paired with its own data ID.  Rebuilding them from
# `patches` assumes the query returned every patch, in that exact order.
# NOTE(review): assumes iterating the query results again yields the same order
# as when the exposures were loaded — confirm.
coadd_data_ids = [ref.dataId for ref in coadd_exposure_deferred_dataset_handles]
deep_coadd_template = get_template_task.run(
    coaddExposures=coadd_exposures,
    bbox=calexp.getBBox(),
    wcs=calexp.getWcs(),
    dataIds=coadd_data_ids,
)

In [None]:
del coadd_exposures

In [None]:
# Display the template image with the same asinh stretch used for the calexp plots.
displayc = afwDisplay.Display()
displayc.scale("asinh", min=-5/Q, max=25/Q, Q=Q)
displayc.mtv(deep_coadd_template.template.image)

In [None]:
### Run Subtraction

Now we can run a subtraction for each of the original image and for the injected image:

In [None]:
# Subtraction task with its default configuration; the same task object is
# reused for both the original and the injected subtraction.
config = AlardLuptonSubtractConfig()
task = AlardLuptonSubtractTask(config=config)

We'll use the source catalog from the original image that we loaded at the beginning

In [None]:
# Subtract the template from the original (un-injected) calexp.
subtraction = task.run(deep_coadd_template.template, calexp, src)

In [None]:
# Detect and measure on the difference image of the original calexp.
detections = detect(calexp, subtraction)

#### Save to butler

In [None]:
# Dataset type names for the products of the un-injected subtraction (no prefix).
fakesType = ""
coaddName = "deepCoadd"
diff_prefix = f"{fakesType}{coaddName}Diff_"
output_difference_dataset_type = diff_prefix + "differenceTempExp"
output_matchedTemplate_dataset_type = diff_prefix + "matchedExp"
output_diaSrc_dataset_type = diff_prefix + "diaSrc"
output_measuredExposure_dataset_type = diff_prefix + "differenceExp"

The `subtractedMeasuredExposure` has footprints recorded in the mask plane for sources measured in the difference image.

In [None]:
# Write each product of the original subtraction to its dataset type in our RUN collection.
outputs_to_store = (
    (subtraction.difference, output_difference_dataset_type),
    (subtraction.matchedTemplate, output_matchedTemplate_dataset_type),
    (detections.diaSources, output_diaSrc_dataset_type),
    (detections.subtractedMeasuredExposure, output_measuredExposure_dataset_type),
)
for product, dataset_type_name in outputs_to_store:
    writeable_butler.put(product, dataset_type_name, dataId=data_id, run=si_input_collection)

In [None]:
#### Run Injected Subtraction

In [None]:
# Same template and source catalogue as before, but now subtracting from the injected exposure.
injected_subtraction = task.run(deep_coadd_template.template, injected_exposure, src)

In [None]:
# The science image must be the injected exposure, i.e. the image that
# `injected_subtraction` was computed from, not the original calexp.
injected_detections = detect(injected_exposure, injected_subtraction)

#### Save Injected Subtraction results

In [None]:
# Dataset type names for the products of the injected subtraction ("injected_" prefix).
fakesType = "injected_"
coaddName = "deepCoadd"
diff_prefix = f"{fakesType}{coaddName}Diff_"
output_difference_dataset_type = diff_prefix + "differenceTempExp"
output_matchedTemplate_dataset_type = diff_prefix + "matchedExp"
output_diaSrc_dataset_type = diff_prefix + "diaSrc"
output_measuredExposure_dataset_type = diff_prefix + "differenceExp"

In [None]:
# Write each product of the injected subtraction to its dataset type in our RUN collection.
injected_outputs_to_store = (
    (injected_subtraction.difference, output_difference_dataset_type),
    (injected_subtraction.matchedTemplate, output_matchedTemplate_dataset_type),
    (injected_detections.diaSources, output_diaSrc_dataset_type),
    (injected_detections.subtractedMeasuredExposure, output_measuredExposure_dataset_type),
)
for product, dataset_type_name in injected_outputs_to_store:
    writeable_butler.put(product, dataset_type_name, dataId=data_id, run=si_input_collection)

## Analyze results

We now have some data products written to our butler repo.  Let's use some `analysis_tools` metrics to analyze.

Example here taken from https://github.com/lsst-sitcom/notebooks_dia/blob/main/explore_auxtel_latiss_dia.ipynb

In [None]:
from lsst.analysis.tools.actions.vector import ConvertUnits, FlagSelector, GoodDiaSourceSelector, LoadVector
from lsst.analysis.tools.atools import SkySourceHistPlot, SkySourceSkyPlot, NumDiaSourcesAllMetric, NumDiaSourcesMetric, NumDipolesMetric

In [None]:
# DIA source catalogues from the original and the injected detections, respectively.
diaSrc = detections.diaSources
injected_diaSrc = injected_detections.diaSources

In [None]:
# Metadata passed as `plotInfo` to every analysis plot below.
plot_info_dict = {
    "bands": data_id["band"],
    "visit": data_id["visit"],
    "run": si_input_collection,
    "tableName": "deepDiff_diaSrc",
}

In [None]:
plot = SkySourceHistPlot()

In [None]:
# Configure the histogram plot: choose the selector, then point each build
# action at the column names present in this (non-DPDD) catalogue.
# Use the GoodDiaSourceSelector to get the standard pixel flag selection of good DIA sources
plot.prep.selectors.skySourceSelector = GoodDiaSourceSelector
# And then specify that we just want the sky sources
plot.prep.selectors.skySourceSelector.selectWhenTrue = ["sky_source"]

# We have to rename the columns from the SkySource assumptions of having a DPDD-ified catalog
plot.process.buildActions.hist_psf_flux.vectorKey = "slot_PsfFlux_instFlux"
plot.process.buildActions.hist_ap09_flux.vectorKey = "base_CircularApertureFlux_9_0_instFlux"
plot.process.buildActions.hist_psf_sn.fluxType = "slot_PsfFlux_instFlux"
plot.process.buildActions.hist_ap09_sn.fluxType = "base_CircularApertureFlux_9_0_instFlux"

In [None]:
# Use the same number of bins for the flux and the signal-to-noise panels.
for panel_name in ("panel_flux", "panel_sn"):
    plot.produce.plot.panels[panel_name].bins = 25

In [None]:
# Finalize the configuration, then run the plot on the DIA sources from the original image.
plot.finalize()
results = plot(diaSrc, band=data_id["band"], plotInfo=plot_info_dict)

In [None]:
# Same plot for the DIA sources from the injected image; `results` is reassigned here.
plot.finalize()
results = plot(injected_diaSrc, band=data_id["band"], plotInfo=plot_info_dict)

In [None]:
# Replace `plot` with a sky-position plot of the sky sources, configured with
# the same selector as the histogram plot above.
plot = SkySourceSkyPlot()

# Use the GoodDiaSourceSelector to get the standard pixel flag selection of good DIA sources
plot.prep.selectors.skySourceSelector = GoodDiaSourceSelector
# And then specify that we just want the sky sources
plot.prep.selectors.skySourceSelector.selectWhenTrue = ["sky_source"]

# We have to rename the columns from the SkySource assumptions of having a DPDD-ified catalog
plot.process.buildActions.z.vectorKey = "base_CircularApertureFlux_9_0_instFlux"
plot.process.buildActions.statMask.fluxType = "slot_PsfFlux_instFlux"

# Convert from rad to degree
# I think this is the simplest way to do it
# We have to override the existing x, y VectorActions
plot.process.buildActions.x = ConvertUnits(buildAction=LoadVector, inUnit="rad", outUnit="degree")
plot.process.buildActions.x.buildAction.vectorKey = "coord_ra"
plot.process.buildActions.y = ConvertUnits(buildAction=LoadVector, inUnit="rad", outUnit="degree")
plot.process.buildActions.y.buildAction.vectorKey = "coord_dec"

In [None]:
# Finalize the configuration, then run the sky plot on the DIA sources from the original image.
plot.finalize()
results = plot(diaSrc, band=data_id["band"], plotInfo=plot_info_dict)

In [None]:
# Same sky plot for the DIA sources from the injected image; `results` is reassigned here.
plot.finalize()
results = plot(injected_diaSrc, band=data_id["band"], plotInfo=plot_info_dict)