### Preprocessing from MAST data

This notebook covers processing the data from `*_uncal.fits` files, through to creating drizzled mosaics.

In [1]:
import os
from pathlib import Path

# This is currently a necessity; newer pipeline reductions do not work well with grizli.
# The context is pinned here, ahead of the `jwst` import in the cells below.
os.environ["CRDS_CONTEXT"] = "jwst_1173.pmap"

# Replace the following lines with your preferred directory structure.
# Fail early with a readable message if ROOT_DIR is missing, instead of the
# opaque TypeError that `Path(None)` would raise.
_root_dir_env = os.getenv("ROOT_DIR")
if _root_dir_env is None:
    raise RuntimeError(
        "The ROOT_DIR environment variable is not set; point it at the "
        "top-level data directory before running this notebook."
    )
root_dir = Path(_root_dir_env)

# The directory containing the MAST downloads
uncal_dir = root_dir / "archival" / "MAST_2024-10-03T09_25_33.967Z" / "JWST"

# The output directory for the `*_rate.fits` files (created if it does not exist)
raw_output_dir = root_dir / "archival" / "JWST" / "A2744_CTX_1173"
raw_output_dir.mkdir(exist_ok=True, parents=True)

We begin by processing the `*_uncal.fits` files using the `jwst` pipeline package, to produce the `*_rate.fits` files that `grizli` uses.

In [None]:
from jwst.pipeline import Detector1Pipeline

# A single pipeline instance is configured once and reused for every exposure
pipe = Detector1Pipeline()

pipe.save_results = True
pipe.output_dir = str(raw_output_dir)
pipe.jump.maximum_cores = "8"

for file in uncal_dir.glob("*uncal.fits"):
    # Expected product: same name as the input, with "_uncal" swapped for "_rate"
    output_filename = (raw_output_dir / file.name).with_stem(
        file.stem.replace("_uncal", "_rate")
    )
    if output_filename.is_file():
        # Skip exposures that already have a rate file in the output directory
        print(f"{output_filename.name} exists; skipping {file.name}.")
        continue

    # `str.strip("_rate")` would remove any of the characters "_", "r", "a",
    # "t", "e" from BOTH ends of the stem (e.g. "..._nrca_rate" -> "..._nrc"),
    # so the suffix is removed explicitly instead.
    pipe.output_file = output_filename.stem.removesuffix("_rate")
    pipe.run(str(file))

We import all the necessary packages, and set up the `grizli` directory structure. For further details on installing and configuring `grizli`, refer to the [dedicated installation instructions](https://grizli.readthedocs.io/en/latest/grizli/install.html).

In [None]:
import shutil, logging
from astropy.io import fits
import grizli
from grizli import utils, prep, jwst_utils, multifit
from grizli.pipeline import auto_script

print("Grizli version: ", grizli.__version__)

# Quiet JWST log warnings: record the chosen level on the module, then apply it
jwst_utils.QUIET_LEVEL = logging.INFO
jwst_utils.set_quiet_logging(jwst_utils.QUIET_LEVEL)

# Root name passed to the mosaic and visit-info functions in later cells
root_name = "glass-a2744"

# Setup the grizli directory structure: a home directory with three subdirectories
grizli_home_dir = root_dir / "2024_08_16_A2744_v4" / "grizli_home"
grizli_home_dir.mkdir(exist_ok=True, parents=True)

for subdir_name in ("Prep", "RAW", "visits"):
    (grizli_home_dir / subdir_name).mkdir(exist_ok=True)

We use the `grizli` association files in our processing. If our observations did not have these files (e.g. PASSAGE), it would be necessary to create them; the functions in `grizli.aws` require them. 

We also copy the `*_rate.fits` files to the visit directories. If we skip this step, `grizli` will automatically download the `rate` files from MAST, which will typically have been processed with the most up-to-date CRDS context. At some point in the future, this may even be desirable.

In [None]:
# All per-association directories live under `visits`; work from there
os.chdir(grizli_home_dir / "visits")

from grizli import utils
from grizli.aws import visit_processor

# Cluster coordinates
ra, dec = 3.58641, -30.39997

# Proposal ID, matched against the `proposal_id` column of the association table
proposal_id = 1324

# search radius, arcmin
radius = 1

QUERY_URL = "https://grizli-cutout.herokuapp.com/assoc?coord={ra},{dec}&arcmin={radius}&output=csv"

# Table of associations overlapping the search region
assoc_query = utils.read_catalog(
    QUERY_URL.format(ra=ra, dec=dec, radius=radius), format="csv"
)

# Boolean mask: NIRISS associations belonging to the chosen proposal
nis = (assoc_query["instrument_name"] == "NIRISS") & (
    assoc_query["proposal_id"] == proposal_id
)

print(
    assoc_query[
        "assoc_name", "target", "proposal_id", "filter", "instrument_name", "status"
    ][nis]
)

EXPOSURE_API = "https://grizli-cutout.herokuapp.com/exposures?associations={assoc}"

for assoc in assoc_query["assoc_name"][nis]:
    assoc_dir = grizli_home_dir / "visits" / assoc

    # Only query and copy files if this visit hasn't been processed yet.
    # The previous outer guard tested `grizli_home_dir / assoc / "Prep"`, a path
    # without the "visits" component that is never created, so it never skipped
    # anything; the presence of drizzled products is the single criterion now.
    if any((assoc_dir / "Prep").glob("*drz_sci.fits")):
        continue

    # Exposure list for this association; the "dataset" column holds the file roots
    exp = utils.read_catalog(EXPOSURE_API.format(assoc=assoc), format="csv")

    # Make all the directories
    for subdir_name in ("RAW", "Persistence", "Extractions", "Prep"):
        (assoc_dir / subdir_name).mkdir(exist_ok=True, parents=True)

    for filename in exp["dataset"]:
        try:
            shutil.copy(raw_output_dir / f"{filename}_rate.fits", assoc_dir / "RAW")
        except OSError as e:
            # Best effort: report the missing/unreadable file and carry on
            print(e)
            print(f"{filename} not found.")

Process the `*_rate.fits` files with the default parameters. If non-standard processing is desired (e.g. skipping the iterative alignment), this is the place to change things.

In [None]:
# Run the grizli visit processing for every selected association that does not
# yet have drizzled science products in its Prep directory.
for assoc in assoc_query["assoc_name"][nis]:
    prep_products = [
        *(grizli_home_dir / "visits" / assoc / "Prep").glob("*drz_sci.fits")
    ]
    if prep_products:
        print(f"Directory {assoc} found, local preprocesing complete!")
        continue

    # Pass the pinned CRDS context both to the visit processing and to the
    # mosaic drizzle step.
    crds_context = os.environ["CRDS_CONTEXT"]
    _ = visit_processor.process_visit(
        assoc,
        clean=False,
        sync=False,
        with_db=False,
        other_args={
            "CRDS_CONTEXT": crds_context,
            "mosaic_drizzle_args": {"context": crds_context},
        },
    )

In [6]:
# Work from the top-level Prep directory so the relative link targets below
# (`../visits/...`) resolve against it.
os.chdir(grizli_home_dir / "Prep")

# Symlink preprocessed exposure files here
# `ln -sf` creates symbolic links and replaces any existing link of the same
# name; `{assoc}` is interpolated by IPython before the shell expands the glob.
for assoc in assoc_query['assoc_name'][nis]:
    !ln -sf ../visits/{assoc}/Prep/*rate.fits . 

The next step is to make drizzled mosaics from all of the processed files in each filter. For NIRISS, this function creates the mosaics with the `n-clear` filter suffix.

In [None]:
import numpy as np
from astropy.wcs import WCS

# Sorted list of every rate file linked into the top-level Prep directory
prep_dir = grizli_home_dir / "Prep"
files = sorted(str(path) for path in prep_dir.glob("*rate.fits"))

# Exposure information table built from the local files
res = visit_processor.res_query_from_local(files=files)

# Flag exposures whose filter name contains "GR"
is_grism = np.array(["GR" in filt for filt in res["filter"]])

# Mosaic WCS that contains the exposures, but could come from somewhere else
hdu = utils.make_maximal_wcs(
    files=files,
    pixel_scale=0.03,
    pad=6,
    get_hdu=True,
    verbose=False,
)
ref_wcs = WCS(hdu.header)

mosaic_kwargs = {
    # Pass the exposure information table for the direct images
    "res": res[~is_grism],
    "ir_wcs": ref_wcs,
    # Otherwise will make JWST exposures at half pixel scale of ref_wcs
    "half_optical": False,
    # Drizzle parameters
    "kernel": "square",
    "pixfrac": 0.8,
    # Otherwise removes "rate.fits" files from the working directory!
    "clean_flt": False,
    "s3output": None,
    "make_exptime_map": False,
    "weight_type": "jwst",
    "skip_existing": False,
    "context": os.environ["CRDS_CONTEXT"],
}

_ = visit_processor.cutout_mosaic(root_name, **mosaic_kwargs)

If you want a stacked mosaic from all of the filters, we need a bit of a workaround. The `grizli` processing steps above create separate `*_visits.yaml` files for each visit, and so we need to combine them into a single file. This allows us to use the older `grizli.pipeline.auto_script` functions to create a combined mosaic.

In [8]:
from grizli.pipeline import auto_script
from astropy.table import vstack

# Merge the per-association `*_visits.yaml` contents into single lists/tables
# and write them out under `root_name`.
visits, groups, info = [], [], None
for assoc in assoc_query["assoc_name"][nis]:
    v, g, i = auto_script.load_visits_yaml(
        grizli_home_dir / "visits" / assoc / "Prep" / f"{assoc}_visits.yaml"
    )

    # Flatten the nested footprint lists of every visit by one level
    for v_j in v:
        v_j["footprints"] = [fp for fps in v_j["footprints"] for fp in fps]

    # Same flattening for each image type within every group
    for g_j in g:
        for img_type in g_j.keys():
            try:
                g_j[img_type]["footprints"] = [
                    fp for fps in g_j[img_type]["footprints"] for fp in fps
                ]
            except (KeyError, TypeError) as err:
                # Entry has no usable "footprints" list: report it and move on.
                # (A bare `except:` here would also swallow KeyboardInterrupt.)
                print(f"Could not flatten footprints for {img_type!r}: {err!r}")
                print(g_j)

    visits.extend(v)
    groups.extend(g)
    # Stack the exposure info tables from successive associations
    if info is None:
        info = i
    else:
        info = vstack([info, i])

auto_script.write_visit_info(visits, groups, info, root_name)

Note the NIRISS suffix on the filter combinations.

In [None]:
# Combine the three NIRISS filter mosaics under the "ir" key; the filter names
# carry the NIRISS "N-CLEAR" suffix.
ir_filters = ["F115WN-CLEAR", "F150WN-CLEAR", "F200WN-CLEAR"]
auto_script.make_filter_combinations(
    root_name,
    filter_combinations={"ir": ir_filters},
)