From 6aa41925bd2e287f83ac51804e1cd99997f2e614 Mon Sep 17 00:00:00 2001 From: Ian Czekala Date: Mon, 25 May 2026 09:56:28 +1000 Subject: [PATCH] Update ImageMath operation to follow PIL security fix. --- 01-generate-mock-baselines/.gitignore | 2 ++ 01-generate-mock-baselines/README.md | 27 ++++++++++++++++--- 01-generate-mock-baselines/Snakefile | 23 +--------------- .../create_butterfly.py | 5 ++-- 01-generate-mock-baselines/requirements.txt | 3 +-- 5 files changed, 29 insertions(+), 31 deletions(-) create mode 100644 01-generate-mock-baselines/.gitignore diff --git a/01-generate-mock-baselines/.gitignore b/01-generate-mock-baselines/.gitignore new file mode 100644 index 0000000..19d8c2e --- /dev/null +++ b/01-generate-mock-baselines/.gitignore @@ -0,0 +1,2 @@ +data +.snakemake \ No newline at end of file diff --git a/01-generate-mock-baselines/README.md b/01-generate-mock-baselines/README.md index 5adcb4f..60734da 100644 --- a/01-generate-mock-baselines/README.md +++ b/01-generate-mock-baselines/README.md @@ -2,15 +2,34 @@ This example generates a mock sky brightness image and a realistic set of baselines $(u,v)$, which are later used by other examples in this repository, and as a fixture within the MPoL test suite. It uses the IM Lup DSHARP dataset for realistic baselines, weight values, and source flux. +# Prerequisite -# Description of Contents +You should have already downloaded and extracted the IM Lup DSHARP dataset as described in [Example 00](../00-download-and-extract-datasets/README.md). Then, you will need to copy the `IM_Lup_baselines_and_weights.npz` into this directory in a new `data` folder. 
For example, from within this 01 folder, run + +```shell +$ mkdir data +$ cp ../00-download-and-extract-datasets/data/IM_Lup_baselines_and_weights.npz data/ +``` + +# Installation + +You can install the necessary Python packages into your environment with +```shell +$ pip install -r requirements.txt +``` +and then you can run the code by + +```shell +$ snakemake -c1 all +``` + +# Description of Contents -Note that this script does not sample mock visibility values $\mathcal{V}(u,v)$. That is done on the fly using `mpol.fourier.generate_fake_data` in scripts like `sgd/src/load_data.py`, so that sky image size, flux, and measurement noise level can be adjusted as needed. +Note that this script does not actually sample mock visibility values $\mathcal{V}(u,v)$. That is done on the fly using `mpol.fourier.generate_fake_data` in scripts like `sgd/src/load_data.py`, so that sky image size, flux, and measurement noise level can be adjusted on demand. * `create_butterfly.py` downloads a nice looking image from the `ceyda/smithsonian_butterflies` collection, uses PIL to greyscale and crop it, adjusts the flux value to match DSHARP IM Lup, then saves it as a numpy array. -* `export_baselines.py` uses MPoL-dev/visread and casatools to extract real baselines from the IM Lup measurement set, and saves them as a numpy array. To save space, we take <5% of the visibilities. * `package_data.py` combines the two numpy arrays into a single archive, saved as `float32` to save space. 
-Will create this mock image (sourced from `ceyda/smithsonian_butterflies`) stuffed into `data/mock_data.npz` +The end result will be this mock image (sourced from `ceyda/smithsonian_butterflies`) stuffed into `data/mock_data.npz` ![Mock Image](img_plot.png) \ No newline at end of file diff --git a/01-generate-mock-baselines/Snakefile b/01-generate-mock-baselines/Snakefile index be9fe87..b2e20b6 100644 --- a/01-generate-mock-baselines/Snakefile +++ b/01-generate-mock-baselines/Snakefile @@ -1,28 +1,7 @@ rule all: input: - "data/ms.ms", "data/mock_data.npz" -rule download_ms: - output: temp("data/IMLup_continuum.ms.tgz") - shell: "wget https://almascience.eso.org/almadata/lp/DSHARP/MSfiles/IMLup_continuum.ms.tgz --directory-prefix=data/" - -rule untar_and_rename: - input: "data/IMLup_continuum.ms.tgz" - output: directory("data/ms.ms") - shell: - "tar -xf {input} -C data/ --no-same-owner && " - "mv data/IMLup_continuum.ms {output}" - -# only uu and vv that have *all channels unflagged* -# only 1 channel is taken from those available: [1, 8, 16] -# 0.05 taken randomly from those -# saved using float32 -rule export_baselines: - input: "data/ms.ms" - output: npz="data/baselines_and_weights.npz", plot="data/baseline_plot.png" - shell: "python export_baselines.py {input} {output.npz} {output.plot} --select_fraction 0.05" - # saved using float32 rule export_img: output: img="data/img.npy", plot="data/img_plot.png" @@ -31,7 +10,7 @@ rule export_img: # now package these together into a single .npz rule package: input: - baselines="data/baselines_and_weights.npz", + baselines="data/IM_Lup_baselines_and_weights.npz", img="data/img.npy" output: "data/mock_data.npz" diff --git a/01-generate-mock-baselines/create_butterfly.py b/01-generate-mock-baselines/create_butterfly.py index c4cff4f..7997744 100644 --- a/01-generate-mock-baselines/create_butterfly.py +++ b/01-generate-mock-baselines/create_butterfly.py @@ -2,7 +2,6 @@ from PIL import Image, ImageOps, ImageMath import numpy 
as np import matplotlib.pyplot as plt -import matplotlib.colors as mco import argparse dataset = load_dataset("ceyda/smithsonian_butterflies") @@ -36,7 +35,7 @@ def process_image( # apodize im_apod = apodization_function(xsize, ysize, centerfrac, scale) - im_res = ImageMath.eval("a * b", a=im_invert, b=im_apod) + im_res = ImageMath.lambda_eval(lambda d: d["a"] * d["b"], a=im_invert, b=im_apod) # pad to square max_dim = np.maximum(xsize, ysize) @@ -50,7 +49,7 @@ def process_image( a = np.array(im_small) # resizing operation can create some negative pixels, so - # best to just set these to mimimum, which should be 0 + # best to just set these to the minimum, which should be 0 a[a < 0] = 0.0 b = a.astype("float64") diff --git a/01-generate-mock-baselines/requirements.txt b/01-generate-mock-baselines/requirements.txt index d59d5ce..6f868e3 100644 --- a/01-generate-mock-baselines/requirements.txt +++ b/01-generate-mock-baselines/requirements.txt @@ -1,6 +1,5 @@ -casatools -casadata numpy snakemake datasets +matplotlib Pillow \ No newline at end of file