Skip to content

Commit

Permalink
Merge pull request #23 from Yu-AnChen/master
Browse files Browse the repository at this point in the history
Compress output image files
  • Loading branch information
clarenceyapp committed Sep 5, 2023
2 parents cfdf9e1 + b391eb4 commit 8eec1cd
Show file tree
Hide file tree
Showing 10 changed files with 381 additions and 179 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
cd ~/data
rm -rf large
mkdir large
docker run -v "$PWD":/data s3seg:test-large /bin/bash -c "cd /data; \
docker run -v "$PWD":/data -u root s3seg:test-large /bin/bash -c "cd /data; \
python /app/S3segmenter.py --imagePath $IMG --stackProbPath $PMAP --outputPath large"
# If the action is successful, the output will be available as a downloadable artifact
Expand Down
40 changes: 40 additions & 0 deletions large/.dev/create-docker-env.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Note for creating/building docker image for palom ([reference](https://micromamba-docker.readthedocs.io/en/latest/advanced_usage.html#advanced-usages))

1. Create reference env on micromamba's docker image

```bash
# Run bash in micromamba docker image with bind volume for writing out env
# lock file
docker run -it --rm --platform linux/amd64 -v "$(pwd)":/data mambaorg/micromamba:1.4.9 bash
```

```bash
# Manually install known deps in `palom` env
micromamba create -y -n palom python=3.10 "scikit-image<0.20" scikit-learn "zarr<2.15" tifffile imagecodecs matplotlib tqdm scipy dask numpy loguru=0.5.3 "ome-types>0.3" "pydantic<2" pint napari-lazy-openslide yamale fire termcolor dask-image -c conda-forge


# Use `pip install --dry-run` to verify; only `opencv-python` and `palom`
# should appear as packages that would be installed
micromamba activate palom
python -m pip install --dry-run palom
# output: Would install opencv-python-4.8.0.76 palom-2023.8.1


# if the above checks out, export micromamba env as lock file
micromamba env export --explicit > /data/docker-env.lock


# pip install the remaining packages; note: use `opencv-python-headless`
# instead of `opencv-python`
python -m pip install --no-deps palom==2023.8.1 opencv-python-headless==4.8.0.76


# Test the environment
python -c "import cv2; cv2.blur"
```

2. When building the docker image, specify `--platform linux/amd64`

```bash
docker build --platform linux/amd64 --tag test-s3seg-large .
```
29 changes: 15 additions & 14 deletions large/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
FROM python:3.10
FROM mambaorg/micromamba:1.4.9

RUN python -m pip install \
palom>=2022.9.1 \
dask[dataframe] \
dask-image \
ome_types
RUN python -m pip uninstall opencv-python -y
RUN python -m pip install opencv-python-headless
COPY --chown=$MAMBA_USER:$MAMBA_USER docker-env.lock /tmp/docker-env.lock
RUN micromamba install --name base --yes --file /tmp/docker-env.lock \
&& micromamba clean --trash -aflp --yes

COPY S3segmenter.py ./app/S3segmenter.py
COPY save_tifffile_pyramid.py ./app/save_tifffile_pyramid.py
COPY watershed.py ./app/watershed.py
COPY s3seg_qc.py ./app/s3seg_qc.py
COPY s3seg_util.py ./app/s3seg_util.py
COPY ignored_args.py ./app/ignored_args.py
# pip install packages that are not available/problematic on conda-forge
RUN /opt/conda/bin/python -m pip install \
--no-deps \
opencv-python-headless==4.8.0.76 \
palom==2023.8.1 \
&& /opt/conda/bin/python -m pip cache purge

# copy scripts to /app/
COPY / /app/

# add conda path to PATH to allow entrypoint overwrite
ENV PATH="${PATH}:/opt/conda/bin"
14 changes: 8 additions & 6 deletions large/S3segmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
import pathlib

import watershed
import s3seg_util
import s3seg_qc
import ignored_args

import logging


import palom.reader

def main(argv=sys.argv):

parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -67,13 +68,14 @@ def main(argv=sys.argv):
pixel_size = args.pixelSize
logging.info(f"Pixel size: {pixel_size} (user supplied)")
else:
pixel_size = s3seg_util.detect_pixel_size(img_path)
if pixel_size is None:
try:
pixel_size = palom.reader.OmePyramidReader(img_path).pixel_size
except Exception as err:
print(err)
logging.error(
'Auto-detect pixel size failed, use `--pixelSize SIZE` to specify it'
)
return 1
logging.info(f"Pixel size: {pixel_size} (from ome-xml)")

watershed.main([
'',
Expand All @@ -89,7 +91,7 @@ def main(argv=sys.argv):

s3seg_qc.run_mcmicro(
out_path,
qc_dir / 'nucleiRingOutlines.ome.tif',
qc_dir / f"{img_stem}-nucleiRingOutlines.ome.tif",
pmap_path=args.stackProbPath,
img_path=args.imagePath,
img_channels=img_channels,
Expand Down
1 change: 1 addition & 0 deletions large/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
VERSION = '1.5.5'
272 changes: 272 additions & 0 deletions large/docker-env.lock

Large diffs are not rendered by default.

12 changes: 9 additions & 3 deletions large/s3seg_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import tifffile
import palom

import _version


def mask_to_bound(
img_da,
Expand Down Expand Up @@ -86,11 +88,15 @@ def run_mcmicro(
da_stack = da.array(out_channels)

palom.pyramid.write_pyramid(
palom.pyramid.normalize_mosaics(da_stack),
[da_stack],
out_path,
channel_names=channel_names,
channel_names=[channel_names],
pixel_size=pixel_size,
downscale_factor=2
downscale_factor=2,
compression='zlib',
tile_size=1024,
save_RAM=True,
kwargs_tifffile=dict(software=f"s3segmenter-large v{_version.VERSION}")
)

return 0
12 changes: 0 additions & 12 deletions large/s3seg_util.py

This file was deleted.

117 changes: 0 additions & 117 deletions large/save_tifffile_pyramid.py

This file was deleted.

61 changes: 35 additions & 26 deletions large/watershed.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
format="%(asctime)s | %(levelname)-8s | %(message)s (%(filename)s:%(lineno)s)",
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO
)
)

import _version

import save_tifffile_pyramid

def filter_label_area(label_img, area_min, area_max):
if np.all(label_img == 0):
Expand Down Expand Up @@ -120,21 +121,27 @@ def run(self, config_id=None, compute=True):
def write(self, file_path, img=None, config_id=None):
file_path = pathlib.Path(file_path)
file_name = file_path.name
if img is None: img = self.run(config_id, compute=False)
if file_name.endswith('.zarr'):
if img is None: img = self.run(config_id, compute=False)
logging.info(f'Writing to {file_path}')
with dask.diagnostics.ProgressBar():
return img.to_zarr(file_path)
if file_name.endswith(('.ome.tiff', '.ome.tif')):
if img is None: img = self.run(config_id)
logging.info(f'Writing to {file_path}')
logging.info(f'Writing to {file_path}')
pixel_size = self.pixel_size
if self.pixel_size is None:
pixel_sizes = (1, 1)
else:
pixel_sizes = (self.pixel_size, self.pixel_size)
return save_tifffile_pyramid.save_pyramid(
img, file_path, is_mask=True, pixel_sizes=pixel_sizes
)
pixel_size = 1
return palom.pyramid.write_pyramid(
[img],
file_path,
pixel_size=pixel_size,
downscale_factor=2,
compression='zlib',
is_mask=True,
tile_size=1024,
save_RAM=True,
kwargs_tifffile=dict(software=f"s3segmenter-large v{_version.VERSION}")
)
logging.warning('Write failed: output file type not supported')
return

Expand Down Expand Up @@ -406,10 +413,14 @@ def main(argv=sys.argv):

pixel_size = args.pixel_size
if pixel_size is None:
pixel_size = 1.0
logging.warning(
f"Pixel size not specified, using {pixel_size} micron as a placeholder"
)
try:
pixel_size = palom.reader.OmePyramidReader(args.i).pixel_size
except Exception as err:
print(err)
pixel_size = 1.0
logging.warning(
f"Pixel size not specified, using {pixel_size} µm as a placeholder"
)

segmentor = WatershedSegmentor(
da.from_array(probability_maps[1], chunks=2048),
Expand Down Expand Up @@ -473,12 +484,11 @@ def expand_mask_from_file(
tifffile.imread(input_path, aszarr=True, series=0, level=0)
)

with dask.diagnostics.ProgressBar():
segmentor.write_expanded(
expanded_path,
expand_size,
da.from_zarr(z).rechunk(2048)
)
segmentor.write_expanded(
expanded_path,
expand_size,
da.from_zarr(z).rechunk(2048)
)
return expanded_path

def difference_mask_from_file(
Expand Down Expand Up @@ -508,11 +518,10 @@ def difference_mask_from_file(
segmentor = WatershedSegmentor(
None, None, pixel_size=pixel_size
)
with dask.diagnostics.ProgressBar():
segmentor.write(
file_path=output_path,
img=out_mask
)
segmentor.write(
file_path=output_path,
img=out_mask
)
return


Expand Down

0 comments on commit 8eec1cd

Please sign in to comment.