This notebook compresses the data using both classical and deep-learning-based methods. Please download the dataset beforehand by executing `download_data.ipynb`.

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell execution and edits to local helper modules
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [14]:
import os, sys

sys.path.append("../../")

import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

import pandas as pd
import quilt3
from pathlib import Path
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter
from random import random
from tqdm import tqdm
from utils import compression_ratio_space_savings, plot_data, write_metrics

# Pin all CUDA work to a single GPU. Setting the variable in os.environ is
# sufficient: it applies to this kernel process and is inherited by every
# subprocess started later with `!`. A separate shell `export` would only
# affect a short-lived subshell and is therefore not used.
# NOTE(review): index "3" is specific to the machine this was run on - adjust
# it to a GPU that exists on your system.
os.environ['CUDA_VISIBLE_DEVICES'] = "3"

In [15]:
# Parameters

# Cell line to process. The paper evaluates four nuclear structures:
#   "FBL"       - fibrillarin
#   "NPM1"      - nucleophosmin
#   "LMNB1"     - lamin B1
#   "HIST1H2BJ" - histone H2B
cline = "FBL"

# 3D data locations (train / holdout split lives below the cell-line folder)
parent_path_3d = Path("../../../../data/labelfree3D", cline)
train_path_3d = parent_path_3d / "train"
holdout_path_3d = parent_path_3d / "holdout"

# 2D data locations (same layout as the 3D tree)
parent_path_2d = Path("../../../../data/labelfree2D", cline)
train_path_2d = parent_path_2d / "train"
holdout_path_2d = parent_path_2d / "holdout"

# Classic Compression

In [5]:
### Defining compression method (by r/w tmp file)
import tifffile
import os


def wasteful_compression(img, compression, level=8):
    """Round-trip an image through a compressed TIFF and return the decoded pixels.

    The image is written to a scratch tiled TIFF with the requested codec and
    read straight back, so the returned data carries whatever loss the codec
    introduced. It is "wasteful" because the only purpose of the file is to
    apply the codec; the file itself is discarded.

    Args:
        img: Image data to compress; it is written with axes metadata "YX".
        compression: Codec identifier forwarded to ``tifffile.imwrite``
            (the notebook uses "JPEGXR", "JPEG_2000_LOSSY" and "LERC").
        level: Value forwarded as ``compressionargs={"level": level}``.
            Defaults to 8, the value previously hard-coded here.
            NOTE(review): the meaning of ``level`` presumably differs per
            codec - confirm against the codec documentation before tuning.

    Returns:
        The decoded image as returned by ``AICSImage.get_image_data("YX")``.
    """
    import shutil
    import tempfile

    # Use a private scratch directory instead of a fixed "./temp.tiff" so that
    # concurrent runs cannot overwrite each other's file, nothing in the
    # working directory is clobbered, and no stale file is left behind.
    scratch_dir = tempfile.mkdtemp(prefix="wasteful_compression_")
    try:
        sample_path = Path(scratch_dir) / "temp.tiff"
        tifffile.imwrite(
            str(sample_path),
            img,
            bigtiff=True,
            photometric="minisblack",
            planarconfig="separate",
            tile=(16, 16),
            compression=compression,
            compressionargs={"level": level},
            metadata={"axes": "YX"},
        )
        reader = AICSImage(sample_path)
        return reader.get_image_data("YX")
    finally:
        # Best-effort cleanup: a failure to delete the scratch file must not
        # mask the result (or the original exception) of the round trip.
        shutil.rmtree(scratch_dir, ignore_errors=True)


# Codec names to evaluate. Each name is forwarded as the `compression`
# argument of tifffile.imwrite by wasteful_compression, and compress_dir also
# uses it as the name of the output sub-directory.
compression_techniques = ["JPEGXR", "JPEG_2000_LOSSY", "LERC"]

In [11]:
### Compression with classic techniques and appending 'original' 'method'
import shutil


def compress_dir(compression, path, copy_gt=True):
    """Create a ``<path>/<compression>`` folder holding compressed copies of the images.

    Every ``*.tiff`` file directly inside ``path`` is considered:

    * files ending in ``IM.tiff`` are read as "YX", passed through
      ``wasteful_compression`` and saved as OME-TIFF under the same name;
    * files ending in ``GT.tiff`` are copied unchanged, but only when
      ``copy_gt`` is true;
    * anything else is ignored.

    Files that already exist in the output folder are left untouched, so an
    interrupted run can be resumed.

    Args:
        compression: Codec name; also used as the output sub-directory name.
        path: Directory (``pathlib.Path``) containing the source TIFF files.
        copy_gt: Whether to copy the ``GT.tiff`` files alongside the
            compressed images.
    """
    target_dir = path / Path(compression)
    target_dir.mkdir(parents=True, exist_ok=True)

    for source_file in tqdm(path.glob("*.tiff")):
        target_file = target_dir / Path(source_file.name)
        if target_file.is_file():
            # Already produced by an earlier run.
            continue

        file_name = source_file.name
        if copy_gt and file_name.endswith("GT.tiff"):
            shutil.copyfile(source_file, target_file)
            continue

        if file_name.endswith("IM.tiff"):
            pixels = AICSImage(source_file).get_image_data("YX")
            degraded = wasteful_compression(pixels, compression)
            OmeTiffWriter.save(degraded, target_file, dim_order="YX")


def extract_origin_dir(path):
    """Copy the uncompressed ``*IM.tiff`` images into ``<path>/original``.

    The ``original`` folder is created if needed. Images whose copy already
    exists there are skipped, so existing files are never overwritten.

    Args:
        path: Directory (``pathlib.Path``) containing the source images.
    """
    destination_dir = path / Path("original")
    destination_dir.mkdir(parents=True, exist_ok=True)

    candidates = (
        (source_file, destination_dir / Path(source_file.name))
        for source_file in path.glob("*IM.tiff")
    )
    for source_file, destination_file in candidates:
        if destination_file.is_file():
            continue
        shutil.copyfile(source_file, destination_file)


# Run every classical codec over the 2D data. "original" is only a label for
# the uncompressed reference images, not a codec, so it is filtered out here:
# this keeps the cell safe to re-run after "original" has been appended below.
codecs_to_run = [name for name in compression_techniques if name != "original"]
for i, compression in enumerate(codecs_to_run):
    print(f"step [{i+1}/{len(codecs_to_run)}]: {compression}")
    # Training set: compressed images plus a copy of the ground truth.
    compress_dir(compression, train_path_2d, True)
    # Holdout set: compressed images only.
    compress_dir(compression, holdout_path_2d, False)

# Place the uncompressed holdout images in their own "original" folder so
# they sit next to the per-codec folders.
extract_origin_dir(holdout_path_2d)
# Register the label once; appending unconditionally would add a duplicate on
# every re-execution of this cell.
if "original" not in compression_techniques:
    compression_techniques.append("original")

step [1/3]: JPEGXR


806it [02:17,  5.85it/s]
194it [00:32,  5.95it/s]


step [2/3]: JPEG_2000_LOSSY


806it [05:23,  2.49it/s]
194it [01:16,  2.53it/s]


step [3/3]: LERC


806it [01:20, 10.05it/s]
194it [00:18, 10.31it/s]


# Deep Learning Compression

We use the pretrained models provided by the CompressAI team. Running this section takes around 16 hours on a single A100 GPU.

In [None]:
from compressai.zoo.image import cfgs
from compressai.zoo import image_models

# Build the list of (model name, quality, metric) settings to run: every
# model registered in `image_models` whose name does not contain "3d",
# combined with both training metrics. The quality is the largest key found
# in `cfgs[model]` - presumably the highest quality level; confirm against
# the CompressAI model zoo.
configurations=[]
for model in image_models.keys():
    if "3d" not in model.lower():  # only consider 2d cases
        for metric in ['mse','ms-ssim']:
            configurations.append((model,max(cfgs[model]),metric))

# For each setting, encode and decode every holdout image whose name matches
# '*IM*.tiff'. Results go to a sub-directory of the image's folder named
# '<model>_<metric>_<quality>_RGB'.
for setting in configurations:
    for image in holdout_path_2d.glob('*IM*.tiff'): 
        model_name= setting[0]+'_'+setting[2]+'_'+str(setting[1])+'_RGB'
        path_encoded= image.parent/model_name/str(image.stem+'_encoded')
        path_encoded.parent.mkdir(exist_ok=True)
        path_decoded= image.parent/model_name/str(image.stem+'_decoded.tiff')
        # Same parent directory as path_encoded, so this second mkdir is a no-op.
        path_decoded.parent.mkdir(exist_ok=True)
        # Skip images that already have a decoded result, so the loop can be resumed.
        if not path_decoded.is_file():
            # Shell out to the codec script from a CompressAI checkout located
            # four directories up; `{...}` is interpolated by IPython.
            # NOTE(review): the `--channel` and `-m` options are assumed to be
            # supported by that local codec.py - confirm.
            !python3 ../../../../CompressAI/examples/codec.py encode {image} -o {path_encoded} --model {setting[0]} -q {setting[1]} --channel 3 -m {setting[2]} --cuda
            !python3 ../../../../CompressAI/examples/codec.py decode {path_encoded} -o {path_decoded} --channel 3 --cuda

# Evaluation

We compare both the classic and DL-based methods and write all the metrics to a csv file. The metrics include: Quality (MSE, SSIM, PSNR, Pearson Correlation) and Storage savings.

In [None]:
# Evaluate the 2D holdout directory and write the results to
# ./compression_2d_metric.csv. `write_metrics` comes from the local `utils`
# module; presumably it walks the per-method sub-directories created above -
# verify against its implementation.
write_metrics(holdout_path_2d, Path("./") / "compression_2d_metric.csv")