The data were downloaded from https://zenodo.org/record/6139958#.Y78QJKrMLtU and https://zenodo.org/record/6140064#.Y78YeqrMLtU

Data source 1 (https://zenodo.org/record/6139958#.Y78QJKrMLtU) contains a time-lapse TIFF with 240 time steps, each with 5 channels:

* Channel 1 : Low Contrast DPC (Digital Phase Contrast)
* Channel 2 : High Contrast DPC
* Channel 3 : Brightfield (the input in our study)
* Channel 4 : EGFP-α-tubulin
* Channel 5 : mCherry-H2B (the ground truth in our study)

Data source 2 (https://zenodo.org/record/6140064#.Y78YeqrMLtU) contains two sub-folders (train and test), each with snapshots sliced from different time-lapse data. Each snapshot is saved as six different TIFF files:
* _bf: bright field (the input in our study), 
* _cyto: cytoplasm segmentation mask
* _dpc: phase contrast
* _fluo: two channels, the first is cytoplasm and the second is H2B (the H2B channel is the ground truth in our study)
* _nuclei: nuclei segmentation mask
* _sqrdpc: square-root phase contrast

We will use all the data from the two sources and do a roughly 15%:85% split into a test set and a train set. Some files overlap between the two sources; only one copy of each will be kept.

In [None]:
import pooch
from bioio import BioImage
from bioio.writers import OmeTiffWriter
import matplotlib.pyplot as plt
import zipfile
from pathlib import Path
from random import random
import numpy as np


# Root folder that receives the raw downloads and the final train/test split.
data_path = Path("../../data/labelfree2D")
data_path.mkdir(parents=True, exist_ok=True)

# Create the working sub-folders: raw downloads plus the two output splits.
for subfolder in ("download", "train", "test"):
    p = data_path / subfolder
    p.mkdir(parents=True, exist_ok=True)

In [None]:
# Fetch the time-lapse TIFF of data source 1 from Zenodo (addressed by DOI) into
# the download folder; the file is stored under a name without spaces.
# This might temporarily result in an error, see https://github.com/fatiando/pooch/issues/371
source_part1 = pooch.retrieve(
    "doi:10.5281/zenodo.6139958/20210904_TL2 - R05-C03-F0.tif",
    known_hash="md5:1d6cb5b86f39f9accb6ee53367bda8e1",
    fname="20210904_TL2-R05-C03-F0.tiff",
    path=data_path / "download",
)

In [None]:
# Open the downloaded time-lapse TIFF and print its dimensions; the channel (C)
# and time (T) sizes of this reader are used by the following cells.
reader = BioImage(source_part1)
print(reader.dims)

In [None]:
# Preview every channel of a single time point (T=100) side by side in grayscale.
sample = reader.get_image_data("CYX", T=100, Z=0)
n_channels = reader.dims.C
fig, ax = plt.subplots(
    1, n_channels, figsize=(18, 16), dpi=72, facecolor="w", edgecolor="k"
)
# One subplot per channel: hide the axes and show the channel plane.
for panel, channel_image in zip(ax, sample):
    panel.axis("off")
    panel.imshow(channel_image, cmap=plt.cm.gray)

In [None]:
# Split every time point of data source 1 into an (input, ground truth) pair and
# write the pair to either the train or the test folder.
# input (bright field) channel: 2
# ground truth (mCherry-H2B) channel: 4
from random import Random

# A dedicated, seeded generator makes the ~15%/85% test/train assignment
# reproducible. With the unseeded module-level random(), re-running this cell
# reshuffled the split and could leave the same time point in both folders.
split_rng = Random(2021)

fn_base = Path(source_part1).stem
for tt in range(reader.dims.T):
    im = reader.get_image_data("YX", Z=0, T=tt, C=2)
    gt = reader.get_image_data("YX", Z=0, T=tt, C=4)

    if split_rng.random() < 0.15:
        data_type, other_type = "test", "train"
    else:
        data_type, other_type = "train", "test"

    # The input (IM) and ground truth (GT) of one time point always share a split.
    for suffix, image in (("IM", im), ("GT", gt)):
        out_name = f"{fn_base}_{tt:03d}_{suffix}.tiff"
        # Drop a same-named file from the other split so that a sample never
        # ends up in both the train and the test set.
        (data_path / other_type / out_name).unlink(missing_ok=True)
        out_fn = data_path / data_type / out_name
        OmeTiffWriter.save(image.astype(np.uint16), out_fn, dim_order="YX")

In [None]:
# Fetch the zipped snapshot collection of data source 2 from Zenodo (addressed
# by DOI) into the download folder.
# This might temporarily result in an error, see https://github.com/fatiando/pooch/issues/371
source_part2 = pooch.retrieve(
    "doi:10.5281/zenodo.6140064/training_dataset.zip",
    known_hash="md5:7d218466d217fd62dc8ec56ad76d23d7",
    fname="labelfree2d_part2.zip",
    path=data_path / "download",
)

In [None]:
# Unpack the downloaded archive of data source 2 into the download folder.
with zipfile.ZipFile(source_part2, mode="r") as archive:
    archive.extractall(path=data_path / "download")

In [None]:
# Convert the snapshots of data source 2 into (input, ground truth) pairs and
# distribute them over the train/test folders. The original train/test
# sub-folders of the download are both re-split here.
from random import Random

# A dedicated, seeded generator makes the ~15%/85% test/train assignment
# reproducible. With the unseeded module-level random(), re-running this cell
# reshuffled the split and could leave the same snapshot in both folders.
split_rng = Random(2021)

for source_set in ["train", "test"]:
    source_path = data_path / "download" / source_set
    filenames = sorted(source_path.glob("*_bf.tif"))
    for fn in filenames:
        # "<name>_bf" -> "<name>_fluo.tif": fluorescence file of the same snapshot
        fn_fluo = source_path / f"{fn.stem[:-2]}fluo.tif"
        # NOTE(review): fn_base keeps the trailing "_bf", so outputs are named
        # "<name>_bf_IM.tiff" / "<name>_bf_GT.tiff" -- confirm this is intended
        # if overlap with data source 1 is meant to be resolved by file name.
        fn_base = fn.stem.replace(" ", "")

        # get bright field image
        bf_reader = BioImage(fn)
        im = bf_reader.get_image_data("YX", Z=0, T=0, C=0)

        # get H2B fluorescent image (second channel of the _fluo file)
        h2b_reader = BioImage(fn_fluo)
        gt = h2b_reader.get_image_data("YX", Z=0, C=1, T=0)

        if split_rng.random() < 0.15:
            data_type, other_type = "test", "train"
        else:
            data_type, other_type = "train", "test"

        # The input (IM) and ground truth (GT) of a snapshot always share a split.
        for suffix, image in (("IM", im), ("GT", gt)):
            out_name = f"{fn_base}_{suffix}.tiff"
            # Drop a same-named file from the other split so that only one copy
            # is kept and no sample sits in both the train and the test set.
            (data_path / other_type / out_name).unlink(missing_ok=True)
            out_fn = data_path / data_type / out_name
            OmeTiffWriter.save(image.astype(np.uint16), out_fn, dim_order="YX")


In [None]:
# you may remove the download folder now.
from shutil import rmtree

# ignore_errors=True makes the cleanup best-effort (e.g. the folder is already
# gone); the keyword was previously misspelled, which raised a TypeError.
rmtree(data_path / Path("download"), ignore_errors=True)