This dataset was downloaded from https://zenodo.org/record/4751737#.Y9gbv4HMLVZ and originates from the following paper:

Ghahremani, P., Li, Y., Kaufman, A. et al. Deep learning-inferred multiplex immunofluorescence for immunohistochemical image quantification. Nat Mach Intell 4, 401–412 (2022). https://doi.org/10.1038/s42256-022-00471-x

The files we used are "BC-DeepLIIF_Training_Set.zip" and "BC-DeepLIIF_Validation_Set.zip".

In [None]:
import pooch
from skimage.io import imread, imsave
from skimage.color import rgb2gray
import zipfile
from pathlib import Path
from random import random
import numpy as np


# Root folder of the prepared dataset (relative to the notebook's working
# directory) and the three sub-folders used by the cells below.
data_path = Path("../../data/multiplexTransfer")
download_path = data_path / "download"
train_path = data_path / "train_v2"
test_path = data_path / "test"

# parents=True also creates data_path and any missing ancestors; a plain
# mkdir raises FileNotFoundError when the parent folder does not exist yet,
# which is the case on a fresh checkout.
for folder in (download_path, train_path, test_path):
    folder.mkdir(parents=True, exist_ok=True)

In [None]:
# Fetch the training archive from Zenodo and store it in download_path as
# "source_part1.zip". known_hash is the MD5 checksum handed to pooch for the
# downloaded file; source_part1 receives the value returned by pooch.retrieve
# (used as the zip file location in the next cell).
training_zip_url = "https://zenodo.org/record/4751737/files/BC-DeepLIIF_Training_Set.zip?download=1"
source_part1 = pooch.retrieve(
    training_zip_url,
    known_hash="md5:61d02c92fce42b56d0ec01a20498879b",
    path=download_path,
    fname="source_part1.zip",
)

In [None]:
# Unpack the training archive into the download folder. "r" (read) is the
# default mode of ZipFile, so it is not spelled out.
with zipfile.ZipFile(source_part1) as training_archive:
    training_archive.extractall(path=download_path)

In [None]:
# Fetch the validation archive from Zenodo and store it in download_path as
# "source_part2.zip". known_hash is the MD5 checksum handed to pooch for the
# downloaded file; source_part2 receives the value returned by pooch.retrieve
# (used as the zip file location in the next cell).
validation_zip_url = "https://zenodo.org/record/4751737/files/BC-DeepLIIF_Validation_Set.zip?download=1"
source_part2 = pooch.retrieve(
    validation_zip_url,
    known_hash="md5:f172eb8ae915c1ba772ac1e3c2b6db72",
    path=download_path,
    fname="source_part2.zip",
)

In [None]:
# Unpack the validation archive into the download folder. "r" (read) is the
# default mode of ZipFile, so it is not spelled out.
with zipfile.ZipFile(source_part2) as validation_archive:
    validation_archive.extractall(path=download_path)

In [None]:
# Create one sub-folder per translation task (trans1, trans2, trans3) in both
# the train and the test folder. Existing folders are left untouched.
for task_no in (1, 2, 3):
    task_name = f"trans{task_no}"
    for split_root in (train_path, test_path):
        (split_root / task_name).mkdir(exist_ok=True)

In [None]:
# Cut every stitched PNG into its input panel and three target panels, then
# write the (input, target) pairs into the trans1/trans2/trans3 folders of
# either the train or the test split.
from random import Random

PANEL_WIDTH = 512  # width in pixels of one panel inside a stitched PNG
TEST_FRACTION = 0.1  # probability of routing a sample to the test split

# Fixed seed: re-running this cell reproduces the same split. With an unseeded
# generator a re-run could write a sample into train that an earlier run had
# already placed in test, leaking test data into training.
split_rng = Random(0)

# One running index shared by both source sets. Restarting the numbering at 0
# for each set would let validation samples overwrite training samples that
# carry the same number and land in the same split.
sample_idx = 0
for set_name in ["BC-DeepLIIF_Training_Set", "BC-DeepLIIF_Validation_Set"]:
    set_path = download_path / set_name
    # Sorted so that the index assigned to each file is deterministic.
    for fn in sorted(set_path.glob("*.png")):
        full_img = imread(fn)

        # Panel 0 (columns 0-511) is the raw input; panels 1-3 are the targets
        # for trans1, trans2 and trans3 respectively.
        # NOTE(review): assumes every PNG is at least 4 * 512 px wide and has
        # a trailing channel axis - confirm against the dataset.
        panels = [
            full_img[:, k * PANEL_WIDTH:(k + 1) * PANEL_WIDTH, :]
            for k in range(4)
        ]
        img = panels[0]
        targets = panels[1:]

        if split_rng.random() < TEST_FRACTION:
            target_path = test_path
        else:
            target_path = train_path

        # Every task folder receives its own copy of the input next to the
        # task-specific target.
        for task_no, gt in enumerate(targets, start=1):
            task_dir = target_path / f"trans{task_no}"
            imsave(task_dir / f"{sample_idx}_IM.tiff", img)
            imsave(task_dir / f"{sample_idx}_GT.tiff", gt)

        sample_idx += 1



In [None]:
# Optional cleanup: delete the download folder (the zip archives and their
# extracted contents) once the train/test folders have been written.
from shutil import rmtree

rmtree(path=download_path)