In [1]:
import argparse
import pathlib
import shutil
import sys

import numpy as np
import tqdm

sys.path.append(str(pathlib.Path("../../utils").resolve()))
from file_checking import check_number_of_files

# Decide whether this file is running interactively or as a plain script:
# if `get_ipython` is not defined, the lookup raises NameError and the
# script code path (CLI arguments) is used further down.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
if in_notebook:
    # interactive session: there is no command line, so use a fixed patient ID
    patient = "NF0014"
else:
    # script mode: the patient ID is a mandatory command-line argument
    argparser = argparse.ArgumentParser(
        description="set up directories for the analysis of the data"
    )
    argparser.add_argument(
        "--patient", type=str, required=True, help="patient name, e.g. 'P01'"
    )
    args = argparser.parse_args()
    patient = args.patient

# flag consulted when a destination directory already exists during copying
overwrite = True

In [3]:
# Input locations for this patient. strict=True makes resolve() raise if the
# directory does not exist, so a missing input stops the run here.
processed_data_dir = pathlib.Path(f"../../data/{patient}/processed_data").resolve(
    strict=True
)
raw_input_dir = pathlib.Path(f"../../data/{patient}/zstack_images").resolve(strict=True)

# Output location that receives the images and masks for CellProfiler.
cellprofiler_dir = pathlib.Path(f"../../data/{patient}/cellprofiler").resolve()
# Discard the results of a previous run only when overwriting is requested;
# an unconditional wipe would make the `overwrite` flag meaningless because no
# destination directory could ever pre-exist during the copy steps.
if overwrite and cellprofiler_dir.exists():
    shutil.rmtree(cellprofiler_dir)
# Needed in every case (fresh run, wiped run, or kept run), so do it once.
cellprofiler_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# List every entry directly under each of the three working directories.
# Only the processed-data listing is used in this cell; the other two are not
# referenced again in the visible cells.
processed_data_dir_directories = [*processed_data_dir.glob("*")]
normalized_data_dir_directories = [*raw_input_dir.glob("*")]
cellprofiler_dir_directories = [*cellprofiler_dir.glob("*")]

# banner announcing which directory is being checked
print(
    f"""
      #################################################################################\n
      ## Checking the number of files in each subdirectory of:\n 
      ## {processed_data_dir.absolute()}\n
      #################################################################################
      """
)
# each entry is checked against an expected count of 11 files
for processed_entry in processed_data_dir_directories:
    check_number_of_files(processed_entry, 11)


      #################################################################################

      ## Checking the number of files in each subdirectory of:
 
      ## /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/data/NF0014/processed_data

      #################################################################################
      
G8-1 expected 11 files, but found 14 files.
D5-1 expected 11 files, but found 13 files.
F8-1 expected 11 files, but found 13 files.
G11-1 expected 11 files, but found 10 files.
G7-1 expected 11 files, but found 14 files.
E3-2 expected 11 files, but found 10 files.
C11-2 expected 11 files, but found 10 files.
E11-1 expected 11 files, but found 10 files.
D11-3 expected 11 files, but found 10 files.
E7-2 expected 11 files, but found 12 files.
C3-2 expected 11 files, but found 10 files.
D4-1 expected 11 files, but found 10 files.
E3-1 expected 11 files, but found 14 files.
D8-1 expected 11 files, but found 14 files.
G2-2 expected 11 files, but found

## Copy the z-stack images (`zstack_images`) to the cellprofiler images dir

In [5]:
# collect every sub-directory of raw_input_dir
norm_dirs = [entry for entry in raw_input_dir.iterdir() if entry.is_dir()]
# mirror each sub-directory (and its files) under cellprofiler_dir
for source_dir in tqdm.tqdm(norm_dirs):
    dest_dir = cellprofiler_dir / source_dir.name
    if dest_dir.exists():
        if not overwrite:
            # an earlier copy is present and must be kept as-is
            continue
        # copytree needs a non-existing destination, so clear the old copy
        shutil.rmtree(dest_dir)
    shutil.copytree(source_dir, dest_dir)

100%|██████████| 104/104 [03:11<00:00,  1.85s/it]


## Copy files from processed dir to cellprofiler images dir

In [6]:
# Names of the mask files to copy out of the processed-data folders.
# A file is selected when one of these strings occurs in its file name.
masks_names_to_copy_over = [
    "cell_masks_watershed.tiff",
    "cytoplasm_mask.tiff",
    "nuclei_masks_reassigned.tiff",
    "organoid_masks_reconstructed.tiff",
]

In [7]:
# Copy the selected mask files of every processed well into the folder of the
# same name under cellprofiler_dir.
dirs = [x for x in processed_data_dir.iterdir() if x.is_dir()]
file_extensions = {".tif", ".tiff"}
for well_dir in tqdm.tqdm(dirs):
    destination_dir = cellprofiler_dir / well_dir.name
    # keep only TIFF files whose name contains one of the wanted mask names;
    # any() stops at the first match so a file is copied at most once
    files = [
        x
        for x in well_dir.iterdir()
        if x.is_file()
        and x.suffix in file_extensions
        and any(mask_name in x.name for mask_name in masks_names_to_copy_over)
    ]
    for file in files:
        # A well may exist in processed_data without an image folder under
        # cellprofiler_dir; create it so the copy does not abort the whole run
        # with FileNotFoundError. Such a well is still reported later by the
        # file-count check because its folder is incomplete.
        destination_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy(file, destination_dir / file.name)

100%|██████████| 104/104 [00:34<00:00,  2.99it/s]


In [8]:
jobs_to_rerun_path = pathlib.Path("../rerun_jobs.txt").resolve()
# start clean: remove a rerun list left behind by an earlier run, if any
try:
    jobs_to_rerun_path.unlink()
except FileNotFoundError:
    pass

In [9]:
# Sub-directories currently present under cellprofiler_dir, in sorted order.
dirs_in_cellprofiler_dir = sorted(x for x in cellprofiler_dir.iterdir() if x.is_dir())
# Check each folder against an expected count of 12 files and remember the
# names of those that fail. The loop variable is deliberately not called `dir`,
# which would shadow the builtin for the rest of the session.
# NOTE(review): assumes check_number_of_files returns a falsy value on a
# mismatch -- confirm in utils/file_checking.
wells_to_rerun = [
    well_dir.name
    for well_dir in tqdm.tqdm(dirs_in_cellprofiler_dir)
    if not check_number_of_files(well_dir, 12)
]
# Write the list in one go; the file is only created when something failed.
if wells_to_rerun:
    with open(jobs_to_rerun_path, "a") as f:
        f.writelines(f"{well_name}\n" for well_name in wells_to_rerun)

100%|██████████| 104/104 [00:00<00:00, 8469.39it/s]

C10-1 expected 12 files, but found 7 files.
C10-2 expected 12 files, but found 9 files.
C11-1 expected 12 files, but found 7 files.
C11-2 expected 12 files, but found 7 files.
C2-1 expected 12 files, but found 6 files.
C2-2 expected 12 files, but found 9 files.
C3-1 expected 12 files, but found 7 files.
C3-2 expected 12 files, but found 7 files.
C4-1 expected 12 files, but found 9 files.
C4-2 expected 12 files, but found 9 files.
C5-1 expected 12 files, but found 9 files.
C5-2 expected 12 files, but found 7 files.
C6-1 expected 12 files, but found 8 files.
C6-2 expected 12 files, but found 9 files.
C7-1 expected 12 files, but found 8 files.
C7-2 expected 12 files, but found 7 files.
C8-1 expected 12 files, but found 7 files.
C8-2 expected 12 files, but found 7 files.
C9-1 expected 12 files, but found 7 files.
C9-2 expected 12 files, but found 9 files.
D10-1 expected 12 files, but found 9 files.
D10-2 expected 12 files, but found 9 files.
D11-1 expected 12 files, but found 9 files.
D11-




In [10]:
# Copy the GIFs of one example well (C4-2) into the examples directory.
# strict=True makes resolve() raise when the source directory is missing, so
# no separate existence check is needed before copying.
example_dir = pathlib.Path("../animations/gif/C4-2").resolve(strict=True)
final_example_dir = pathlib.Path("../examples/segmentation_output/C4-2/gifs").resolve()
# copytree refuses to write into an existing directory, so drop the old copy
if final_example_dir.exists():
    shutil.rmtree(final_example_dir)

shutil.copytree(example_dir, final_example_dir)