# Convert SQLite output(s) to parquet files with CytoTable

## Import libraries

In [1]:
import argparse
import logging
import pathlib
import uuid

import duckdb
import pandas as pd
import tqdm

# cytotable will merge objects from SQLite file into single cells and save as parquet file
from cytotable import convert, presets
from parsl.config import Config
from parsl.executors import HighThroughputExecutor

# Set the logging level to a higher level to avoid outputting unnecessary errors from config file in convert function
logging.getLogger().setLevel(logging.ERROR)
try:
    cfg = get_ipython().config
    in_notebook = True
except NameError:
    in_notebook = False

In [2]:
if not in_notebook:
    print("Running as script")
    # set up arg parser
    parser = argparse.ArgumentParser(description="Segment the nuclei of a tiff image")

    parser.add_argument(
        "--patient",
        type=str,
        help="Patient ID",
    )

    args = parser.parse_args()
    patient = args.patient
else:
    print("Running in a notebook")
    patient = "NF0021"

middle_slice_input = pathlib.Path(
    f"../../data/{patient}/cellprofiler_middle_slice_output/"
).resolve(strict=True)
max_projected_input = pathlib.Path(
    f"../../data/{patient}/cellprofiler_zmax_proj_output/"
).resolve(strict=True)

# directory for processed data
output_dir = pathlib.Path(f"../../data/{patient}/0.converted/").resolve()
output_dir.mkdir(parents=True, exist_ok=True)
middle_slice_sc_output = pathlib.Path(output_dir, "middle_slice_sc.parquet").resolve()
max_projected_sc_output = pathlib.Path(output_dir, "max_projected_sc.parquet").resolve()
middle_slice_organoid_output = pathlib.Path(
    output_dir, "middle_slice_organoid.parquet"
).resolve()
max_projected_organoid_output = pathlib.Path(
    output_dir, "max_projected_organoid.parquet"
).resolve()

Running in a notebook


## Set paths and variables

In [3]:
# preset configurations based on typical CellProfiler outputs
preset = "cellprofiler_sqlite_pycytominer"

# update preset to include site metadata and cell counts
joins = presets.config["cellprofiler_sqlite_pycytominer"]["CONFIG_JOINS"].replace(
    "Image_Metadata_Well,",
    "Image_Metadata_Well, Image_Metadata_Site, Image_Count_Cells,",
)

# type of file output from cytotable (currently only parquet)
dest_datatype = "parquet"


well_fov_dict = {}
for sqlite_dir in [middle_slice_input, max_projected_input]:
    twoD_type = sqlite_dir.name.split("_out")[0].split("cellprofiler_")[1]
    well_fov_dict[twoD_type] = {}
    sqlites = list(sqlite_dir.rglob("*sqlite"))
    sqlites.sort()  # sort to ensure consistent order
    for file_path in sqlites:
        well_fov = file_path.parent.stem
        well_fov_dict[twoD_type][well_fov] = {
            "image_path": file_path,
            "output_dir": output_dir / twoD_type / f"{well_fov}",
        }

## Convert SQLite to parquet file(s) for single-cell profiles

In [4]:
output_dict_of_dfs = {}
for sqlite_dir in [middle_slice_input, max_projected_input]:
    output_dict_of_dfs[sqlite_dir.name.split("_out")[0].split("cellprofiler_")[1]] = {
        "df_list": [],
    }
output_dict_of_dfs

{'middle_slice': {'df_list': []}, 'zmax_proj': {'df_list': []}}

In [5]:
total = 0
errors = 0
# loop through the middle and zmax projected sqlite files
for featurization_type in well_fov_dict.keys():
    for well_fov, file_info in tqdm.tqdm(well_fov_dict[featurization_type].items()):
        sqlite_file = file_info["image_path"]
        total += 1
        # convert the sqlite file to a single cell parquet file
        try:
            df = convert(
                sqlite_file,
                preset=preset,
                joins=joins,
                chunk_size=500,
                dest_datatype=dest_datatype,
                dest_path=f"{well_fov_dict[featurization_type][well_fov]['output_dir']}_sc.parquet",
                parsl_config=Config(
                    executors=[HighThroughputExecutor()],
                    run_dir=f"cytotable_runinfo/{uuid.uuid4().hex}",
                ),
            )
            output_dict_of_dfs[featurization_type]["df_list"].append(
                f"{well_fov_dict[featurization_type][well_fov]['output_dir']}_sc.parquet"
            )
        except Exception as e:
            errors += 1
            print(f"Error processing {sqlite_file}: {e}")
            continue
print(f"Total files processed: {total}")
print(f"Total errors encountered: {errors}")

  0%|          | 1/347 [00:07<41:52,  7.26s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C10-1/gff_extracted_features.sqlite: list index out of range
Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C10-2/gff_extracted_features.sqlite: An existing file or directory was provided as dest_path: '/home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/0.converted/middle_slice/C10-2_sc.parquet'. Please use a new path for this parameter.


  1%|          | 4/347 [00:08<08:24,  1.47s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C10-3/gff_extracted_features.sqlite: list index out of range
Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C10-4/gff_extracted_features.sqlite: An existing file or directory was provided as dest_path: '/home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/0.converted/middle_slice/C10-4_sc.parquet'. Please use a new path for this parameter.


  2%|▏         | 6/347 [00:08<04:20,  1.31it/s]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C10-5/gff_extracted_features.sqlite: An existing file or directory was provided as dest_path: '/home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/0.converted/middle_slice/C10-5_sc.parquet'. Please use a new path for this parameter.
Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C10-6/gff_extracted_features.sqlite: An existing file or directory was provided as dest_path: '/home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/0.converted/middle_slice/C10-6_sc.parquet'. Please use a new path for this parameter.


  2%|▏         | 7/347 [00:08<03:16,  1.73it/s]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C10-7/gff_extracted_features.sqlite: An existing file or directory was provided as dest_path: '/home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/0.converted/middle_slice/C10-7_sc.parquet'. Please use a new path for this parameter.
Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C11-1/gff_extracted_features.sqlite: An existing file or directory was provided as dest_path: '/home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/0.converted/middle_slice/C11-1_sc.parquet'. Please use a new path for this parameter.


  3%|▎         | 10/347 [00:09<02:03,  2.73it/s]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C11-2/gff_extracted_features.sqlite: list index out of range
Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C11-3/gff_extracted_features.sqlite: An existing file or directory was provided as dest_path: '/home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/0.converted/middle_slice/C11-3_sc.parquet'. Please use a new path for this parameter.


  4%|▍         | 14/347 [00:40<34:02,  6.13s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C11-7/gff_extracted_features.sqlite: list index out of range


  5%|▌         | 18/347 [01:12<43:02,  7.85s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C2-4/gff_extracted_features.sqlite: list index out of range


  5%|▌         | 19/347 [01:13<31:17,  5.73s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C2-5/gff_extracted_features.sqlite: list index out of range


  6%|▌         | 21/347 [01:23<30:00,  5.52s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C2-7/gff_extracted_features.sqlite: list index out of range


  6%|▋         | 22/347 [01:24<21:59,  4.06s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C3-1/gff_extracted_features.sqlite: list index out of range


  7%|▋         | 23/347 [01:24<16:23,  3.04s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C3-2/gff_extracted_features.sqlite: list index out of range


  9%|▉         | 32/347 [02:51<50:07,  9.55s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C4-5/gff_extracted_features.sqlite: list index out of range


 12%|█▏        | 41/347 [04:18<49:12,  9.65s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C5-7/gff_extracted_features.sqlite: list index out of range


 14%|█▍        | 50/347 [05:45<47:56,  9.69s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C7-2/gff_extracted_features.sqlite: list index out of range


 15%|█▍        | 51/347 [05:46<34:37,  7.02s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C7-3/gff_extracted_features.sqlite: list index out of range


 15%|█▍        | 52/347 [05:47<25:12,  5.13s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C7-4/gff_extracted_features.sqlite: list index out of range


 15%|█▌        | 53/347 [05:47<18:08,  3.70s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C7-5/gff_extracted_features.sqlite: list index out of range


 16%|█▌        | 54/347 [05:47<13:01,  2.67s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C7-6/gff_extracted_features.sqlite: list index out of range


 16%|█▌        | 56/347 [05:57<19:42,  4.06s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C8-1/gff_extracted_features.sqlite: list index out of range


 19%|█▉        | 66/347 [07:37<45:45,  9.77s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/C9-4/gff_extracted_features.sqlite: list index out of range


 22%|██▏       | 75/347 [09:10<47:24, 10.46s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D10-6/gff_extracted_features.sqlite: list index out of range


 24%|██▍       | 83/347 [10:31<45:17, 10.29s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D11-7/gff_extracted_features.sqlite: list index out of range


 25%|██▌       | 87/347 [11:11<43:44, 10.09s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D2-4/gff_extracted_features.sqlite: list index out of range


 26%|██▌       | 91/347 [12:08<56:19, 13.20s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D3-1/gff_extracted_features.sqlite: list index out of range


 27%|██▋       | 95/347 [13:07<1:00:12, 14.34s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D3-5/gff_extracted_features.sqlite: list index out of range


 28%|██▊       | 97/347 [13:29<53:10, 12.76s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D3-7/gff_extracted_features.sqlite: list index out of range


 29%|██▉       | 101/347 [14:29<58:50, 14.35s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D4-4/gff_extracted_features.sqlite: list index out of range


 29%|██▉       | 102/347 [14:31<43:10, 10.57s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D4-5/gff_extracted_features.sqlite: list index out of range


 31%|███       | 106/347 [15:29<54:49, 13.65s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D5-2/gff_extracted_features.sqlite: list index out of range


 34%|███▎      | 117/347 [18:48<1:04:24, 16.80s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D6-6/gff_extracted_features.sqlite: list index out of range


 34%|███▍      | 119/347 [19:08<50:44, 13.35s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D7-1/gff_extracted_features.sqlite: list index out of range


 35%|███▍      | 120/347 [19:09<37:18,  9.86s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D7-2/gff_extracted_features.sqlite: list index out of range


 35%|███▍      | 121/347 [19:11<27:48,  7.38s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D7-3/gff_extracted_features.sqlite: list index out of range


 35%|███▌      | 122/347 [19:12<20:22,  5.43s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D7-4/gff_extracted_features.sqlite: list index out of range


 35%|███▌      | 123/347 [19:13<15:16,  4.09s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D7-6/gff_extracted_features.sqlite: list index out of range


 36%|███▌      | 124/347 [19:14<11:37,  3.13s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D7-7/gff_extracted_features.sqlite: list index out of range


 39%|███▉      | 136/347 [22:57<58:01, 16.50s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/D9-5/gff_extracted_features.sqlite: list index out of range


 41%|████▏     | 144/347 [25:20<57:25, 16.97s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E10-7/gff_extracted_features.sqlite: list index out of range


 42%|████▏     | 145/347 [25:22<41:42, 12.39s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E11-1/gff_extracted_features.sqlite: list index out of range


 43%|████▎     | 150/347 [26:41<50:12, 15.29s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E11-6/gff_extracted_features.sqlite: list index out of range


 44%|████▎     | 151/347 [26:43<36:42, 11.24s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E11-7/gff_extracted_features.sqlite: list index out of range


 46%|████▌     | 159/347 [29:03<51:44, 16.51s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E3-1/gff_extracted_features.sqlite: list index out of range


 47%|████▋     | 164/347 [30:31<51:09, 16.77s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E3-6/gff_extracted_features.sqlite: list index out of range


 48%|████▊     | 168/347 [31:31<46:07, 15.46s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E4-3/gff_extracted_features.sqlite: list index out of range


 52%|█████▏    | 179/347 [34:51<47:17, 16.89s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E5-7/gff_extracted_features.sqlite: list index out of range


 52%|█████▏    | 182/347 [35:30<39:25, 14.34s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E6-3/gff_extracted_features.sqlite: list index out of range


 58%|█████▊    | 200/347 [41:24<43:56, 17.94s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/E8-7/gff_extracted_features.sqlite: list index out of range


 60%|█████▉    | 208/347 [43:59<44:23, 19.16s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F10-1/gff_extracted_features.sqlite: list index out of range


 61%|██████    | 212/347 [45:17<43:16, 19.24s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F10-5/gff_extracted_features.sqlite: list index out of range


 61%|██████▏   | 213/347 [45:19<31:41, 14.19s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F10-6/gff_extracted_features.sqlite: list index out of range


 62%|██████▏   | 215/347 [45:40<26:59, 12.27s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F11-1/gff_extracted_features.sqlite: list index out of range


 63%|██████▎   | 217/347 [46:02<25:20, 11.69s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F11-3/gff_extracted_features.sqlite: list index out of range


 63%|██████▎   | 220/347 [46:44<27:59, 13.23s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F11-6/gff_extracted_features.sqlite: list index out of range


 65%|██████▌   | 226/347 [48:31<33:54, 16.81s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F2-5/gff_extracted_features.sqlite: list index out of range


 66%|██████▋   | 230/347 [49:36<31:40, 16.24s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F3-2/gff_extracted_features.sqlite: list index out of range


 67%|██████▋   | 233/347 [50:21<29:16, 15.40s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F3-5/gff_extracted_features.sqlite: list index out of range


 67%|██████▋   | 234/347 [50:24<21:36, 11.48s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F3-6/gff_extracted_features.sqlite: list index out of range


 68%|██████▊   | 235/347 [50:26<16:02,  8.59s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F3-7/gff_extracted_features.sqlite: list index out of range


 68%|██████▊   | 237/347 [50:46<17:17,  9.43s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F4-2/gff_extracted_features.sqlite: list index out of range


 69%|██████▊   | 238/347 [50:48<13:14,  7.29s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F4-3/gff_extracted_features.sqlite: list index out of range


 69%|██████▉   | 240/347 [51:08<15:36,  8.75s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F4-5/gff_extracted_features.sqlite: list index out of range


 69%|██████▉   | 241/347 [51:10<12:00,  6.80s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F4-6/gff_extracted_features.sqlite: list index out of range


 70%|██████▉   | 242/347 [51:12<09:14,  5.28s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F4-7/gff_extracted_features.sqlite: list index out of range


 70%|███████   | 244/347 [51:33<13:34,  7.91s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F5-2/gff_extracted_features.sqlite: list index out of range


 72%|███████▏  | 250/347 [53:20<26:19, 16.28s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F6-1/gff_extracted_features.sqlite: list index out of range


 75%|███████▌  | 261/347 [57:01<26:50, 18.73s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F7-5/gff_extracted_features.sqlite: list index out of range


 78%|███████▊  | 270/347 [1:00:09<26:54, 20.97s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F8-7/gff_extracted_features.sqlite: list index out of range


 78%|███████▊  | 271/347 [1:00:12<19:39, 15.51s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F9-1/gff_extracted_features.sqlite: list index out of range


 78%|███████▊  | 272/347 [1:00:15<14:29, 11.59s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F9-2/gff_extracted_features.sqlite: list index out of range


 79%|███████▉  | 274/347 [1:00:38<14:22, 11.81s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F9-4/gff_extracted_features.sqlite: list index out of range


 80%|███████▉  | 276/347 [1:01:02<13:51, 11.70s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/F9-6/gff_extracted_features.sqlite: list index out of range


 80%|████████  | 278/347 [1:01:24<13:18, 11.57s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G10-1/gff_extracted_features.sqlite: list index out of range


 80%|████████  | 279/347 [1:01:27<09:58,  8.80s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G10-2/gff_extracted_features.sqlite: list index out of range


 81%|████████▏ | 282/347 [1:02:11<13:42, 12.65s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G10-5/gff_extracted_features.sqlite: list index out of range


 83%|████████▎ | 289/347 [1:04:20<16:35, 17.17s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G11-5/gff_extracted_features.sqlite: list index out of range


 85%|████████▍ | 294/347 [1:05:48<15:05, 17.09s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G2-3/gff_extracted_features.sqlite: list index out of range


 86%|████████▌ | 298/347 [1:06:56<13:40, 16.74s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G2-7/gff_extracted_features.sqlite: list index out of range


 86%|████████▌ | 299/347 [1:06:58<09:59, 12.50s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G3-1/gff_extracted_features.sqlite: list index out of range


 88%|████████▊ | 306/347 [1:09:14<12:21, 18.08s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G4-1/gff_extracted_features.sqlite: list index out of range


 90%|████████▉ | 312/347 [1:11:07<10:28, 17.97s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G4-7/gff_extracted_features.sqlite: list index out of range


 90%|█████████ | 314/347 [1:11:30<08:04, 14.68s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G5-2/gff_extracted_features.sqlite: list index out of range


 91%|█████████ | 316/347 [1:11:54<06:53, 13.34s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G5-4/gff_extracted_features.sqlite: list index out of range


 92%|█████████▏| 318/347 [1:12:18<06:09, 12.75s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G5-6/gff_extracted_features.sqlite: list index out of range


 92%|█████████▏| 319/347 [1:12:21<04:30,  9.67s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G5-7/gff_extracted_features.sqlite: list index out of range


 92%|█████████▏| 320/347 [1:12:23<03:18,  7.37s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G6-1/gff_extracted_features.sqlite: list index out of range


 93%|█████████▎| 323/347 [1:13:08<04:58, 12.44s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G6-4/gff_extracted_features.sqlite: list index out of range


 94%|█████████▍| 327/347 [1:14:18<05:18, 15.93s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G7-1/gff_extracted_features.sqlite: list index out of range


 95%|█████████▍| 329/347 [1:14:42<04:12, 14.01s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G7-3/gff_extracted_features.sqlite: list index out of range


 95%|█████████▌| 331/347 [1:15:05<03:24, 12.78s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G7-5/gff_extracted_features.sqlite: list index out of range


 96%|█████████▌| 332/347 [1:15:07<02:24,  9.64s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G7-6/gff_extracted_features.sqlite: list index out of range


 98%|█████████▊| 341/347 [1:18:10<01:54, 19.02s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G9-1/gff_extracted_features.sqlite: list index out of range


 99%|█████████▉| 343/347 [1:18:34<01:00, 15.20s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G9-3/gff_extracted_features.sqlite: list index out of range


 99%|█████████▉| 344/347 [1:18:36<00:34, 11.48s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G9-4/gff_extracted_features.sqlite: list index out of range


 99%|█████████▉| 345/347 [1:18:38<00:17,  8.64s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G9-5/gff_extracted_features.sqlite: list index out of range


100%|█████████▉| 346/347 [1:18:41<00:06,  6.78s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G9-6/gff_extracted_features.sqlite: list index out of range


100%|██████████| 347/347 [1:18:43<00:00, 13.61s/it]


Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_middle_slice_output/G9-7/gff_extracted_features.sqlite: list index out of range


  0%|          | 1/347 [00:02<14:24,  2.50s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C10-1/gff_extracted_features.sqlite: list index out of range


  3%|▎         | 11/347 [03:33<1:53:02, 20.19s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C11-4/gff_extracted_features.sqlite: list index out of range


  4%|▍         | 14/347 [04:26<1:39:20, 17.90s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C11-7/gff_extracted_features.sqlite: list index out of range


  5%|▍         | 17/347 [05:12<1:29:04, 16.20s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C2-3/gff_extracted_features.sqlite: list index out of range


  5%|▌         | 19/347 [05:37<1:18:33, 14.37s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C2-5/gff_extracted_features.sqlite: list index out of range


  6%|▌         | 21/347 [06:02<1:11:47, 13.21s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C2-7/gff_extracted_features.sqlite: list index out of range


  6%|▋         | 22/347 [06:05<54:53, 10.13s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C3-1/gff_extracted_features.sqlite: list index out of range


  7%|▋         | 23/347 [06:08<42:31,  7.87s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C3-2/gff_extracted_features.sqlite: list index out of range


  8%|▊         | 27/347 [07:16<1:18:20, 14.69s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C3-7/gff_extracted_features.sqlite: list index out of range


 14%|█▍        | 50/347 [15:53<1:42:15, 20.66s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C7-2/gff_extracted_features.sqlite: list index out of range


 15%|█▍        | 51/347 [15:57<1:17:16, 15.66s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C7-3/gff_extracted_features.sqlite: list index out of range


 15%|█▍        | 52/347 [16:00<58:24, 11.88s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C7-4/gff_extracted_features.sqlite: list index out of range


 15%|█▌        | 53/347 [16:02<44:05,  9.00s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C7-5/gff_extracted_features.sqlite: list index out of range


 16%|█▌        | 54/347 [16:05<34:49,  7.13s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C7-6/gff_extracted_features.sqlite: list index out of range


 16%|█▌        | 56/347 [16:28<45:37,  9.41s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/C8-1/gff_extracted_features.sqlite: list index out of range


 22%|██▏       | 75/347 [24:04<1:38:11, 21.66s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D10-6/gff_extracted_features.sqlite: list index out of range


 24%|██▍       | 83/347 [27:05<1:33:42, 21.30s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D11-7/gff_extracted_features.sqlite: list index out of range


 25%|██▌       | 87/347 [28:24<1:25:38, 19.76s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D2-4/gff_extracted_features.sqlite: list index out of range


 26%|██▌       | 91/347 [29:42<1:21:34, 19.12s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D3-1/gff_extracted_features.sqlite: list index out of range


 28%|██▊       | 97/347 [31:49<1:22:35, 19.82s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D3-7/gff_extracted_features.sqlite: list index out of range


 29%|██▉       | 102/347 [33:30<1:19:53, 19.57s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D4-5/gff_extracted_features.sqlite: list index out of range


 35%|███▍      | 120/347 [40:36<1:20:27, 21.27s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D7-2/gff_extracted_features.sqlite: list index out of range


 35%|███▍      | 121/347 [40:39<59:58, 15.92s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D7-3/gff_extracted_features.sqlite: list index out of range


 35%|███▌      | 123/347 [41:08<56:11, 15.05s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D7-6/gff_extracted_features.sqlite: list index out of range


 36%|███▌      | 124/347 [41:11<42:40, 11.48s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/D7-7/gff_extracted_features.sqlite: list index out of range


 40%|████      | 140/347 [47:28<1:12:13, 20.93s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E10-3/gff_extracted_features.sqlite: list index out of range


 41%|████▏     | 144/347 [48:49<1:07:09, 19.85s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E10-7/gff_extracted_features.sqlite: list index out of range


 42%|████▏     | 145/347 [48:52<50:27, 14.99s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E11-1/gff_extracted_features.sqlite: list index out of range


 43%|████▎     | 148/347 [49:46<54:51, 16.54s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E11-4/gff_extracted_features.sqlite: list index out of range


 43%|████▎     | 150/347 [50:14<50:08, 15.27s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E11-6/gff_extracted_features.sqlite: list index out of range


 46%|████▌     | 159/347 [53:35<1:04:47, 20.68s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E3-1/gff_extracted_features.sqlite: list index out of range


 48%|████▊     | 168/347 [57:02<1:04:00, 21.45s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E4-3/gff_extracted_features.sqlite: list index out of range


 52%|█████▏    | 179/347 [1:01:02<51:05, 18.25s/it]  

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E5-7/gff_extracted_features.sqlite: list index out of range


 54%|█████▍    | 189/347 [1:03:35<36:44, 13.95s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/E7-3/gff_extracted_features.sqlite: list index out of range


 61%|██████    | 212/347 [1:11:55<49:53, 22.17s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F10-5/gff_extracted_features.sqlite: list index out of range


 65%|██████▌   | 226/347 [1:17:44<45:38, 22.64s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F2-5/gff_extracted_features.sqlite: list index out of range


 66%|██████▌   | 228/347 [1:18:13<36:01, 18.16s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F2-7/gff_extracted_features.sqlite: list index out of range


 66%|██████▌   | 229/347 [1:18:16<27:06, 13.79s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F3-1/gff_extracted_features.sqlite: list index out of range


 66%|██████▋   | 230/347 [1:18:22<22:05, 11.33s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F3-2/gff_extracted_features.sqlite: list index out of range


 67%|██████▋   | 233/347 [1:19:19<30:27, 16.03s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F3-5/gff_extracted_features.sqlite: list index out of range


 68%|██████▊   | 235/347 [1:19:50<29:03, 15.57s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F3-7/gff_extracted_features.sqlite: list index out of range


 69%|██████▊   | 238/347 [1:20:48<32:01, 17.63s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F4-3/gff_extracted_features.sqlite: list index out of range


 70%|██████▉   | 242/347 [1:22:13<34:42, 19.83s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F4-7/gff_extracted_features.sqlite: list index out of range


 71%|███████   | 247/347 [1:24:05<35:18, 21.19s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F5-5/gff_extracted_features.sqlite: list index out of range


 73%|███████▎  | 253/347 [1:26:25<34:06, 21.77s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F6-4/gff_extracted_features.sqlite: list index out of range


 76%|███████▋  | 265/347 [1:31:28<31:15, 22.88s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F8-2/gff_extracted_features.sqlite: list index out of range


 78%|███████▊  | 270/347 [1:33:24<28:27, 22.17s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F8-7/gff_extracted_features.sqlite: list index out of range


 78%|███████▊  | 272/347 [1:33:54<23:02, 18.44s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F9-2/gff_extracted_features.sqlite: list index out of range


 80%|███████▉  | 276/347 [1:35:17<23:02, 19.47s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/F9-6/gff_extracted_features.sqlite: list index out of range


 81%|████████▏ | 282/347 [1:37:40<23:46, 21.95s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G10-5/gff_extracted_features.sqlite: list index out of range


 83%|████████▎ | 287/347 [1:39:33<21:38, 21.65s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G11-3/gff_extracted_features.sqlite: list index out of range


 83%|████████▎ | 289/347 [1:40:05<17:52, 18.49s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G11-5/gff_extracted_features.sqlite: list index out of range


 85%|████████▌ | 295/347 [1:42:27<18:46, 21.66s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G2-4/gff_extracted_features.sqlite: list index out of range


 86%|████████▌ | 298/347 [1:43:27<16:22, 20.04s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G2-7/gff_extracted_features.sqlite: list index out of range


 86%|████████▌ | 299/347 [1:43:32<12:21, 15.44s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G3-1/gff_extracted_features.sqlite: list index out of range


 88%|████████▊ | 305/347 [1:45:55<14:56, 21.35s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G3-7/gff_extracted_features.sqlite: list index out of range


 88%|████████▊ | 306/347 [1:45:59<11:06, 16.25s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G4-1/gff_extracted_features.sqlite: list index out of range


 90%|████████▉ | 312/347 [1:48:21<12:30, 21.45s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G4-7/gff_extracted_features.sqlite: list index out of range


 90%|█████████ | 314/347 [1:48:53<10:02, 18.26s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G5-2/gff_extracted_features.sqlite: list index out of range


 92%|█████████▏| 319/347 [1:50:46<09:45, 20.92s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G5-7/gff_extracted_features.sqlite: list index out of range


 95%|█████████▍| 329/347 [1:54:59<06:55, 23.07s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G7-3/gff_extracted_features.sqlite: list index out of range


 95%|█████████▌| 331/347 [1:55:31<05:05, 19.12s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G7-5/gff_extracted_features.sqlite: list index out of range


 96%|█████████▌| 332/347 [1:55:35<03:39, 14.60s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G7-6/gff_extracted_features.sqlite: list index out of range


 99%|█████████▉| 344/347 [2:01:16<01:14, 24.84s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G9-4/gff_extracted_features.sqlite: list index out of range


100%|██████████| 347/347 [2:02:17<00:00, 21.15s/it]

Error processing /home/lippincm/4TB_A/NF1_2D_organoid_profiling_pipeline/data/NF0021/cellprofiler_zmax_proj_output/G9-7/gff_extracted_features.sqlite: list index out of range
Total files processed: 694
Total errors encountered: 166





In [6]:
# read in the dataframes and concatenate them in place
for featurization_type in output_dict_of_dfs.keys():
    print(
        f"Concatenating {len(output_dict_of_dfs[featurization_type]['df_list'])} dataframes for {featurization_type}"
    )
    df_list = [
        pd.read_parquet(df) for df in output_dict_of_dfs[featurization_type]["df_list"]
    ]
    output_dict_of_dfs[featurization_type]["df"] = pd.concat(df_list, ignore_index=True)
    # Define the list of columns to prioritize and prefix
    prioritized_columns = [
        "Nuclei_Location_Center_X",
        "Nuclei_Location_Center_Y",
        "Cells_Location_Center_X",
        "Cells_Location_Center_Y",
        "Image_Count_Cells",
    ]

    # If any, drop rows where "Metadata_ImageNumber" is NaN (artifact of cytotable)
    output_dict_of_dfs[featurization_type]["df"] = output_dict_of_dfs[
        featurization_type
    ]["df"].dropna(subset=["Metadata_ImageNumber"])

    # Rearrange columns and add "Metadata" prefix in one line
    output_dict_of_dfs[featurization_type]["df"] = output_dict_of_dfs[
        featurization_type
    ]["df"][
        prioritized_columns
        + [
            col
            for col in output_dict_of_dfs[featurization_type]["df"].columns
            if col not in prioritized_columns
        ]
    ].rename(
        columns=lambda col: "Metadata_" + col if col in prioritized_columns else col
    )
    # rename Image_Metadata_Well
    output_dict_of_dfs[featurization_type]["df"] = output_dict_of_dfs[
        featurization_type
    ]["df"].rename(columns={"Image_Metadata_Well": "Metadata_Well"})

    if featurization_type == "middle_slice":
        output_dict_of_dfs[featurization_type]["df"].to_parquet(
            middle_slice_sc_output, index=False
        )
    elif featurization_type == "zmax_proj":
        output_dict_of_dfs[featurization_type]["df"].to_parquet(
            max_projected_sc_output, index=False
        )
    print(
        f"Saved {featurization_type} data to {output_dict_of_dfs[featurization_type]['df'].shape[0]} rows in {output_dict_of_dfs[featurization_type]['df'].shape[1]} columns"
    )

Concatenating 246 dataframes for middle_slice
Saved middle_slice data to 1441 rows in 2910 columns
Concatenating 282 dataframes for zmax_proj
Saved zmax_proj data to 2775 rows in 2910 columns


## Extract organoid only profiles

In [7]:
output_dict_of_dfs = {}
for sqlite_dir in [middle_slice_input, max_projected_input]:
    output_dict_of_dfs[sqlite_dir.name.split("_out")[0].split("cellprofiler_")[1]] = {
        "df_list": [],
    }
output_dict_of_dfs

{'middle_slice': {'df_list': []}, 'zmax_proj': {'df_list': []}}

In [8]:
total = 0
errors = 0
for featurization_type in well_fov_dict.keys():
    print(f"Processing {featurization_type} files")
    for well_fov, file_info in tqdm.tqdm(well_fov_dict[featurization_type].items()):
        well = well_fov.split("-")[0]
        fov = well_fov.split("-")[1]
        sqlite_file = file_info["image_path"]
        total += 1
        try:
            # Create a DuckDB connection
            with duckdb.connect(sqlite_file) as con:
                # get the organoid table
                organoid_table = con.execute("SELECT * FROM Per_Organoid").df()
                organoid_table.rename(
                    columns={
                        "ImageNumber": "Metadata_ImageNumber",
                        "Organoid_Number_Object_Number": "Metadata_Organoid_Number_Object_Number",
                        "Image_Metadata_Well": "Metadata_Well",
                    },
                    inplace=True,
                )
                organoid_table.insert(0, "Metadata_Well_FOV", well_fov)
                organoid_table.insert(1, "Metadata_FOV", fov)
                organoid_table.insert(2, "Metadata_Well", well)
            output_dict_of_dfs[featurization_type]["df_list"].append(organoid_table)

        except Exception as e:
            errors += 1
            print(f"Error processing {sqlite_file}: {e}")
            continue

Processing middle_slice files


100%|██████████| 347/347 [01:03<00:00,  5.47it/s]


Processing zmax_proj files


100%|██████████| 347/347 [01:03<00:00,  5.51it/s]


In [9]:
# read in the dataframes and concatenate them in place
for featurization_type in output_dict_of_dfs.keys():
    print(
        f"Concatenating {len(output_dict_of_dfs[featurization_type]['df_list'])} dataframes for {featurization_type}"
    )
    output_dict_of_dfs[featurization_type]["df"] = pd.concat(
        output_dict_of_dfs[featurization_type]["df_list"], ignore_index=True
    )

    # If any, drop rows where "Metadata_ImageNumber" is NaN (artifact of cytotable)
    output_dict_of_dfs[featurization_type]["df"] = output_dict_of_dfs[
        featurization_type
    ]["df"].dropna(subset=["Metadata_ImageNumber"])
    if featurization_type == "middle_slice":
        output_dict_of_dfs[featurization_type]["df"].to_parquet(
            middle_slice_organoid_output, index=False
        )
    elif featurization_type == "zmax_proj":
        output_dict_of_dfs[featurization_type]["df"].to_parquet(
            max_projected_organoid_output, index=False
        )
    print(
        f"Saved {featurization_type} data to {output_dict_of_dfs[featurization_type]['df'].shape[0]} rows in {output_dict_of_dfs[featurization_type]['df'].shape[1]} columns"
    )

Concatenating 347 dataframes for middle_slice
Saved middle_slice data to 283 rows in 967 columns
Concatenating 347 dataframes for zmax_proj
Saved zmax_proj data to 791 rows in 967 columns
