In [1]:
import logging
import os
import pathlib
import sys
import time
import urllib
import urllib.request

import numpy as np
import pandas as pd
import psutil
from arg_parsing_utils import check_for_missing_args, parse_args
from loading_classes import ImageSetLoader, ObjectLoader
from notebook_init_utils import bandicoot_check, init_notebook
from resource_profiling_util import get_mem_and_time_profiling
from sammed3d_featurizer import call_SAMMed3D_pipeline

# init_notebook() yields the project root directory and a flag telling whether
# this code is running interactively (notebook) or as a script.
root_dir, in_notebook = init_notebook()

# Base directory that holds the image data. bandicoot_check receives the
# (resolved) ~/mnt/bandicoot mount point and the project root; presumably it
# returns the mount when it is available and falls back to root_dir otherwise
# -- confirm in notebook_init_utils.
image_base_dir = bandicoot_check(
    pathlib.Path(os.path.expanduser("~/mnt/bandicoot")).resolve(), root_dir
)

In [2]:
# set up logging: configure the root logger so that messages at INFO level
# and above are emitted for the rest of the run
logging.basicConfig(level=logging.INFO)

In [3]:
# Run parameters: fixed example values when executed interactively, otherwise
# read from the command line through parse_args().
if in_notebook:
    well_fov = "C4-2"
    patient = "NF0014_T1"
    compartment = "Cell"
    channel = "Mito"
    input_subparent_name = "zstack_images"
    mask_subparent_name = "segmentation_masks"
    output_features_subparent_name = "extracted_features"
else:
    arguments_dict = parse_args()
    (
        patient,
        well_fov,
        compartment,
        channel,
        input_subparent_name,
        mask_subparent_name,
        output_features_subparent_name,
    ) = (
        arguments_dict["patient"],
        arguments_dict["well_fov"],
        arguments_dict["compartment"],
        arguments_dict["channel"],
        arguments_dict["input_subparent_name"],
        arguments_dict["mask_subparent_name"],
        arguments_dict["output_features_subparent_name"],
    )

# Inputs: raw images and segmentation masks for this patient / well-FOV.
image_set_path = pathlib.Path(
    f"{image_base_dir}/data/{patient}/{input_subparent_name}/{well_fov}/"
)
mask_set_path = pathlib.Path(
    f"{image_base_dir}/data/{patient}/{mask_subparent_name}/{well_fov}/"
)

# Output: directory for the extracted feature tables, created if missing.
output_parent_path = pathlib.Path(
    f"{image_base_dir}/data/{patient}/{output_features_subparent_name}/{well_fov}/"
)
output_parent_path.mkdir(parents=True, exist_ok=True)

In [4]:
# Location of the pretrained SAM-Med3D "turbo" weights to download.
sam3dmed_checkpoint_url = (
    "https://huggingface.co/blueyo0/SAM-Med3D/resolve/main/sam_med3d_turbo.pth"
)
# NOTE: the relative path is resolved against the current working directory.
sam3dmed_checkpoint_path = pathlib.Path("../models/sam-med3d-turbo.pth").resolve()
if not sam3dmed_checkpoint_path.exists():
    sam3dmed_checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
    # Download into a per-process temporary file and rename it into place only
    # once the transfer has finished. Writing straight to the final path would
    # leave a truncated checkpoint behind after an interrupted download, and
    # that file would then satisfy the exists() check on every later run.
    partial_checkpoint_path = sam3dmed_checkpoint_path.with_name(
        f"{sam3dmed_checkpoint_path.name}.{os.getpid()}.part"
    )
    try:
        urllib.request.urlretrieve(
            sam3dmed_checkpoint_url, str(partial_checkpoint_path)
        )
        partial_checkpoint_path.replace(sam3dmed_checkpoint_path)
    finally:
        # Only present here if the download or the rename failed.
        if partial_checkpoint_path.exists():
            partial_checkpoint_path.unlink()

In [5]:
# Lookup table handed to ImageSetLoader as `channel_mapping`.
# Keys are the channel / compartment labels used in this notebook; values are
# the string tokens associated with each label (presumably matched against
# image and mask file names -- confirm in ImageSetLoader).
_channel_tokens = {
    "DNA": "405",
    "AGP": "488",
    "ER": "555",
    "Mito": "640",
    "BF": "TRANS",
}
_compartment_tokens = {
    "Nuclei": "nuclei_",
    "Cell": "cell_",
    "Cytoplasm": "cytoplasm_",
    "Organoid": "organoid_",
}
# Channels first, then compartments, preserving the original insertion order.
channel_n_compartment_mapping = {**_channel_tokens, **_compartment_tokens}

In [6]:
# Wall-clock reference point for the profiling report.
start_time = time.time()
# Resident memory (CPU) of this process at the start of the run; rss is
# divided by 1024**2 to express it in MiB.
current_process = psutil.Process(os.getpid())
start_mem = current_process.memory_info().rss / 1024**2

In [7]:
# Build the loader for this well-FOV from the image and mask directories.
# The spacing tuple is passed through unchanged as `anisotropy_spacing`
# (presumably per-axis voxel spacing -- confirm axis order and units in
# ImageSetLoader).
image_set_loader = ImageSetLoader(
    mask_set_path=mask_set_path,
    image_set_path=image_set_path,
    channel_mapping=channel_n_compartment_mapping,
    anisotropy_spacing=(1, 0.1, 0.1),
)
# Display the entries that were loaded.
image_set_loader.image_set_dict.keys()

dict_keys(['DNA', 'AGP', 'ER', 'Mito', 'BF', 'Cell', 'Cytoplasm', 'Nuclei', 'Organoid'])

In [8]:
# load the objects for the compartment and channel of interest:
# the first argument is the loaded entry for `channel`, the second the loaded
# entry for `compartment`, followed by the two labels themselves
object_loader = ObjectLoader(
    image_set_loader.image_set_dict[channel],
    image_set_loader.image_set_dict[compartment],
    channel,
    compartment,
)
# record the start of the extraction in the log (stdout is NOT redirected;
# anything the pipeline prints still goes to the cell output)
logging.info("Starting SAM-Med3D feature extraction")
# run the SAM-Med3D featurizer on the loaded objects using the downloaded
# checkpoint; "cls" selects the feature type to extract
feature_dict = call_SAMMed3D_pipeline(
    object_loader=object_loader,
    SAMMed3D_model_path=str(sam3dmed_checkpoint_path),
    feature_type="cls",
)

INFO:root:Starting SAM-Med3D feature extraction


creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model S

In [9]:
# Long-format table built from the featurizer output: one row per
# (object_id, feature_name) pair.
final_df = pd.DataFrame(feature_dict)
if final_df.empty:
    # Nothing was featurized (e.g. the mask contained zero objects). Report it
    # explicitly instead of relying on a blanket `except Exception`, which would
    # also hide unrelated failures behind a "zero object" message.
    logging.error(
        "No SAM-Med3D features returned for compartment=%s, channel=%s "
        "(zero objects?); skipping feature-name formatting",
        compartment,
        channel,
    )
else:
    # Make feature names unique per compartment and channel, e.g.
    # "SAMMed3D.0_Cell_Mito".
    final_df["feature_name"] = (
        final_df["feature_name"]
        + "_"
        + final_df["compartment"]
        + "_"
        + final_df["channel"]
    )
    # Literal (non-regex) replacement of the "_feature_" infix with a dot.
    final_df["feature_name"] = final_df["feature_name"].str.replace(
        "_feature_", ".", regex=False
    )
    # Both values are now encoded in feature_name.
    final_df = final_df.drop(columns=["compartment", "channel"])
final_df.head()

Unnamed: 0,object_id,feature_name,value,feature_type
0,2,SAMMed3D.0_Cell_Mito,-0.247702,cls
1,2,SAMMed3D.1_Cell_Mito,-0.312534,cls
2,2,SAMMed3D.2_Cell_Mito,-0.048873,cls
3,2,SAMMed3D.3_Cell_Mito,-0.001652,cls
4,2,SAMMed3D.4_Cell_Mito,-0.09437,cls


In [10]:
# Reshape long -> wide: one row per object_id, one column per feature_name.
final_df = (
    final_df.pivot(index="object_id", columns="feature_name", values="value")
    .reset_index()
    # clear the "feature_name" label that pivot leaves on the column axis
    .rename_axis(None, axis="columns")
)
final_df.head()

Unnamed: 0,object_id,SAMMed3D.0_Cell_Mito,SAMMed3D.100_Cell_Mito,SAMMed3D.101_Cell_Mito,SAMMed3D.102_Cell_Mito,SAMMed3D.103_Cell_Mito,SAMMed3D.104_Cell_Mito,SAMMed3D.105_Cell_Mito,SAMMed3D.106_Cell_Mito,SAMMed3D.107_Cell_Mito,...,SAMMed3D.91_Cell_Mito,SAMMed3D.92_Cell_Mito,SAMMed3D.93_Cell_Mito,SAMMed3D.94_Cell_Mito,SAMMed3D.95_Cell_Mito,SAMMed3D.96_Cell_Mito,SAMMed3D.97_Cell_Mito,SAMMed3D.98_Cell_Mito,SAMMed3D.99_Cell_Mito,SAMMed3D.9_Cell_Mito
0,2,-0.247702,-0.007566,-0.168385,0.179898,0.047746,-0.097566,-0.21315,-0.03836,-0.080081,...,-0.108127,0.022254,-0.010769,0.035341,0.05587,0.064976,0.187349,0.302837,0.209006,0.072749
1,18,-0.236761,-0.006064,-0.153226,0.166874,0.042067,-0.09953,-0.227857,-0.039392,-0.071842,...,-0.117605,0.02737,-0.01083,0.030849,0.046936,0.059656,0.194709,0.312344,0.206264,0.102732
2,28,-0.264912,0.006871,-0.162844,0.140498,0.060915,-0.074216,-0.214327,-0.033809,-0.073356,...,-0.117052,0.035639,-0.01076,0.029725,0.052916,0.05928,0.212212,0.323864,0.222271,0.112499
3,29,-0.218653,-0.001755,-0.156938,0.168258,0.057769,-0.094659,-0.207445,-0.027397,-0.058211,...,-0.124168,0.020642,-0.01079,0.029033,0.052901,0.065579,0.188313,0.323091,0.201179,0.103487
4,33,-0.222147,-0.00414,-0.143358,0.177048,0.077148,-0.117513,-0.220157,-0.032146,-0.044084,...,-0.086353,0.039858,-0.010819,0.035369,0.040118,0.058744,0.174516,0.30503,0.20929,0.109308


In [11]:
# Store every feature column as float32; object_id keeps its original dtype.
# A single astype call converts all feature columns at once instead of
# reassigning them one by one.
feature_columns = [col for col in final_df.columns if col != "object_id"]
final_df = final_df.astype({col: np.float32 for col in feature_columns})

In [12]:
# de-fragment: work on a fresh copy of the wide table
final_df = final_df.copy()
# Tag every row with the image set it came from. The guard keeps the cell
# re-runnable: DataFrame.insert raises ValueError when the column already exists.
if "image_set" not in final_df.columns:
    final_df.insert(1, "image_set", image_set_loader.image_set_name)

# One parquet file per compartment / channel inside the well-FOV output directory.
output_file = (
    output_parent_path / f"SAMMed3D_{compartment}_{channel}_GPU_features.parquet"
)
final_df.to_parquet(output_file, index=False)
final_df.head()

Unnamed: 0,object_id,image_set,SAMMed3D.0_Cell_Mito,SAMMed3D.100_Cell_Mito,SAMMed3D.101_Cell_Mito,SAMMed3D.102_Cell_Mito,SAMMed3D.103_Cell_Mito,SAMMed3D.104_Cell_Mito,SAMMed3D.105_Cell_Mito,SAMMed3D.106_Cell_Mito,...,SAMMed3D.91_Cell_Mito,SAMMed3D.92_Cell_Mito,SAMMed3D.93_Cell_Mito,SAMMed3D.94_Cell_Mito,SAMMed3D.95_Cell_Mito,SAMMed3D.96_Cell_Mito,SAMMed3D.97_Cell_Mito,SAMMed3D.98_Cell_Mito,SAMMed3D.99_Cell_Mito,SAMMed3D.9_Cell_Mito
0,2,C4-2,-0.247702,-0.007566,-0.168385,0.179898,0.047746,-0.097566,-0.21315,-0.03836,...,-0.108127,0.022254,-0.010769,0.035341,0.05587,0.064976,0.187349,0.302837,0.209006,0.072749
1,18,C4-2,-0.236761,-0.006064,-0.153226,0.166874,0.042067,-0.09953,-0.227857,-0.039392,...,-0.117605,0.02737,-0.01083,0.030849,0.046936,0.059656,0.194709,0.312344,0.206264,0.102732
2,28,C4-2,-0.264912,0.006871,-0.162844,0.140498,0.060915,-0.074216,-0.214327,-0.033809,...,-0.117052,0.035639,-0.01076,0.029725,0.052916,0.05928,0.212212,0.323864,0.222271,0.112499
3,29,C4-2,-0.218653,-0.001755,-0.156938,0.168258,0.057769,-0.094659,-0.207445,-0.027397,...,-0.124168,0.020642,-0.01079,0.029033,0.052901,0.065579,0.188313,0.323091,0.201179,0.103487
4,33,C4-2,-0.222147,-0.00414,-0.143358,0.177048,0.077148,-0.117513,-0.220157,-0.032146,...,-0.086353,0.039858,-0.010819,0.035369,0.040118,0.058744,0.174516,0.30503,0.20929,0.109308


In [13]:
# Resident memory (MiB) and wall-clock time at the end of the run.
end_mem = psutil.Process(os.getpid()).memory_info().rss / 1024**2
end_time = time.time()
# Write the memory / time statistics for this run. The channel passed here is
# the one that was actually featurized (previously hard-coded to "DNA"), so the
# record agrees with the channel embedded in the output file name.
get_mem_and_time_profiling(
    start_mem=start_mem,
    end_mem=end_mem,
    start_time=start_time,
    end_time=end_time,
    feature_type="SAMMed3D",
    well_fov=well_fov,
    patient_id=patient,
    channel=channel,
    compartment=compartment,
    CPU_GPU="GPU",
    output_file_dir=pathlib.Path(
        f"{root_dir}/data/{patient}/extracted_features/run_stats/{well_fov}_SAMMed3D_{channel}_{compartment}_GPU.parquet"
    ),
)


        Memory and time profiling for the run:

        Patient ID: NF0014_T1

        Well and FOV: C4-2

        Feature type: SAMMed3D

        CPU/GPU: GPU")

        Memory usage: 2485.09 MB

        Time:

        --- %s seconds --- % 119.27980327606201

        --- %s minutes --- % 1.9879967212677

        --- %s hours --- % 0.033133278687795004
    


True