In [1]:
import logging
import os
import pathlib
import sys
import time
import urllib
import urllib.request

import numpy as np
import pandas as pd
import psutil
from arg_parsing_utils import check_for_missing_args, parse_args
from loading_classes import ImageSetLoader, ObjectLoader
from notebook_init_utils import bandicoot_check, init_notebook
from resource_profiling_util import get_mem_and_time_profiling
from sammed3d_featurizer import call_SAMMed3D_pipeline

# init_notebook() returns two values: root_dir is used below as the base of every
# data path, and in_notebook selects between hard-coded example parameters and
# command-line arguments.
# NOTE(review): presumably root_dir is the repository root — confirm in notebook_init_utils.
root_dir, in_notebook = init_notebook()

In [2]:
# Set up logging: configure the root logger to emit messages at INFO level and above.
# basicConfig() does nothing if the root logger already has handlers attached.
logging.basicConfig(level=logging.INFO)

In [None]:
# Run parameters: fixed example values when executed interactively in a
# notebook, parsed command-line arguments when executed as a script.
if in_notebook:
    patient = "NF0014_T1"
    well_fov = "C4-2"
    compartment = "Nuclei"
    channel = "DNA"
    input_subparent_name = "zstack_images"
    mask_subparent_name = "segmentation_masks"
    output_features_subparent_name = "extracted_features"
else:
    arguments_dict = parse_args()
    (
        patient,
        well_fov,
        compartment,
        channel,
        input_subparent_name,
        mask_subparent_name,
        output_features_subparent_name,
    ) = (
        arguments_dict[key]
        for key in (
            "patient",
            "well_fov",
            "compartment",
            "channel",
            "input_subparent_name",
            "mask_subparent_name",
            "output_features_subparent_name",
        )
    )

# Every per-well directory follows <root_dir>/data/<patient>/<subparent>/<well_fov>/.
patient_data_dir = f"{root_dir}/data/{patient}"
image_set_path = pathlib.Path(f"{patient_data_dir}/{input_subparent_name}/{well_fov}/")
mask_set_path = pathlib.Path(f"{patient_data_dir}/{mask_subparent_name}/{well_fov}/")
output_parent_path = pathlib.Path(
    f"{patient_data_dir}/{output_features_subparent_name}/{well_fov}/"
)

# Only the output directory is created; the input directories are read later.
output_parent_path.mkdir(parents=True, exist_ok=True)

In [4]:
# Location of the pretrained SAM-Med3D "turbo" weights and where to cache them
# locally (resolved relative to the current working directory).
sam3dmed_checkpoint_url = (
    "https://huggingface.co/blueyo0/SAM-Med3D/resolve/main/sam_med3d_turbo.pth"
)
sam3dmed_checkpoint_path = pathlib.Path("../models/sam-med3d-turbo.pth").resolve()

# Download the checkpoint only when it is not cached yet. The download goes to a
# temporary ".part" file that is renamed once complete, so an interrupted
# transfer never leaves a truncated file at the final path (which the exists()
# check would otherwise accept as a valid checkpoint on every later run).
if not sam3dmed_checkpoint_path.exists():
    sam3dmed_checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
    partial_download_path = sam3dmed_checkpoint_path.with_name(
        sam3dmed_checkpoint_path.name + ".part"
    )
    try:
        urllib.request.urlretrieve(sam3dmed_checkpoint_url, str(partial_download_path))
        # Same-directory rename: the final path appears only after a full download.
        partial_download_path.replace(sam3dmed_checkpoint_path)
    finally:
        # Remove leftovers from a failed download; after a successful rename the
        # temporary file no longer exists.
        if partial_download_path.exists():
            partial_download_path.unlink()

In [5]:
# Imaging channel name -> token for that channel.
# NOTE(review): presumably these tokens are matched against image file names by
# ImageSetLoader — confirm in loading_classes.
channel_tokens = {
    "DNA": "405",
    "AGP": "488",
    "ER": "555",
    "Mito": "640",
    "BF": "TRANS",
}

# Segmentation compartment name -> token for that compartment's mask.
compartment_tokens = {
    "Nuclei": "nuclei_",
    "Cell": "cell_",
    "Cytoplasm": "cytoplasm_",
    "Organoid": "organoid_",
}

# Single lookup covering both channels and compartments (channels first).
channel_n_compartment_mapping = {**channel_tokens, **compartment_tokens}

In [6]:
# Baseline for the resource profile: wall-clock start time and this process's
# resident set size converted from bytes to MiB (CPU memory only).
start_time = time.time()
current_process = psutil.Process(os.getpid())
start_mem = current_process.memory_info().rss / 1024**2

In [None]:
# Build the loader for this well/FOV from the image and mask directories
# resolved from the run parameters.
image_set_loader = ImageSetLoader(
    image_set_path=image_set_path,
    mask_set_path=mask_set_path,
    # NOTE(review): presumably the voxel spacing in (z, y, x) order — confirm the
    # units and axis order against ImageSetLoader.
    anisotropy_spacing=(1, 0.1, 0.1),
    # Name -> token lookup for both channels and compartments.
    channel_mapping=channel_n_compartment_mapping,
)

In [8]:
# Load the objects for the compartment and channel of interest: the entries for
# `channel` and `compartment` are looked up by name in the loader's
# image_set_dict and handed to ObjectLoader together with their names.
object_loader = ObjectLoader(
    image_set_loader.image_set_dict[channel],
    image_set_loader.image_set_dict[compartment],
    channel,
    compartment,
)
# Log the start of featurization. Nothing in this cell redirects stdout, so any
# messages the pipeline prints still appear as plain cell output.
logging.info("Starting SAM-Med3D feature extraction")
# Run the SAM-Med3D featurizer with the locally cached checkpoint.
# NOTE(review): feature_type="cls" presumably selects a CLS-style embedding per
# object — confirm in sammed3d_featurizer.
feature_dict = call_SAMMed3D_pipeline(
    object_loader=object_loader,
    SAMMed3D_model_path=str(sam3dmed_checkpoint_path),
    feature_type="cls",
)

INFO:root:Starting SAM-Med3D feature extraction


creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model SAM-Med3D
try to load pretrained weights from /home/lippincm/Documents/GFF_3D_organoid_profiling_pipeline/3.cellprofiling/models/sam-med3d-turbo.pth
creating model S

In [9]:
# Long-format feature table. Each feature name is suffixed with its compartment
# and channel, the "_feature_" infix is collapsed to "." (giving names such as
# "SAMMed3D.0_Nuclei_DNA"), and the now-redundant compartment/channel columns
# are dropped.
final_df = (
    pd.DataFrame(feature_dict)
    .assign(
        feature_name=lambda long_df: (
            long_df["feature_name"]
            + "_"
            + long_df["compartment"]
            + "_"
            + long_df["channel"]
        ).str.replace("_feature_", ".")
    )
    .drop(columns=["compartment", "channel"])
)
final_df.head()

Unnamed: 0,object_id,feature_name,value,feature_type
0,5,SAMMed3D.0_Nuclei_DNA,-0.203949,cls
1,5,SAMMed3D.1_Nuclei_DNA,-0.248421,cls
2,5,SAMMed3D.2_Nuclei_DNA,-0.070039,cls
3,5,SAMMed3D.3_Nuclei_DNA,-0.006313,cls
4,5,SAMMed3D.4_Nuclei_DNA,-0.076351,cls


In [10]:
# Reshape to one row per object and one column per feature. pivot() labels the
# column axis "feature_name"; that axis name is cleared so the saved table has
# plain column headers.
final_df = (
    final_df.pivot(index="object_id", columns="feature_name", values="value")
    .reset_index()
    .rename_axis(None, axis="columns")
)
final_df

Unnamed: 0,object_id,SAMMed3D.0_Nuclei_DNA,SAMMed3D.100_Nuclei_DNA,SAMMed3D.101_Nuclei_DNA,SAMMed3D.102_Nuclei_DNA,SAMMed3D.103_Nuclei_DNA,SAMMed3D.104_Nuclei_DNA,SAMMed3D.105_Nuclei_DNA,SAMMed3D.106_Nuclei_DNA,SAMMed3D.107_Nuclei_DNA,...,SAMMed3D.91_Nuclei_DNA,SAMMed3D.92_Nuclei_DNA,SAMMed3D.93_Nuclei_DNA,SAMMed3D.94_Nuclei_DNA,SAMMed3D.95_Nuclei_DNA,SAMMed3D.96_Nuclei_DNA,SAMMed3D.97_Nuclei_DNA,SAMMed3D.98_Nuclei_DNA,SAMMed3D.99_Nuclei_DNA,SAMMed3D.9_Nuclei_DNA
0,5,-0.203949,0.028844,-0.166061,0.153101,0.04368,-0.087269,-0.188015,-0.020713,-0.001387,...,-0.133094,0.061736,-0.010518,0.038423,0.028575,0.042,0.17459,0.230862,0.223262,0.128034
1,20,-0.278051,-0.020236,-0.162448,0.153393,0.044911,-0.087497,-0.218098,-0.050803,-0.060617,...,-0.080153,0.049333,-0.010862,0.03013,0.049745,0.062702,0.204064,0.320472,0.238666,0.130208
2,24,-0.22665,-0.00107,-0.16208,0.170267,0.060208,-0.099462,-0.202039,-0.028035,-0.05531,...,-0.095105,0.041351,-0.010749,0.033226,0.047211,0.06378,0.177373,0.319819,0.209997,0.112176
3,1542,-0.258233,-0.026216,-0.142235,0.167606,0.048505,-0.110014,-0.219207,-0.030135,-0.069411,...,-0.117387,0.045312,-0.010851,0.036892,0.04237,0.047627,0.194583,0.295221,0.213595,0.100978
4,3341,-0.227754,-0.011071,-0.144284,0.181907,0.060766,-0.121817,-0.203476,-0.029114,-0.055409,...,-0.117203,0.038857,-0.010814,0.039062,0.038011,0.054979,0.174576,0.286723,0.199126,0.09607
5,5140,-0.243665,-0.014954,-0.145341,0.166374,0.054387,-0.102879,-0.22292,-0.026584,-0.074159,...,-0.122543,0.032626,-0.010829,0.035142,0.047591,0.054502,0.193609,0.301051,0.204718,0.109065
6,6682,-0.214001,-0.000656,-0.15899,0.173545,0.06285,-0.100144,-0.203619,-0.028583,-0.052918,...,-0.120573,0.024988,-0.010812,0.030044,0.054865,0.066105,0.186416,0.319229,0.197685,0.101291
7,10280,-0.210727,0.006057,-0.16787,0.176184,0.058173,-0.09998,-0.20635,-0.032694,-0.055056,...,-0.115794,0.022421,-0.010764,0.029464,0.051584,0.070201,0.182761,0.317782,0.196978,0.098476
8,11822,-0.213915,0.003211,-0.158758,0.177573,0.05962,-0.102131,-0.211948,-0.038269,-0.057685,...,-0.118615,0.023173,-0.010787,0.029702,0.054902,0.067518,0.188797,0.331509,0.18994,0.097339
9,13621,-0.217956,0.002657,-0.164009,0.17422,0.051384,-0.095521,-0.211827,-0.035406,-0.061408,...,-0.120637,0.025061,-0.010799,0.027827,0.051448,0.06627,0.189301,0.318215,0.202341,0.102454


In [11]:
# Store every feature column as float32; object_id keeps its original dtype.
# All feature columns are cast in a single astype() call instead of being
# reassigned one by one in a Python loop.
feature_columns = [col for col in final_df.columns if col != "object_id"]
final_df = final_df.astype({col: np.float32 for col in feature_columns})

In [12]:
# Take a fresh copy of the frame (de-fragment) before adding a column, then tag
# every row with the image set it came from, directly after object_id.
final_df = final_df.copy()
final_df.insert(loc=1, column="image_set", value=image_set_loader.image_set_name)

# One parquet file per compartment/channel pair inside the well's output directory.
features_filename = f"SAMMed3D_{compartment}_{channel}_features.parquet"
output_file = output_parent_path / features_filename
final_df.to_parquet(output_file, index=False)
final_df.head()

Unnamed: 0,object_id,image_set,SAMMed3D.0_Nuclei_DNA,SAMMed3D.100_Nuclei_DNA,SAMMed3D.101_Nuclei_DNA,SAMMed3D.102_Nuclei_DNA,SAMMed3D.103_Nuclei_DNA,SAMMed3D.104_Nuclei_DNA,SAMMed3D.105_Nuclei_DNA,SAMMed3D.106_Nuclei_DNA,...,SAMMed3D.91_Nuclei_DNA,SAMMed3D.92_Nuclei_DNA,SAMMed3D.93_Nuclei_DNA,SAMMed3D.94_Nuclei_DNA,SAMMed3D.95_Nuclei_DNA,SAMMed3D.96_Nuclei_DNA,SAMMed3D.97_Nuclei_DNA,SAMMed3D.98_Nuclei_DNA,SAMMed3D.99_Nuclei_DNA,SAMMed3D.9_Nuclei_DNA
0,5,C4-2,-0.203949,0.028844,-0.166061,0.153101,0.04368,-0.087269,-0.188015,-0.020713,...,-0.133094,0.061736,-0.010518,0.038423,0.028575,0.042,0.17459,0.230862,0.223262,0.128034
1,20,C4-2,-0.278051,-0.020236,-0.162448,0.153393,0.044911,-0.087497,-0.218098,-0.050803,...,-0.080153,0.049333,-0.010862,0.03013,0.049745,0.062702,0.204064,0.320472,0.238666,0.130208
2,24,C4-2,-0.22665,-0.00107,-0.16208,0.170267,0.060208,-0.099462,-0.202039,-0.028035,...,-0.095105,0.041351,-0.010749,0.033226,0.047211,0.06378,0.177373,0.319819,0.209997,0.112176
3,1542,C4-2,-0.258233,-0.026216,-0.142235,0.167606,0.048505,-0.110014,-0.219207,-0.030135,...,-0.117387,0.045312,-0.010851,0.036892,0.04237,0.047627,0.194583,0.295221,0.213595,0.100978
4,3341,C4-2,-0.227754,-0.011071,-0.144284,0.181907,0.060766,-0.121817,-0.203476,-0.029114,...,-0.117203,0.038857,-0.010814,0.039062,0.038011,0.054979,0.174576,0.286723,0.199126,0.09607


In [13]:
# Closing measurements for the resource profile (resident memory in MiB, wall
# clock in seconds), taken after the features have been written.
end_mem = psutil.Process(os.getpid()).memory_info().rss / 1024**2
end_time = time.time()

# Record the run under the featurizer and channel that were actually used. The
# previous values ("AreaSizeShape", "DNA") were left over from another notebook,
# so every channel wrote to the same stats file and overwrote the real
# AreaSizeShape statistics for this well.
get_mem_and_time_profiling(
    start_mem=start_mem,
    end_mem=end_mem,
    start_time=start_time,
    end_time=end_time,
    feature_type="SAMMed3D",
    well_fov=well_fov,
    patient_id=patient,
    channel=channel,
    compartment=compartment,
    CPU_GPU="GPU",
    # Despite the parameter name, this is the full path of the stats parquet file.
    output_file_dir=pathlib.Path(
        f"{root_dir}/data/{patient}/extracted_features/run_stats/{well_fov}_SAMMed3D_{channel}_{compartment}_GPU.parquet"
    ),
)


        Memory and time profiling for the run:

        Patient ID: NF0014_T1

        Well and FOV: C4-2

        Feature type: AreaSizeShape

        CPU/GPU: GPU")

        Memory usage: 2388.04 MB

        Time:

        --- %s seconds --- % 75.45903062820435

        --- %s minutes --- % 1.2576505104700724

        --- %s hours --- % 0.020960841841167872
    


True