In [1]:
import argparse
import pathlib

import pandas as pd

try:
    cfg = get_ipython().config
    in_notebook = True
except NameError:
    in_notebook = False

# Get the current working directory
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

In [2]:
if not in_notebook:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--well_fov",
        type=str,
        required=True,
        help="Well and field of view to process, e.g. 'A01_1'",
    )
    argparser.add_argument(
        "--patient",
        type=str,
        required=True,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    well_fov = args.well_fov
    patient = args.patient
else:
    well_fov = "C2-1"
    patient = "NF0014"

In [3]:
def centroid_within_bbox_detection(
    centroid: tuple,
    bbox: tuple,
) -> bool:
    """
    Check if the centroid is within the bbox

    Parameters
    ----------
    centroid : tuple
        Centroid of the object in the order of (z, y, x)
        Order of the centroid is important
    bbox : tuple
        Where the bbox is in the order of (z_min, y_min, x_min, z_max, y_max, x_max)
        Order of the bbox is important

    Returns
    -------
    bool
        True if the centroid is within the bbox, False otherwise
    """
    z_min, y_min, x_min, z_max, y_max, x_max = bbox
    z, y, x = centroid
    # check if the centroid is within the bbox
    if (
        z >= z_min
        and z <= z_max
        and y >= y_min
        and y <= y_max
        and x >= x_min
        and x <= x_max
    ):
        return True
    else:
        return False

### Pathing

In [4]:
# input paths
sc_profile_path = pathlib.Path(
    f"{root_dir}/data/{patient}/image_based_profiles/0.converted_profiles/{well_fov}/sc_profiles_{well_fov}.parquet"
).resolve(strict=True)
organoid_profile_path = pathlib.Path(
    f"{root_dir}/data/{patient}/image_based_profiles/0.converted_profiles/{well_fov}/organoid_profiles_{well_fov}.parquet"
).resolve(strict=True)
# output paths
sc_profile_output_path = pathlib.Path(
    f"{root_dir}/data/{patient}/image_based_profiles/0.converted_profiles/{well_fov}/sc_profiles_{well_fov}_related.parquet"
).resolve()
organoid_profile_output_path = pathlib.Path(
    f"{root_dir}/data/{patient}/image_based_profiles/0.converted_profiles/{well_fov}/organoid_profiles_{well_fov}_related.parquet"
).resolve()

In [5]:
sc_profile_df = pd.read_parquet(sc_profile_path)
organoid_profile_df = pd.read_parquet(organoid_profile_path)
print(f"Single-cell profile shape: {sc_profile_df.shape}")
print(f"Organoid profile shape: {organoid_profile_df.shape}")

Single-cell profile shape: (1, 1926)
Organoid profile shape: (0, 642)


In [6]:
# initialize the parent organoid column
sc_profile_df.insert(2, "parent_organoid", -1)

In [7]:
x_y_z_sc_colnames = [
    x for x in sc_profile_df.columns if "area" in x.lower() and "center" in x.lower()
]
print(
    f"The nuclei centroids in the single-cell profile are in the columns:\n{x_y_z_sc_colnames}"
)

The nuclei centroids in the single-cell profile are in the columns:
['Area.Size.Shape_Nuclei_CENTER.X', 'Area.Size.Shape_Nuclei_CENTER.Y', 'Area.Size.Shape_Nuclei_CENTER.Z', 'Area.Size.Shape_Cell_CENTER.X', 'Area.Size.Shape_Cell_CENTER.Y', 'Area.Size.Shape_Cell_CENTER.Z', 'Area.Size.Shape_Cytoplasm_CENTER.X', 'Area.Size.Shape_Cytoplasm_CENTER.Y', 'Area.Size.Shape_Cytoplasm_CENTER.Z']


In [8]:
organoid_bbox_colnames = [
    x for x in organoid_profile_df.columns if "Area" in x and ("MIN" in x or "MAX" in x)
]
organoid_bbox_colnames = sorted(organoid_bbox_colnames)
print(f"The organoid bounding boxes are in the columns:\n{organoid_bbox_colnames}")

The organoid bounding boxes are in the columns:
['Area.Size.Shape_Organoid_MAX.X', 'Area.Size.Shape_Organoid_MAX.Y', 'Area.Size.Shape_Organoid_MAX.Z', 'Area.Size.Shape_Organoid_MIN.X', 'Area.Size.Shape_Organoid_MIN.Y', 'Area.Size.Shape_Organoid_MIN.Z']


In [9]:
# loop thorugh the organoids first as there are less organoids than single-cells
for organoid_index, organoid_row in organoid_profile_df.iterrows():
    # get the organoid bbox - should be alphabetically sorted
    # define the organoid bbox in the order of:
    # (z_min, y_min, x_min, z_max, y_max, x_max)
    organoid_bbox = (
        organoid_row[organoid_bbox_colnames[5]],
        organoid_row[organoid_bbox_colnames[4]],
        organoid_row[organoid_bbox_colnames[3]],
        organoid_row[organoid_bbox_colnames[2]],
        organoid_row[organoid_bbox_colnames[1]],
        organoid_row[organoid_bbox_colnames[0]],
    )
    # loop through the single-cells and check if the centroid is within the organoid bbox
    for sc_index, sc_row in sc_profile_df.iterrows():
        # get the single-cell centroid - should be alphabetically sorted
        # define the single-cell centroid in the order of (z, y, x)
        sc_centroid = (
            sc_row[x_y_z_sc_colnames[2]],
            sc_row[x_y_z_sc_colnames[1]],
            sc_row[x_y_z_sc_colnames[0]],
        )

        if centroid_within_bbox_detection(sc_centroid, organoid_bbox):
            sc_profile_df.at[sc_index, "parent_organoid"] = organoid_row["object_id"]
        else:
            # if the centroid is not within the organoid bbox, set the parent organoid to -1
            sc_profile_df.at[sc_index, "parent_organoid"] = -1

### Add single-cell counts for each organoid

In [10]:
organoid_sc_counts = (
    sc_profile_df["parent_organoid"]
    .value_counts()
    .to_frame(name="single_cell_count")
    .reset_index()
)
# merge the organoid profile with the single-cell counts
organoid_profile_df = pd.merge(
    organoid_profile_df,
    organoid_sc_counts,
    left_on="object_id",
    right_on="parent_organoid",
    how="left",
).drop(columns=["parent_organoid"])
sc_count = organoid_profile_df.pop("single_cell_count")
organoid_profile_df.insert(2, "single_cell_count", sc_count)

In [11]:
if organoid_profile_df.empty:
    # add a row with Na values
    organoid_profile_df.loc[len(organoid_profile_df)] = [None] * len(
        organoid_profile_df.columns
    )
    organoid_profile_df["image_set"] = well_fov

In [12]:
if sc_profile_df.empty:
    # add a row with Na values
    sc_profile_df.loc[len(sc_profile_df)] = [None] * len(sc_profile_df.columns)
    sc_profile_df["image_set"] = well_fov

### Save the profiles

In [13]:
organoid_profile_df.to_parquet(organoid_profile_output_path, index=False)
organoid_profile_df.head()

Unnamed: 0,object_id,image_set,single_cell_count,Colocalization_Organoid_AGP.BF_MEAN.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MIN.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MAX.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MEAN.MANDERS.COEFF.M1,Colocalization_Organoid_AGP.BF_MEDIAN.MANDERS.COEFF.M1,Colocalization_Organoid_AGP.BF_MIN.MANDERS.COEFF.M1,...,Area.Size.Shape_Organoid_MIN.X,Area.Size.Shape_Organoid_MAX.X,Area.Size.Shape_Organoid_MIN.Y,Area.Size.Shape_Organoid_MAX.Y,Area.Size.Shape_Organoid_MIN.Z,Area.Size.Shape_Organoid_MAX.Z,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Area.Size.Shape_Organoid_SURFACE.AREA
0,,C2-1,,,,,,,,,...,,,,,,,,,,


In [14]:
sc_profile_df.to_parquet(sc_profile_output_path, index=False)
sc_profile_df.head()

Unnamed: 0,object_id,image_set,parent_organoid,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_CENTER.X,Area.Size.Shape_Nuclei_CENTER.Y,Area.Size.Shape_Nuclei_CENTER.Z,Area.Size.Shape_Nuclei_BBOX.VOLUME,Area.Size.Shape_Nuclei_MIN.X,Area.Size.Shape_Nuclei_MAX.X,...,Texture_Cytoplasm_Mito_Difference.Entropy_256.3,Texture_Cytoplasm_Mito_Difference.Variance_256.3,Texture_Cytoplasm_Mito_Entropy_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.3,Texture_Cytoplasm_Mito_Sum.Average_256.3,Texture_Cytoplasm_Mito_Sum.Entropy_256.3,Texture_Cytoplasm_Mito_Sum.Variance_256.3,Texture_Cytoplasm_Mito_Variance_256.3
0,170,C2-1,-1,1145.0,1238.155518,386.541473,0.0,1734.0,1222.0,1256.0,...,0.005223,0.003889,0.005704,-0.171692,0.019883,0.999746,0.023496,0.005329,3.476826,1.398775
