In [None]:
import pathlib

import pandas as pd
from IPython.display import Markdown, display

try:
    cfg = get_ipython().config
    in_notebook = True
except NameError:
    in_notebook = False

    # Get the current working directory
cwd = pathlib.Path.cwd()

if (cwd / ".git").is_dir():
    root_dir = cwd

else:
    root_dir = None
    for parent in cwd.parents:
        if (parent / ".git").is_dir():
            root_dir = parent
            break

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

In [2]:
sc_profiles_path = pathlib.Path(
    root_dir, "data/all_patient_profiles/sc_profiles.parquet"
).resolve(strict=True)
organoid_profiles_path = pathlib.Path(
    root_dir, "data/all_patient_profiles/organoid_profiles.parquet"
).resolve(strict=True)

In [3]:
sc_df = pd.read_parquet(sc_profiles_path)
organoid_df = pd.read_parquet(organoid_profiles_path)

In [4]:
# get the unique combinations of patient and treatment
compounds_counts = (
    organoid_df.groupby(["patient", "treatment"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "count"})
    .drop(columns="count")
    .groupby(["patient"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "number_of_compounds"})
)

In [5]:
# get the unique combinations of patient and treatment
treatments_counts = (
    organoid_df.groupby(["patient", "treatment", "dose"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "count"})
    .drop(columns="count")
    .groupby(["patient"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "number_of_treatments"})
)

In [6]:
# get the unique combinations of patient and treatment
well_counts = (
    sc_df.groupby(["patient", "Well"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "count"})
    .drop(columns="count")
    .groupby(["patient"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "number_of_wells"})
)

In [7]:
well_fov_counts = (
    sc_df.groupby(["patient", "treatment", "Well", "image_set"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "count"})
    .drop(columns="count")
    .groupby(["patient"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "number_of_well_fovs"})
)

In [8]:
organoid_counts = (
    sc_df.groupby(["patient", "treatment", "Well", "image_set", "parent_organoid"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "count"})
    .drop(columns="count")
    .loc[sc_df["parent_organoid"] != -1]
    .groupby(["patient"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "number_of_organoids"})
)

In [9]:
sc_df.groupby(
    [
        "patient",
        "treatment",
        "Well",
        "image_set",
    ]
).size().to_frame().reset_index().rename(columns={0: "count"})
# sum the count at the patient level
sc_df.groupby(
    ["patient", "treatment", "Well", "image_set"]
).size().to_frame().reset_index().rename(columns={0: "count"}).groupby(
    ["patient"]
).sum()

single_cell_counts = (
    sc_df.groupby(["patient", "treatment", "Well", "image_set"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "number_of_single_cells"})
    .groupby(["patient"])
    .sum()
    .drop(columns=["treatment", "Well", "image_set"])
    .reset_index()
)

In [None]:
table1 = pd.merge(
    pd.merge(
        pd.merge(
            pd.merge(
                pd.merge(compounds_counts, treatments_counts, on="patient"),
                well_counts,
                on="patient",
            ),
            well_fov_counts,
            on="patient",
        ),
        organoid_counts,
        on="patient",
    ),
    single_cell_counts,
    on="patient",
)

Unnamed: 0,patient,number_of_compounds,number_of_treatments,number_of_wells,number_of_well_fovs,number_of_organoids,number_of_single_cells
0,NF0014,17,21,49,98,121,1401
1,NF0016,16,20,43,77,85,376
2,NF0018,17,21,44,106,116,624
3,NF0021,17,21,50,307,350,2716
4,NF0030,17,21,49,177,196,1328
5,NF0040,22,26,60,337,382,2858
6,SARCO219,17,21,50,132,110,426
7,SARCO361,17,21,50,270,256,1503


In [11]:
table1["theoretical_number_of_compounds"] = pd.Series([17, 17, 17, 17, 17, 22, 17, 17])
table1["theoretical_number_of_treatments"] = pd.Series([21, 21, 21, 21, 21, 26, 21, 21])
table1["theoretical_number_of_well_fovs"] = pd.Series(
    [104, 125, 162, 350, 208, 420, 199, 350]
)
table1["Tumor type"] = pd.Series(
    [
        "Neurofirbroma (subcutaneous)",
        "Neurofibroma, diffuse and plexiform",
        "Neurofibroma, plexiform, non-contiguous cutaneous",
        "Neurofibroma, plexiform, non-contiguous cutaneous",
        "Myopericytoma",
        "Schwannoma, with degeneration",
        "MPNST in association with plexiform neurofibroma",
        "Sarcoma, treated",
    ]
)
tumor_type = table1.pop("Tumor type")
table1.insert(1, "Tumor type", tumor_type)
table1

Unnamed: 0,patient,Tumor type,number_of_compounds,number_of_treatments,number_of_wells,number_of_well_fovs,number_of_organoids,number_of_single_cells,theoretical_number_of_compounds,theoretical_number_of_treatments,theoretical_number_of_well_fovs
0,NF0014,Neurofirbroma (subcutaneous),17,21,49,98,121,1401,17,21,104
1,NF0016,"Neurofibroma, diffuse and plexiform",16,20,43,77,85,376,17,21,125
2,NF0018,"Neurofibroma, plexiform, non-contiguous cutaneous",17,21,44,106,116,624,17,21,162
3,NF0021,"Neurofibroma, plexiform, non-contiguous cutaneous",17,21,50,307,350,2716,17,21,350
4,NF0030,Myopericytoma,17,21,49,177,196,1328,17,21,208
5,NF0040,"Schwannoma, with degeneration",22,26,60,337,382,2858,22,26,420
6,SARCO219,MPNST in association with plexiform neurofibroma,17,21,50,132,110,426,17,21,199
7,SARCO361,"Sarcoma, treated",17,21,50,270,256,1503,17,21,350


In [None]:
# convert the table to a markdown table
table1_md = table1.to_markdown(index=False, tablefmt="pipe")

'| patient   | Tumor type                                        |   number_of_compounds |   number_of_treatments |   number_of_wells |   number_of_well_fovs |   number_of_organoids |   number_of_single_cells |   theoretical_number_of_compounds |   theoretical_number_of_treatments |   theoretical_number_of_well_fovs |\n|:----------|:--------------------------------------------------|----------------------:|-----------------------:|------------------:|----------------------:|----------------------:|-------------------------:|----------------------------------:|-----------------------------------:|----------------------------------:|\n| NF0014    | Neurofirbroma (subcutaneous)                      |                    17 |                     21 |                49 |                    98 |                   121 |                     1401 |                                17 |                                 21 |                               104 |\n| NF0016    | Neurofibroma, diffuse and

In [None]:
# Display the table nicely formatted

# Display as formatted markdown
print("Rendered Table:")
display(Markdown(table1_md))

Rendered Table:


| patient   | Tumor type                                        |   number_of_compounds |   number_of_treatments |   number_of_wells |   number_of_well_fovs |   number_of_organoids |   number_of_single_cells |   theoretical_number_of_compounds |   theoretical_number_of_treatments |   theoretical_number_of_well_fovs |
|:----------|:--------------------------------------------------|----------------------:|-----------------------:|------------------:|----------------------:|----------------------:|-------------------------:|----------------------------------:|-----------------------------------:|----------------------------------:|
| NF0014    | Neurofirbroma (subcutaneous)                      |                    17 |                     21 |                49 |                    98 |                   121 |                     1401 |                                17 |                                 21 |                               104 |
| NF0016    | Neurofibroma, diffuse and plexiform               |                    16 |                     20 |                43 |                    77 |                    85 |                      376 |                                17 |                                 21 |                               125 |
| NF0018    | Neurofibroma, plexiform, non-contiguous cutaneous |                    17 |                     21 |                44 |                   106 |                   116 |                      624 |                                17 |                                 21 |                               162 |
| NF0021    | Neurofibroma, plexiform, non-contiguous cutaneous |                    17 |                     21 |                50 |                   307 |                   350 |                     2716 |                                17 |                                 21 |                               350 |
| NF0030    | Myopericytoma                                     |                    17 |                     21 |                49 |                   177 |                   196 |                     1328 |                                17 |                                 21 |                               208 |
| NF0040    | Schwannoma, with degeneration                     |                    22 |                     26 |                60 |                   337 |                   382 |                     2858 |                                22 |                                 26 |                               420 |
| SARCO219  | MPNST in association with plexiform neurofibroma  |                    17 |                     21 |                50 |                   132 |                   110 |                      426 |                                17 |                                 21 |                               199 |
| SARCO361  | Sarcoma, treated                                  |                    17 |                     21 |                50 |                   270 |                   256 |                     1503 |                                17 |                                 21 |                               350 |