In [1]:
# Correlation matrices saved as Excel file
# interaction map 8d, bins, shape modes
# concordance 8d, bins, shape modes
# concordance edge, non-edge, i1, m1, i2, m2 

In [2]:
# Record the working directory and the run date alongside the notebook outputs.
!pwd
!date

/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/cvapipe_analysis/local_staging_notebooks/Tables
Mon May 30 19:27:49 PDT 2022


In [3]:
import os
import sys
import importlib
import concurrent
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm
from skimage import io as skio
import matplotlib.pyplot as plt
from aicscytoparam import cytoparam
from aicsshparam import shtools, shparam
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter
from cvapipe_analysis.tools import io, viz, general, controller, shapespace, plotting

sys.path.insert(1, '../tools')
import common

In [4]:
# Controller from cvapipe_analysis: load the project config, then build the
# controller and the local-staging I/O device that the later cells rely on.
# NOTE(review): absolute path to a shared drive -- consider making it configurable.
path_config = Path("/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/cvapipe_analysis/")
config = general.load_config_file(path_config)
control = controller.Controller(config)
device = io.LocalStagingIO(control)
# Manifest produced by the "loaddata" step.
df = device.load_step_manifest("loaddata")
print(df.shape, control.get_staging())

/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/cvapipe_analysis/local_staging_variance


In [5]:
# TODO: temporary exclusion of the bad NUP plate -- this has to be fixed when
# the paper is submitted.
bad_plate_id = 3500002920
keep_rows = df["PlateId"].ne(bad_plate_id)
df = df.loc[keep_rows]
# Number of cells per structure after the plate exclusion.
df_size = df.groupby("structure_name").size()
print(df.shape)

In [15]:
# Number of distinct FOV ids in the dataset
df["FOVId"].nunique()

  0%|          | 0/8 [00:00<?, ?it/s]

In [8]:
# Number of outliers: rows whose "Outlier" flag is anything other than "No"
df_out = pd.read_csv("../../local_staging_variance/preprocessing/outliers.csv", index_col="CellId")
is_outlier = df_out["Outlier"].ne("No")
df_out.loc[is_outlier].shape

(1044, 2)

In [9]:
# Interphase cells carry cell_stage "M0"; every other stage is counted as mitotic.
stage = df["cell_stage"]
df_int = df.loc[stage.eq("M0")]
df_mito = df.loc[stage.ne("M0")]
# Per-structure cell counts of each subset.
df_int_size = df_int.groupby("structure_name").size()
df_mito_size = df_mito.groupby("structure_name").size()

In [10]:
# Analysis dataset (interphase + no outliers): manifest written by the
# "preprocessing" step, plus the number of cells per structure in it.
df_analysis = device.load_step_manifest("preprocessing")
df_analysis_size = df_analysis.groupby("structure_name").size()
print(df_analysis.shape)

(202847, 1290)


In [11]:
# Shape modes dataset. Per the original note, cells in the 0-1st and 99-100th
# percentiles are not included.
# `space` is reused by later cells (8d sphere, per-bin counts).
space = shapespace.ShapeSpace(control)
space.execute(df_analysis)
df_shapemodes = space.shape_modes
print(df_shapemodes.shape)

(175148, 8)


In [12]:
# Cells inside the 8d sphere. The per-structure counts (df_sphere_size) are
# computed from this table in the following cell, so no placeholder is created here.
df_sphere = space.get_cells_inside_ndsphere_of_radius()

In [13]:
# Number of cell ids listed for each structure inside the sphere.
sphere_counts = {row.structure: len(row.CellIds) for _, row in df_sphere.iterrows()}
df_sphere_size = pd.Series(sphere_counts, name="structure_name")

### Other datasets

In [14]:
# Reload the local `common` helper module so edits to it are picked up
# without restarting the kernel.
importlib.reload(common)
# The staging folders of the matched datasets sit in the parent folder of the
# main staging folder.
path_cvapipe = Path(control.get_staging()).parent
# Each dataset pairs a "control" staging folder with a "perturbed" one.
datasets = {
    "edges": {
        "control": f"{path_cvapipe}/local_staging_variance_edges",
        "perturbed": f"{path_cvapipe}/local_staging_edge_cells_midpoint_expanded"
    },
    "m1m2": {
        "control": f"{path_cvapipe}/local_staging_variance_m1m2",
        "perturbed": f"{path_cvapipe}/local_staging_m1m2"
    },
    "m3": {
        "control": f"{path_cvapipe}/local_staging_variance_m3",
        "perturbed": f"{path_cvapipe}/local_staging_m3"
    }}
# One manager entry per dataset; later cells read
# managers[<dataset>][<"control"|"perturbed">]["device"] to load manifests.
managers = {}
for dsname, ds in datasets.items():
    managers[dsname] = common.setup_cvapipe_for_matched_dataset(config, ds)

In [15]:
# "edges" matched dataset: the control manifest is reported as non-edge cells
# and the perturbed manifest as edge cells in the summary table.
edge_devices = {role: managers["edges"][role]["device"] for role in ("control", "perturbed")}
df_edge_ct = edge_devices["control"].load_step_manifest("preprocessing")
df_edge_pt = edge_devices["perturbed"].load_step_manifest("preprocessing")
# Per-structure cell counts of both manifests.
df_edge_ct_size, df_edge_pt_size = (
    frame.groupby("structure_name").size() for frame in (df_edge_ct, df_edge_pt)
)
print(df_edge_ct.shape, df_edge_pt.shape)

(4477, 1290) (5169, 1255)


In [16]:
# "m1m2" matched dataset: the control manifest is reported as interphase and
# the perturbed manifest as prophase in the summary table.
m1m2_devices = {role: managers["m1m2"][role]["device"] for role in ("control", "perturbed")}
df_m1m2_ct = m1m2_devices["control"].load_step_manifest("preprocessing")
df_m1m2_pt = m1m2_devices["perturbed"].load_step_manifest("preprocessing")
# Per-structure cell counts of both manifests.
df_m1m2_ct_size, df_m1m2_pt_size = (
    frame.groupby("structure_name").size() for frame in (df_m1m2_ct, df_m1m2_pt)
)
print(df_m1m2_ct.shape, df_m1m2_pt.shape)

(1969, 1290) (2201, 1255)


In [18]:
# "m3" matched dataset: the control manifest is reported as interphase and the
# perturbed manifest as early prometaphase in the summary table.
m3_devices = {role: managers["m3"][role]["device"] for role in ("control", "perturbed")}
df_m3_ct = m3_devices["control"].load_step_manifest("preprocessing")
df_m3_pt = m3_devices["perturbed"].load_step_manifest("preprocessing")
# Per-structure cell counts of both manifests.
df_m3_ct_size, df_m3_pt_size = (
    frame.groupby("structure_name").size() for frame in (df_m3_ct, df_m3_pt)
)
print(df_m3_ct.shape, df_m3_pt.shape)

(877, 1290) (981, 1255)


### Merging all columns together

In [32]:
# Per-structure count series, in the same order as the column labels below.
count_series = [
    df_size,
    df_int_size,
    df_mito_size,
    df_analysis_size,
    df_sphere_size,
    df_edge_ct_size,
    df_edge_pt_size,
    df_m1m2_ct_size,
    df_m1m2_pt_size,
    df_m3_ct_size,
    df_m3_pt_size,
]
single_cell_labels = ("total", "interphase", "mitotic", "analysis", "8d sphere")
# NOTE(review): "interphase" appears twice in this group (controls of the m1m2
# and m3 datasets), so the resulting column labels are not unique -- confirm
# this is the intended table header.
shape_matched_labels = (
    "non-edge cells",
    "edge cells",
    "interphase",
    "prophase",
    "interphase",
    "early prometaphase",
)
column_labels = [("single cell dataset", label) for label in single_cell_labels]
column_labels += [("shape-matched dataset", label) for label in shape_matched_labels]

# One row per series, transposed so that structures are rows and datasets are columns.
df_ds = pd.DataFrame(count_series).T
df_ds.columns = pd.MultiIndex.from_tuples(column_labels)
# Keep the rows in the order of the controller's gene list.
df_ds = df_ds.loc[control.get_gene_names()]

In [33]:
# Workflow label of each structure, read from the first non-null WorkflowId of
# its group. Only the WorkflowId column is aggregated (no need to take the
# first value of every manifest column), and the label is extracted in a
# single pass instead of writing into the Series while iterating over it.
df_work = df.groupby("structure_name")["WorkflowId"].first()
# The label is the text between the first pair of single quotes
# (e.g. "Pipeline 4.1"); a value without quotes still raises, as before.
df_work = df_work.map(lambda workflow_id: workflow_id.split("'")[1])

In [34]:
# Add the descriptive columns in front of the count columns.
# DataFrame.insert modifies df_ds in place and refuses to insert a column that
# already exists, so this cell can only be run once per freshly built df_ds.
# The workflow Series is aligned on the structure (gene) index.
df_ds.insert(loc=0, column="Workflow", value=df_work)
# Order that we want to display (by acquisition): a permutation of 1-25 given
# positionally, one value per row of df_ds.
# NOTE(review): assumes the rows follow control.get_gene_names() order -- confirm.
df_ds.insert(loc=1, column="Acquisition Order", value=[10,17,25,24,21,3,22,5,23,20,19,2,11,7,1,12,13,9,4,18,14,6,15,8,16])
# Structure names are also positional.
# NOTE(review): presumably in the same order as the gene names -- verify.
df_ds.insert(loc=2, column="Structure", value=control.get_structure_names())

In [35]:
# Append a "Total" row holding the sums of the cell-count columns only.
# - Summing the whole frame would also concatenate the Workflow/Structure
#   strings and add up the acquisition order, so those cells are left blank.
# - A "Total" row from a previous run is dropped first, so re-running this
#   cell does not stack totals on top of totals.
# - DataFrame.append was removed in pandas 2.0; pd.concat is used instead.
count_groups = ("single cell dataset", "shape-matched dataset")
df_body = df_ds.drop(index="Total", errors="ignore")
is_count = df_body.columns.get_level_values(0).isin(count_groups)
# Columns are addressed by position because their labels are not unique
# ("interphase" occurs twice under "shape-matched dataset").
total_values = [
    df_body.iloc[:, pos].sum() if counted else ""
    for pos, counted in enumerate(is_count)
]
total_row = pd.DataFrame(
    [total_values],
    index=pd.Index(["Total"], name=df_body.index.name),
    columns=df_body.columns,
)
df_ds = pd.concat([df_body, total_row])
df_ds

Unnamed: 0_level_0,Workflow,Acquisition Order,Structure,single cell dataset,single cell dataset,single cell dataset,single cell dataset,single cell dataset,shape-matched dataset,shape-matched dataset,shape-matched dataset,shape-matched dataset,shape-matched dataset,shape-matched dataset
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,total,interphase,mitotic,analysis,8d sphere,non-edge cells,edge cells,interphase,prophase,interphase.1,early prometaphase
FBL,Pipeline 4.1,10,nucleoli [DFC),10446,9986,460,9955,1554,260,310,68,71,24,26
NPM1,Pipeline 4.2,17,nucleoli [GC),12550,11865,685,11827,1819,314,358,105,119,59,67
SON,Pipeline 4.4,25,nuclear speckles,2983,2840,143,2837,369,125,149,51,57,9,9
SMC1A,Pipeline 4.4,24,cohesins,2392,2287,105,2275,550,33,35,29,31,6,6
HIST1H2BJ,Pipeline 4.4,21,histones,15877,15093,784,15091,2876,125,138,136,147,65,73
LMNB1,Pipeline 4,3,nuclear envelope,12409,11895,514,11865,2718,183,195,89,97,40,47
NUP153,Pipeline 4.4,22,nuclear pores,17738,16854,884,16819,3293,116,120,180,199,70,73
SEC61B,Pipeline 4,5,ER [Sec61 beta),6714,6418,296,6410,747,167,192,67,74,22,27
ATP2A2,Pipeline 4.4,23,ER [SERCA2),10177,9720,457,9706,2303,45,48,70,80,29,34
SLC25A17,Pipeline 4.4,20,peroxisomes,1997,1853,144,1853,304,41,49,23,25,12,15


In [36]:
# Write the per-structure summary table to a CSV file in the working directory.
df_ds.to_csv("TableS1_main.csv")

### Number of cells along shape modes

In [171]:
# For every shape mode and every map point (bin) along it, count the active
# cells per structure.
bin_counts = []
mpIdc = control.get_center_map_point_index()
for sm in control.get_shape_modes():
    space.set_active_shape_mode(sm, digitize=True)
    for mpId in control.get_map_point_indexes():
        space.set_active_map_point_index(mpId)
        active_cells = df.loc[space.get_active_cellids()]
        counts = active_cells.groupby("structure_name").size()
        # Column label: (shape mode, bin offset relative to the center map point).
        counts.name = (sm, mpId - mpIdc)
        bin_counts.append(counts)
# Structures as rows (in the controller's gene order), (shape mode, bin) as columns.
df_bin = pd.DataFrame(bin_counts).T.loc[control.get_gene_names()]

In [173]:
# Attach the structure names as an extra column (positional assignment).
# NOTE(review): presumably in the same order as control.get_gene_names() -- verify.
df_bin["Structure"] = control.get_structure_names()
# set_index returns a new frame that is only displayed here; df_bin itself
# keeps its original index and the trailing "Structure" column.
# NOTE(review): the CSV export in the last cell therefore writes that layout,
# not the one shown below -- confirm this is intended.
df_bin.set_index("Structure", drop=True)

Unnamed: 0_level_0,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC1,NUC_MEM_PC2,...,NUC_MEM_PC7,NUC_MEM_PC8,NUC_MEM_PC8,NUC_MEM_PC8,NUC_MEM_PC8,NUC_MEM_PC8,NUC_MEM_PC8,NUC_MEM_PC8,NUC_MEM_PC8,NUC_MEM_PC8
Unnamed: 0_level_1,-4,-3,-2,-1,0,1,2,3,4,-4,...,4,-4,-3,-2,-1,0,1,2,3,4
Structure,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
nucleoli [DFC),94,295,642,1096,1530,1761,1488,1015,502,240,...,416,409,480,870,1495,1724,1484,940,561,460
nucleoli [GC),123,385,887,1443,1854,2078,1695,1111,424,189,...,467,509,624,1138,1804,1998,1678,1137,619,493
nuclear speckles,580,524,383,293,252,157,74,39,7,40,...,95,58,96,246,461,573,421,258,131,65
cohesins,193,336,430,467,334,172,85,28,6,48,...,78,62,86,213,351,515,409,252,108,55
histones,748,1490,2314,2554,2289,2045,1355,586,133,509,...,600,622,798,1639,2491,2962,2425,1430,686,461
nuclear envelope,829,1374,1802,2025,1900,1457,833,348,109,157,...,396,304,477,1122,2002,2580,2093,1222,534,343
nuclear pores,382,952,1918,2633,3111,2786,1914,913,309,597,...,615,753,903,1745,2791,3194,2613,1551,840,528
ER [Sec61 beta),97,173,356,593,814,942,934,798,511,129,...,238,323,354,594,898,1113,886,556,269,225
ER [SERCA2),683,1478,2140,2078,1430,675,279,116,17,405,...,348,243,379,842,1532,2149,1929,1119,485,218
peroxisomes,29,122,225,317,338,305,176,74,29,88,...,68,75,110,186,290,322,274,176,100,82


In [174]:
# Write the per-bin cell counts to a CSV file in the working directory.
df_bin.to_csv("TableS1_shapemodes.csv")