In [1]:
# Generate stacks for picking examples of non-edge and edge cells based on LDA axis

In [2]:
# Print the current working directory and the current date/time via the shell,
# so the saved output records where and when the notebook was executed.
!pwd
!date

/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/cvapipe_analysis/local_staging_notebooks/MovieEdges
Mon May 30 20:34:09 PDT 2022


In [3]:
import os
import sys
import pickle
import importlib
import concurrent
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.notebook import tqdm
from skimage import io as skio
import matplotlib.pyplot as plt
from aicscytoparam import cytoparam
from sklearn.decomposition import PCA
from aicsshparam import shtools, shparam
from aicsimageio import AICSImage
from aicsimageio.writers import OmeTiffWriter
from cvapipe_analysis.tools import io, viz, general, controller, shapespace, plotting

sys.path.insert(1, '../tools')
import common

KeyboardInterrupt: 

In [None]:
# Controller from cvapipe_analysis
# Load the project configuration, build the controller and the local-staging
# I/O device on top of it, then read the manifest of the "preprocessing" step.
# NOTE(review): path_config is an absolute, site-specific path; the notebook
# will not run elsewhere without editing it.
path_config = Path("/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/cvapipe_analysis/")
config = general.load_config_file(path_config)
control = controller.Controller(config)
device = io.LocalStagingIO(control)
df = device.load_step_manifest("preprocessing")
# Show the manifest dimensions and the staging location reported by the controller.
print(df.shape, control.get_staging())

In [None]:
# Create a ShapeSpace from the controller and run it on the "preprocessing"
# manifest loaded above. `space` is reused by the ShapeSpaceMapper cell below.
space = shapespace.ShapeSpace(control)
space.execute(df)

In [None]:
# The "control" staging folder (local_staging_variance_edges) is produced by
# taking the dataframe output by the mapping process and using it to drop the
# cells of the full dataset that were not matched.
dsname = "edges"
path_cvapipe = Path(control.get_staging()).parent

# Both staging folders are siblings of the current staging folder.
datasets = {dsname: {}}
datasets[dsname]["control"] = f"{path_cvapipe}/local_staging_variance_edges"
datasets[dsname]["perturbed"] = f"{path_cvapipe}/local_staging_edge_cells_midpoint_expanded"

In [None]:
# Configure a ShapeSpaceMapper on top of `space` and map the datasets declared
# above; the mapping result is kept as `df_map` for the LDA step.
smapper = shapespace.ShapeSpaceMapper(space, output_folder="./")
smapper.use_full_base_dataset()
smapper.set_make_plots_off()
# NOTE(review): 1e10 presumably makes the distance threshold non-restrictive
# (i.e. no cell is rejected by distance) — confirm against ShapeSpaceMapper.
smapper.set_distance_threshold(1e10)
smapper.map(datasets)
df_map = smapper.result
# Preview the first rows of the mapping result.
df_map.head()

### Control and device for each shape-matched dataset (control and perturbed)

In [None]:
# Build the per-dataset managers for the "edges" entry of `datasets`.
# Later cells read dsmanagers["perturbed"]["device"] and
# dsmanagers["perturbed"]["control"] from the returned object.
dsmanagers = common.setup_cvapipe_for_matched_dataset(config, datasets[dsname])

### Load representations, then compute PCA and LDA

In [None]:
# Re-import the local helper module so edits to it are picked up without
# restarting the kernel.
importlib.reload(common)

# The LDA results are cached on disk next to the notebook. Delete this file to
# force a recomputation.
pca_lda_cache = "data.pkl"

if not os.path.exists(pca_lda_cache):
    print("Loading PILRs and running LDA...")
    pca_lda = common.run_lda_analysis(df_map=df_map, managers=dsmanagers)
    # Write to a temporary file first and rename it into place only after the
    # dump has completed. If the kernel is interrupted mid-write, no truncated
    # data.pkl is left behind to be mistaken for a valid cache on the next run.
    pca_lda_cache_tmp = f"{pca_lda_cache}.tmp"
    with open(pca_lda_cache_tmp, "wb") as fp:
        pickle.dump(pca_lda, fp)
    os.replace(pca_lda_cache_tmp, pca_lda_cache)
else:
    print("Loading pre-computed LDA...")
    # NOTE: unpickling can execute arbitrary code. This is only acceptable
    # because the file is produced by this very notebook; never point it at a
    # pickle obtained from an untrusted source.
    with open(pca_lda_cache, "rb") as fp:
        pca_lda = pickle.load(fp)

In [None]:
# Disabled alternative to the cached cell above: re-runs the LDA on the slice of
# df_map whose second index level equals "HIST1H2BJ". Kept for debugging only.
# importlib.reload(common)
# pca_lda = common.run_lda_analysis(df_map=df_map.loc[pd.IndexSlice[:,"HIST1H2BJ",:]], managers=dsmanagers)

### Finding cells nearest the mean of the two populations

In [None]:
# `ncells` is forwarded to the selection helper. The result is indexed by gene
# further down, where each entry is unpacked as a (dataset index, cell index) pair.
ncells = 15
CellIds = common.find_cells_nearest_the_mean_of_the_two_populations(pca_lda=pca_lda, ncells=ncells)

### Load single-cell data and generate stacks for selected cells

In [None]:
# Read the "loaddata" step manifest twice: once through the main device
# (df_full) and once through the device of the perturbed dataset (df_pt).
df_full = device.load_step_manifest("loaddata")
df_pt = dsmanagers["perturbed"]["device"].load_step_manifest("loaddata")

In [None]:
# For every gene in the LDA results: load the images of the selected cells from
# both populations, tag each image with its LDA coordinate, assemble them into
# one stack and save that stack as SingleCellsLDA-<gene>.tif.
for gene, axes in pca_lda.items():
    # CellIds[gene] holds (dataset index, cell index) pairs; dataset index 0 is
    # looked up in the full manifest and 1 in the perturbed manifest.
    imgs_ct = common.load_multiple_single_cell_images_fast(
        selection={gene: [idx for ds, idx in CellIds[gene] if ds == 0]},
        df=df_full,
        control=control,
        redirect=True,
    )
    imgs_pt = common.load_multiple_single_cell_images_fast(
        selection={gene: [idx for ds, idx in CellIds[gene] if ds == 1]},
        df=df_pt,
        control=dsmanagers["perturbed"]["control"],
        redirect=False,
        alignment=False,
    )

    allcellsinfo = []
    # The loop variable is deliberately NOT called `dsname`: a `for` target is
    # an ordinary notebook-level name, so reusing `dsname` would overwrite the
    # dataset key "edges" defined earlier and break any re-run of the cell that
    # evaluates datasets[dsname].
    for ds, dataset_label, imgs in zip([0, 1], ["Control", "Perturbed"], [imgs_ct, imgs_pt]):
        for cellinfo in imgs[gene]:
            CellId = cellinfo["CellId"]
            allcellsinfo.append({
                "CellId": CellId,
                "gene": gene,
                "dataset": dataset_label,
                # `axes` is indexed by (dataset index, CellId) and exposes an "LDA" column.
                "lda": axes.at[(ds, CellId), "LDA"],
                "img": cellinfo["img"],
            })

    # NOTE(review): the meaning/units of scale=4 and bbox=400 are defined by the
    # helper in tools/common.py — confirm there before changing them.
    stack = common.get_stack_with_single_cell_from_two_populations(allcellsinfo, scale=4, bbox=400)
    skio.imsave(f"SingleCellsLDA-{gene}.tif", stack)

In [None]:
# Final cell: marks the end of a full run.
# NOTE(review): presumably prints/logs a timestamp tagged "complete" — confirm in tools/common.py.
common.now("complete")