In [None]:
from tifffile import imread, imsave
import os, re, sys, csv
import numpy as np
import matplotlib.pyplot as pyp
from skimage.morphology import ball
from skimage.segmentation import find_boundaries
from skimage.measure import regionprops, regionprops_table, label
from skimage.segmentation import clear_border
import cv2
import copy
import pandas as pd
from scipy import ndimage as ndi
import napari
import scanpy as sc
import seaborn as sns
from skimage.future.graph import RAG, rag_mean_color
import math
sys.path.append('~/3D_IMC_paper/Python/python_3d_imc_tools')
from io_files import image_filepath_for_3D_stack
import phenograph
import pickle


In [None]:
## function from skimage package https://github.com/scikit-image/scikit-image/blob/main/skimage/measure/_regionprops.py#L869-L1161

# Output dtype of the column(s) generated for each built-in region property.
COL_DTYPES = {
    'area': int,
    'bbox': int,
    'bbox_area': int,
    'moments_central': float,
    'centroid': float,
    'convex_area': int,
    'convex_image': object,
    'coords': object,
    'eccentricity': float,
    'equivalent_diameter': float,
    'euler_number': int,
    'extent': float,
    'feret_diameter_max': float,
    'filled_area': int,
    'filled_image': object,
    'moments_hu': float,
    'image': object,
    'inertia_tensor': float,
    'inertia_tensor_eigvals': float,
    'intensity_image': object,
    'label': int,
    'local_centroid': float,
    'major_axis_length': float,
    'max_intensity': int,
    'mean_intensity': float,
    'min_intensity': int,
    'minor_axis_length': float,
    'moments': float,
    'moments_normalized': float,
    'orientation': float,
    'perimeter': float,
    'slice': object,
    'solidity': float,
    'weighted_moments_central': float,
    'weighted_centroid': float,
    'weighted_moments_hu': float,
    'weighted_local_centroid': float,
    'weighted_moments': float,
    'weighted_moments_normalized': float
}

# Properties whose value is stored as one Python object per region (never raveled).
OBJECT_COLUMNS = {
    'image', 'coords', 'convex_image', 'slice',
    'filled_image', 'intensity_image'
}

def skimage_props_to_dict(regions, properties=('label', 'bbox'), separator='-'):
    """Convert a list of region-property objects into a dict of columns.

    Parameters
    ----------
    regions : sequence
        Region objects supporting both attribute access (``region.prop``) and
        item access (``region[prop]``).
    properties : iterable of str
        Names of the properties to extract.
    separator : str
        Joins a property name with the element index for array-valued
        properties (e.g. ``'centroid-0'``).

    Returns
    -------
    dict
        Maps column name to a 1-D numpy array with one entry per region.
        Scalar and object properties yield one column; array-valued
        properties yield one column per element. An empty ``regions``
        yields an empty dict.
    """
    out = {}
    n = len(regions)
    if n == 0:
        # Nothing to probe for shapes/dtypes; previously this raised IndexError.
        return out

    # The first region is only used to discover each property's shape/dtype.
    first = regions[0]
    for prop in properties:
        rp = getattr(first, prop)
        if prop in COL_DTYPES:
            dtype = COL_DTYPES[prop]
        else:
            # Custom (extra) property: the dtype has to be inferred. The helper
            # is private to skimage and was never imported in this notebook,
            # so it is imported here, only when actually needed.
            from skimage.measure._regionprops import _infer_regionprop_dtype
            func = first._extra_properties[prop]
            dtype = _infer_regionprop_dtype(
                func,
                intensity=first._intensity_image is not None,
                ndim=first.image.ndim,
            )

        # Scalars and objects get one column per property;
        # array properties are raveled into one column per element.
        if np.isscalar(rp) or prop in OBJECT_COLUMNS or dtype is np.object_:
            column = np.zeros(n, dtype=dtype)
            for i in range(n):
                column[i] = regions[i][prop]
            out[prop] = column
        else:
            if isinstance(rp, np.ndarray):
                shape = rp.shape
            else:
                shape = (len(rp),)

            for ind in np.ndindex(shape):
                # 1-D properties (tuples/lists) are indexed with a plain int.
                loc = ind if len(ind) > 1 else ind[0]
                column = np.zeros(n, dtype=dtype)
                for k in range(n):
                    column[k] = regions[k][prop][loc]
                out[separator.join(map(str, (prop,) + ind))] = column
    return out

### Set inputs

In [None]:
# INPUT: single-channel TIFFs and derived files for the whole 3D model.

# Folder with the registered stack (one image per slice).
# Python file APIs do not expand "~", so it is resolved explicitly here.
# The trailing slash is kept because paths are built as `input_base + name`.
input_base = os.path.expanduser('~/3D_registred_tiffs/IMC_fullStack_registred/imageJ_registration/full_model_aligned/')
cell_labels_input = input_base + "measured_mask_final_segmentation_hwatershed_500.00_90%.tif"

results_file = input_base +'model201710_singleCell_analysis.h5ad'  # the file that will store the analysis results
panCK_mean_expression_image = input_base + "panCK_mean_labels_image.tif"
cluster_labels_image = input_base + "cluster_labels_image.tif"

In [None]:
# Load the single-cell results and keep the per-cell cluster ids (`o`) and
# cell labels (`c`) as plain lists; both are reused by later cells.
adata = sc.read_h5ad(results_file)
o = list(adata.obs['phenograph'])
c = list(adata.obs['cell_labels'])

# Map integer cell label -> phenograph cluster (a later duplicate label wins).
cluster_cell_label_dictionary = {
    int(cell_label): cluster for cell_label, cluster in zip(c, o)
}

In [None]:
# Read the label mask, the panCK mean-expression image and the cluster image.
cell_labels, panCK_mean_X, cluster_labels = (
    imread(path)
    for path in (cell_labels_input, panCK_mean_expression_image, cluster_labels_image)
)

In [None]:
# Show the cropped label volume (same crop as the ROI used below) in napari.
# NOTE(review): scale=[2,1,1] presumably means the first axis spacing is twice
# the in-plane pixel size — confirm against the acquisition settings.
with napari.gui_qt():
    viewer = napari.view_image(cell_labels[60:, :350, :200] , scale = [2,1,1])

In [None]:
# Crop the region of interest out of the label volume (a view of cell_labels).
ROI_image = cell_labels[60:, :350, :200]
# Cell labels present in the ROI (sorted; may include background 0).
labels_ROI = np.unique(ROI_image)
# Independent copy that will be overwritten with cluster ids.
ROI_cluster_im = np.array(ROI_image, copy=True)

Set up dictionaries to link cell labels to clusters

In [None]:

# Paint every ROI cell with its phenograph cluster id (background 0 is kept).
# The mask is taken from the untouched ROI_image, not from ROI_cluster_im:
# cluster ids and cell labels share the same integer range, so matching
# against the image being overwritten would re-map pixels that were already
# converted whenever a later cell label equals an earlier cluster id.
for i in labels_ROI:
    if i == 0:
        continue
    ROI_cluster_im[ROI_image == i] = int(cluster_cell_label_dictionary[i])

In [None]:
# Show the panCK mean-expression volume in napari.
with napari.gui_qt():
    viewer = napari.view_image(panCK_mean_X, scale = [2,1,1])
    # Optional overlay of the cluster-painted ROI (currently disabled):
    #viewer.add_labels(ROI_cluster_im, scale = [2,1,1])

In [None]:
# Position in `c` (i.e. row in adata.obs) of every non-background ROI label.
# list.index raises ValueError if a ROI label is absent from `c`.
index_labels = [c.index(roi_label) for roi_label in labels_ROI if not roi_label == 0]

#### Overlay panCK expression for the ROI of interest
Only visualize the part of the model where the invasive clusters are present. Click on each invasive cell to extract its cell label.

In [None]:
marker_of_interest = 'panCK'
# Maps cell label -> expression value of `marker_of_interest` for ROI cells.
dict_mean = {}

cell_label_column = adata.obs['cell_labels']
for row_position in index_labels:
    # index_labels holds row positions (taken from list(adata.obs[...])),
    # so positional access is required; plain `series[int]` is label-based
    # or relies on a deprecated positional fallback.
    object_label = cell_label_column.iloc[row_position]
    # Select the row directly instead of rebuilding a full boolean mask per
    # cell. This is equivalent as long as cell labels are unique, which the
    # original scalar conversion already required.
    marker_values = adata[row_position, marker_of_interest].X
    # ravel()[0] extracts the single value without calling float() on a 2-D array.
    dict_mean[object_label] = float(np.asarray(marker_values).ravel()[0])

In [None]:
# Float image in which every ROI cell is filled with its rounded marker value;
# pixels of labels absent from dict_mean stay 0.
mean_marker_image = np.zeros(ROI_image.shape)
for object_label, marker_mean in dict_mean.items():
    mean_marker_image[ROI_image == object_label] = round(marker_mean, 3)

In [None]:
# Copy of the ROI labels in which cells with a panCK value below 0.05 are
# erased (set to 0).
tumor_cells_ROI = copy.deepcopy(ROI_image)
low_panCK_labels = [label for label, value in dict_mean.items() if value < 0.05]
for label in low_panCK_labels:
    tumor_cells_ROI[tumor_cells_ROI == label] = 0

In [None]:
# Output paths for the ROI marker-expression image and the filtered ROI labels.
roi_name = input_base + "panCK_ROI_marker_xpression_INVASIVE_image.tif"
roi_labels_name = input_base + "ROI_INVASIVE_labels_image.tif"

# Write both arrays to disk as TIFF files.
imsave(roi_labels_name,tumor_cells_ROI) 
imsave(roi_name,mean_marker_image) 


In [None]:
# Reload the two images written by the previous cell (same file names), plus a
# panCK image for the overlay below.
mean_marker_image = imread(input_base + "panCK_ROI_marker_xpression_INVASIVE_image.tif")
tumor_cells_ROI = imread(input_base + "ROI_INVASIVE_labels_image.tif")
# NOTE(review): 'panCK_ROI_image.tif' is not produced anywhere in the visible
# notebook — confirm where it comes from.
panCK = imread(input_base + 'panCK_ROI_image.tif')

In [None]:
# Interactive picking: show the panCK crop with the filtered ROI labels on top
# and print the value under the cursor on every mouse-drag event.
with napari.gui_qt():
    viewer = napari.view_image(panCK[60:, :350, :200] , scale = [1,1,1])
    label_layer = viewer.add_labels(tumor_cells_ROI, scale = [1,1,1])
    # Register `callback` on the labels layer's mouse-drag callbacks.
    @label_layer.mouse_drag_callbacks.append
    def callback(layer, event):
        #print(event)
        # NOTE(review): `_value` is a private napari attribute; presumably the
        # label under the cursor — confirm for the napari version in use.
        print(layer._value)  # (0,0) is the center of the upper left pixel

#### Use list of objects of interest

In [None]:
def _read_label_list(path):
    """Read one integer cell label per line from `path`.

    Returns the unique labels as ints, in first-seen order. Lines that are
    not integers (blank lines, a header, printed ``None``) and the background
    label 0 are skipped.
    """
    labels = []
    seen = set()
    # The context manager closes the file; the original handle was never closed.
    with open(path, 'r') as handle:
        for line in handle:
            try:
                label = int(line.strip())
            except ValueError:
                continue
            if label == 0 or label in seen:
                continue
            seen.add(label)
            labels.append(label)
    return labels


# The previous version kept raw lines (strings with trailing newlines), which
# never compare equal to the integer cell labels used in later cells, and it
# dropped an arbitrary entry via `list(set(...))[1:]` because set order is
# not defined. Labels are now parsed to int and filtered explicitly.
# NOTE(review): assumes the file holds one label per line — confirm its format.
invasive_obi = _read_label_list("invasive_object_list_of_labels")

In [None]:
# Number of entries in the invasive label list.
len(invasive_obi)

Create a new adata object that only contains the invasive cells to then cluster the cells and visualize with heatmap

In [None]:
# Reload the full single-cell results so this section can start from a fresh
# adata; rebuild the cluster (`o`) and cell-label (`c`) lists.
adata = sc.read_h5ad(results_file)
o = list(adata.obs['phenograph'])
c = list(adata.obs['cell_labels'])

# Integer cell label -> phenograph cluster (a later duplicate label wins).
cluster_cell_label_dictionary = {
    int(cell_label): cluster for cell_label, cluster in zip(c, o)
}

In [None]:
# Row position in `c` of every invasive label (ValueError if one is missing).
index_labels = [c.index(invasive_label) for invasive_label in invasive_obi]

In [None]:
# Subset adata to the rows of the invasive cells (positional row indices).
# NOTE(review): this is presumably an AnnData view rather than a copy; later
# writes to it may trigger an implicit copy — confirm or add .copy().
invasive_ROI = adata[index_labels]

In [None]:
# Compute the scanpy neighbourhood graph of the invasive subset
# (10 neighbours, fixed random_state).
sc.pp.neighbors(invasive_ROI, n_neighbors=10,random_state = 111)

Use python implementation of phenograph: https://github.com/jacoblevine/PhenoGraph

In [None]:
# Cluster the invasive cells with PhenoGraph on the expression matrix
# (k=10, manhattan metric, fixed seed), then show the distinct community ids.
communities, graph, Q = phenograph.cluster(invasive_ROI.X, k=10, primary_metric= 'manhattan', seed = 10)
print(np.unique(communities))

In [None]:
# Move community 0 to a new id one above the current maximum, leave all other
# ids untouched, and store the result as a categorical obs column.
largest = np.unique(communities).max() + 1
relabelled_communities = []
for community_id in communities:
    if community_id == 0:
        relabelled_communities.append(largest)
    else:
        relabelled_communities.append(community_id)
communities = relabelled_communities
invasive_ROI.obs['phenograph'] = pd.Categorical(communities)

In [None]:
# Global scanpy plotting defaults for the following figures.
sc.set_figure_params(dpi=100, color_map = 'viridis_r')
sc.settings.verbosity = 1

In [None]:
# Matrix plot of all markers per phenograph cluster of the invasive cells,
# with dendrogram; the figure is saved with the given file-name suffix.
# NOTE(review): the colorbar title says "column scaled" but no standard_scale
# argument is passed, and the file name says "median" — confirm both labels.
sc.pl.matrixplot(invasive_ROI,invasive_ROI.var_names, 'phenograph', dendrogram=True, cmap='Blues', 
                 colorbar_title='column scaled\nexpression', save = '_invasive_cells_model201710_median_expression_phenograph.pdf' )

In [None]:
# Stacked violin plot of all markers per phenograph cluster; return_fig=True
# returns the plot object so totals can be added and the figure saved later.
vp = sc.pl.stacked_violin(invasive_ROI, var_names=invasive_ROI.var_names, groupby= 'phenograph', colorbar_title='Median expression',dendrogram=True, standard_scale=None, stripplot=True, 
                          jitter=False, size=1,return_fig=True, ax=None)  #row_palette=cluster_colors
# Add the per-group totals to the plot.
vp.add_totals()

In [None]:
# "~" is not expanded by the plotting backend, so resolve the home directory
# explicitly before saving the stacked-violin figure.
vp.savefig(os.path.expanduser('~/figures/stacked_violin__INVASIVE_cells_model201710_expression_phenograph.png'))

#### Add invasive clusters as a separate cluster grouping to the initial adata containing all the cells

In [None]:
# Assign every cell to one group — 'invasive', 'epithelial', 'basal' or
# 'other' — so marker expression of the invasive cells can be compared with
# all other cells in the model. Membership in the invasive list takes
# precedence over the cluster-based groups.
invasive_cluster_assignment = []
epithelial_clusters = [4,6,3,37,2,1,7,5, 11, 20, 23, 24, 31,32, 33,34,35]
basal_clusters = [19]
for cell_label, cluster_id in zip(c, o):
    if int(cell_label) in invasive_obi:
        group = 'invasive'
    else:
        cluster_number = int(cluster_id)
        if cluster_number in epithelial_clusters:
            group = 'epithelial'
        elif cluster_number in basal_clusters:
            group = 'basal'
        else:
            group = 'other'
    invasive_cluster_assignment.append(group)

In [None]:
# Store the per-cell group assignment as a categorical obs column.
adata.obs['invasive'] = pd.Categorical(invasive_cluster_assignment)

In [None]:
# Violin plots of the listed markers, grouped by the 'invasive' obs column in
# a fixed group order.
sc.set_figure_params(dpi=150, fontsize=12,figsize='6,6')

sc.pl.violin(adata, ['E/P-Cadherin','panCK','HER2 (bis)','CK7'], groupby='invasive', order = ['invasive', 'epithelial', 'basal', 'other'])

In [None]:
# Same grouping and order as the previous plot, for the next set of markers.
sc.set_figure_params(dpi=150, fontsize=12,figsize='6,6')

sc.pl.violin(adata, ['CK8/18','CK19','CK5','CK14'], groupby='invasive', order = ['invasive', 'epithelial', 'basal', 'other'])

In [None]:
# Same grouping and order, for CD44, CD138, Vimentin and pS6.
sc.set_figure_params(dpi=150, fontsize=12,figsize='6,6')

sc.pl.violin(adata, [ 'CD44', 'CD138',  'Vimentin','pS6'], groupby='invasive',  order = ['invasive', 'epithelial', 'basal', 'other'])

In [None]:
# Same grouping and order, for Ki-67, cPARP+cCasp3, phospho-H3 and Ir193.
sc.set_figure_params(dpi=150, fontsize=12,figsize='6,6')

sc.pl.violin(adata, ['Ki-67','cPARP+cCasp3', 'phospho-H3', 'Ir193'], groupby='invasive',  order = ['invasive', 'epithelial', 'basal', 'other'])

### Calculate invasive cluster neighbors
First calculate the Euclidean distance between the centroids of all objects in the model, but only record the cell labels of objects that lie within 50 um of each other. Then, for the invasive cells, extract the cell labels of all cells within a 50 um radius.

In [None]:
# Region properties of every labelled object in the full label volume.
object_diameter_im = regionprops(cell_labels)
# Column dict holding 'label' plus one 'centroid-<axis>' column per image axis.
object_centroid_dict = skimage_props_to_dict(
    object_diameter_im,
    properties=['label', 'centroid'],
)

In [None]:
# One row per object; columns are the label and the centroid coordinates.
centroid_table = pd.DataFrame.from_dict(object_centroid_dict)
# Row index -> {'label': ..., 'centroid-0': ..., ...} for per-object lookups.
centroid_dict_3d = centroid_table.to_dict(orient='index')

In [None]:
def _neighbors_within_radius(centroids_by_row, radius):
    """Map every cell label to the labels of all other cells within `radius`.

    Parameters
    ----------
    centroids_by_row : dict
        Row index -> dict with the keys 'label', 'centroid-0', 'centroid-1'
        and 'centroid-2'.
    radius : float
        Maximum Euclidean centroid distance (inclusive), in the same units as
        the centroid coordinates.

    Returns
    -------
    dict
        label -> list of neighbouring labels, in table (row) order. A cell is
        never listed as its own neighbour.
    """
    # Function-scope import so this cell does not depend on the import cell.
    from scipy.spatial import cKDTree

    rows = list(centroids_by_row.values())
    neighbors = {row['label']: [] for row in rows}
    if not rows:
        return neighbors

    coords = np.array(
        [[row['centroid-0'], row['centroid-1'], row['centroid-2']] for row in rows],
        dtype=float,
    )
    # A KD-tree radius query replaces the pure-Python loop over all cell pairs,
    # which is quadratic in the number of cells.
    tree = cKDTree(coords)
    hits = tree.query_ball_point(coords, r=radius)
    for position, (row, hit_positions) in enumerate(zip(rows, hits)):
        # Sorting restores table order; the query also returns the point itself.
        neighbors[row['label']] = [
            rows[j]['label'] for j in sorted(hit_positions) if j != position
        ]
    return neighbors


# Distances are measured in centroid (voxel-index) units.
# NOTE(review): the text calls this radius "50um"; that only holds if voxels
# are 1 um in every axis, while the viewers above use scale=[2,1,1] — confirm
# whether the first axis should be rescaled before measuring distances.
distance_between_cells_dict = _neighbors_within_radius(centroid_dict_3d, radius=50)



In [None]:
import pickle
def save_obj(obj, name ):
    """Pickle `obj` to the file path `name` with the highest protocol available."""
    handle = open(name, 'wb+')
    try:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)
    finally:
        # Always release the file handle, even if pickling fails.
        handle.close()

def load_obj(name):
    """Return the object unpickled from the file path `name`.

    Unpickling can execute arbitrary code; only load files you created yourself.
    """
    handle = open(name, 'rb')
    try:
        return pickle.load(handle)
    finally:
        handle.close()

In [None]:
# Persist the neighbour dictionary next to the input data.
out_dict_name = input_base + 'neighbors_50um_3D.pkl'
save_obj(distance_between_cells_dict, out_dict_name)

In [None]:
# Reload the neighbour dictionary from disk (same file as written above), so
# the analysis can resume here without recomputing the distances.
out_dict_name = input_base + 'neighbors_50um_3D.pkl'

distance_between_cells_dict = load_obj(out_dict_name) 

In [None]:
# Flat list (duplicates kept) of the neighbours of every invasive cell, in the
# iteration order of the neighbour dictionary.
invasive_TME = [
    neighbor_label
    for cell_label, neighbor_labels in distance_between_cells_dict.items()
    if cell_label in invasive_obi
    for neighbor_label in neighbor_labels
]

In [None]:
# Neighbour cell label -> its cluster id as a string (KeyError if a neighbour
# has no entry in cluster_cell_label_dictionary).
invasive_TME_clusters = {
    neighbor_label: str(cluster_cell_label_dictionary[neighbor_label])
    for neighbor_label in invasive_TME
}

In [None]:
# One row per neighbouring cell (index = cell label), single column = cluster id.
tme_table = pd.DataFrame.from_dict(invasive_TME_clusters, 'index')

In [None]:
# Horizontal bar chart of the number of neighbouring cells per cluster, sorted by count.
tme_table.value_counts().sort_values().plot(kind = 'barh', figsize=(10,10))

In [None]:
# Label every cell with its tissue class (from its phenograph cluster) and
# prefix the class with 'invasive_' when the cell is among the neighbours of
# the invasive cells collected in invasive_TME.
invasive_TME_assignment = []
epithelial_clusters = [4,6,3,37,2,1,7,5, 11, 20, 23, 24, 31,32, 33,34,35]
basal_clusters = [19]
stroma_clusters = [8,9,29,14,10,15,12,17,22, 26, 27]

# invasive_TME is a long list with duplicates; a set gives constant-time
# membership tests instead of a list scan for every cell.
invasive_TME_labels = set(invasive_TME)

for cell_label, cluster_id in zip(c, o):
    obi = int(cell_label)
    c_label = int(cluster_id)

    if c_label in epithelial_clusters:
        tissue_class = 'epithelial'
    elif c_label in basal_clusters:
        tissue_class = 'basal'
    elif c_label in stroma_clusters:
        tissue_class = 'stroma'
    else:
        tissue_class = 'other'

    prefix = 'invasive_' if obi in invasive_TME_labels else ''
    invasive_TME_assignment.append(prefix + tissue_class)

In [None]:
# Store the per-cell assignment as a categorical obs column.
adata.obs['invasive_TME'] = pd.Categorical(invasive_TME_assignment)

Plot marker expression for the invasive TME

In [None]:
# Plot defaults for the 'invasive_TME' violin plots (small font, 6x6 figure).
sc.set_figure_params(dpi=100, fontsize=5,figsize='6,6')
sc.settings.verbosity = 1

In [None]:
# Violin plots of the listed markers, grouped by the 'invasive_TME' obs column.
sc.pl.violin(adata, ['E/P-Cadherin','panCK','CK7','CK8/18', 'CK14'], groupby='invasive_TME')

In [None]:
# Same grouping, for CK19, CK5 and Vimentin.
sc.pl.violin(adata, ['CK19','CK5', 'Vimentin'], groupby='invasive_TME')

In [None]:
# Same grouping, for Ki-67 and cPARP+cCasp3.
sc.pl.violin(adata, ['Ki-67','cPARP+cCasp3'], groupby='invasive_TME')

In [None]:
# Same grouping, for CD68, Histone H3 and phospho-H3.
sc.pl.violin(adata, ['CD68', 'Histone H3', 'phospho-H3',], groupby='invasive_TME')

In [None]:
# Same grouping, for SMA, vWF+ CD31 and Vimentin.
sc.pl.violin(adata, [ 'SMA','vWF+ CD31','Vimentin'], groupby='invasive_TME')

In [None]:
# Same grouping, for CD138, pS6 and Collagen I.
sc.pl.violin(adata, ['CD138','pS6' , 'Collagen I' ], groupby='invasive_TME')

In [None]:
# pS6 violin plot with an explicit group order that places each tissue class
# next to its 'invasive_' counterpart; saved with the given file-name suffix.
sc.set_figure_params(dpi=150, fontsize=8,figsize='10,10',format='pdf')
sc.pl.violin(adata, ['pS6'], groupby='invasive_TME',order = ['basal', 'invasive_basal', 'epithelial', 'invasive_epithelial', 'stroma', 'invasive_stroma','other', 'invasive_other' ], save = '_model201710_invasive_TME_ps6.pdf')

###### End of notebook