# This is the notebook for cell filtering and lumen distance calculations

## Cell annotation, filtering and lumen distance calculations

In [12]:
import pandas as pd
import numpy as np
import tifffile
import dask.array as da
from dask.array.image import imread
import os
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.ndimage import binary_dilation
import copy
from scipy.ndimage import distance_transform_edt
import matplotlib.pyplot as plt

class SamplePaths:
    def __init__(self, sample):
        self.sample = sample
        self.sample_final_name = sample_list_final[sample_list.index(sample)]
        self.image_path = image_path_list[sample_list.index(sample)]

        #self.segmentation_mask_path = f'/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/segmentation/cellpose_lunaphore/{sample}_clahe.ome_cp_masks.tif'
        self.reannotated_regions_path = f'/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/{sample}_regions.tif'
        self.reannotated_endocardium_path = f'/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/{sample}_endocardium.tif'
        #self.injury_mask_path = f'/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/region_masks/{sample}_injury.tif'
        self.markers_path = f'/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/marker_files/AF_corrected/{sample}_markers.csv'
        self.markers = pd.read_csv(self.markers_path)

sample_list = ["Control_12", "Control_13", "Control_14", "4h_96", "4h_97", "24h_83", "24h_86", "48h_76", "48h_79"]

sample_matching = pd.read_csv('/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/sample_name_matching.csv')
sample_list_final = [sample_matching.loc[sample_matching['sample_kb'] == sample, 'sample_final'].values[0] for sample in sample_list]
image_path_list = [sample_matching.loc[sample_matching['sample_kb'] == sample, 'image_path'].values[0] for sample in sample_list]
region_matching = pd.read_csv('/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/region_matching.csv')

In [13]:
cell_data = pd.read_csv('/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/cell_table_size_normalized_cell_labels.csv') # Pixie output - file on Synapse

lumen_distance_binsize = 80 # in pixels
minimum_cell_size = 120 # exact value in micrometers as used in Molecular Cartography data processing 200 * 0.138 / 0.23 = 120 (200 px * 0.138 um/px (MC pixel size) / 0.23 um/px (COMET PA pixel size)= 120 um)
maximum_cell_size = 120000 # exact value in micrometers as used in Molecular Cartography data processing


In [14]:
cell_data['timepoint'] = cell_data['fov'].str.split('_').str[0] # adds timepont column

# initialize values for new columns
cell_data['region'] = -1
cell_data['region_name'] = 'unknown'

cell_data['refined_cell_type'] = 'unknown'
cell_data['final_cell_type'] = 'unknown'

cell_data['endocardial_annotation'] = False
cell_data['exclude_annotation'] = False
cell_data['artefact'] = False

cell_data['distance_from_lumen'] = -1
cell_data['lumen_bin'] = -1

cell_data['size_filter'] = False

In [15]:
for sample in sample_list:
    sample_paths = SamplePaths(sample)
    print(sample_paths.reannotated_regions_path)
    reannotated_regions = tifffile.imread(sample_paths.reannotated_regions_path)
    cell_data.loc[cell_data['fov'] == sample, 'region'] = reannotated_regions[cell_data.loc[cell_data['fov'] == sample, 'Y_centroid'].astype(int), cell_data.loc[cell_data['fov'] == sample, 'X_centroid'].astype(int)]

    distance_from_lumen = distance_transform_edt(reannotated_regions != 1)
    cell_data.loc[cell_data['fov'] == sample, 'distance_from_lumen'] = distance_from_lumen[cell_data.loc[cell_data['fov'] == sample, 'Y_centroid'].astype(int), cell_data.loc[cell_data['fov'] == sample, 'X_centroid'].astype(int)]

    reannotated_endocardium = tifffile.imread(sample_paths.reannotated_endocardium_path)
    cell_data.loc[cell_data['fov'] == sample, 'endocardial_annotation'] = reannotated_endocardium[cell_data.loc[cell_data['fov'] == sample, 'Y_centroid'].astype(int), cell_data.loc[cell_data['fov'] == sample, 'X_centroid'].astype(int)]

/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/Control_12_regions.tif


 6359.60887162]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  cell_data.loc[cell_data['fov'] == sample, 'distance_from_lumen'] = distance_from_lumen[cell_data.loc[cell_data['fov'] == sample, 'Y_centroid'].astype(int), cell_data.loc[cell_data['fov'] == sample, 'X_centroid'].astype(int)]
  cell_data.loc[cell_data['fov'] == sample, 'endocardial_annotation'] = reannotated_endocardium[cell_data.loc[cell_data['fov'] == sample, 'Y_centroid'].astype(int), cell_data.loc[cell_data['fov'] == sample, 'X_centroid'].astype(int)]


/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/Control_13_regions.tif
/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/Control_14_regions.tif
/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/4h_96_regions.tif
/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/4h_97_regions.tif
/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/24h_83_regions.tif
/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/24h_86_regions.tif
/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/48h_76_regions.tif
/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/labeled_masks/reannotated/masks/48h_79_regions.tif


In [23]:
cell_data['region_name'] = cell_data['region'].map(region_matching.set_index('region_id')['region_name']) # match region names to region ids

cell_data['refined_cell_type'] = cell_data['cell_meta_cluster']
cell_data.loc[(cell_data['endocardial_annotation']>0) & (cell_data['cell_meta_cluster'] == 'Endothelial cells'), 'refined_cell_type'] = 'Endocardial cells' # all cells is in the endocardial annotation and classified as "Endothelial cells", are renamed to "Endocardial cells" in the "refined_cell_type" column

max_distance = cell_data['distance_from_lumen'].max()
next_multiple_of_80 = (np.ceil(max_distance / lumen_distance_binsize) * lumen_distance_binsize) # get maximum lumen bin value possible
bin_edges = np.arange(0, next_multiple_of_80 + lumen_distance_binsize, lumen_distance_binsize)
cell_data['lumen_bin'] = pd.cut(cell_data['distance_from_lumen'], bins=bin_edges, labels=range(1, len(bin_edges))) # assign lumen bin based on lumen_distance_binsize

cell_data.loc[cell_data.region_name.isin(['background', 'lumen', 'ignore']), 'exclude_annotation'] = True # if region_name is background, lumen or ignore, cells should be excluded]
cell_data.loc[cell_data.cell_meta_cluster.isin(['out_of_mask', 'background']), 'exclude_annotation'] = True

cell_data['size_filter'] = np.logical_or(cell_data['cell_size'] < minimum_cell_size, cell_data['cell_size'] > maximum_cell_size) # exclude cells below min and above max filter

artefacts = cell_data[(cell_data['Eccentricity'] > 0.9) & (cell_data['Solidity'] < 0.9) & (cell_data['cell_size'] < 500)].index.values # exclude cells with high eccentricity, low solidity and small size
artefacts = np.concatenate((artefacts, cell_data[(cell_data['Solidity'] < 0.1)].index.values)) # additionally exclude cells with very low solidity
artefacts = np.unique(artefacts)
cell_data.loc[artefacts, 'artefact'] = True

cell_data['final_cell_type'] = cell_data['refined_cell_type']
cell_data.loc[cell_data['exclude_annotation'], 'final_cell_type'] = 'exclude'
cell_data.loc[cell_data['size_filter'], 'final_cell_type'] = 'exclude'
cell_data.loc[cell_data['artefact'], 'final_cell_type'] = 'exclude'
cell_data['distance_from_lumen'] = cell_data['distance_from_lumen'].round(2)

cell_data.to_csv('/Users/kbestak/Documents/kbestak/phd/heart_manuscript_revision/cell_table_final.csv', index=False)