In [1]:
import mvstudio.data
import numpy as np
import pickle
from IPython.display import display
from collections import defaultdict

h = mvstudio.data.Hierarchy()
display(h)

DataHierarchy(

Item([1], 7e9fa558-11e2-4846-a468-390064416258, SW9_ROI008_ud.ome, <ItemType.Points: 2>)
Item([1, 1], 6f3daa3c-5c2c-468f-8eb4-626a5652104c, SW9_ROI008_ud.ome, <ItemType.Image: 1>)
Item([1, 2], 218f357e-be63-49ed-82f4-25bd56e56cb1, SW9_ROI008_ud.ome, <ItemType.Points: 2>)
Item([1, 3], 051922f6-88f6-4b1b-8223-e4f871dd9c98, SW9_ROI008_ud.ome, <ItemType.Points: 2>)
Item([1, 3, 1], 859a9ee6-b45f-44d8-adad-f5d63d167ea7, TSNE Embedding, <ItemType.Image: 1>)
Item([2], 329f58bd-ae93-4c4d-96c3-69c1b7a82f75, SW9_ROI007_ud.ome, <ItemType.Points: 2>)
Item([2, 1], 5ede939c-5199-44fb-b2bf-a0b05224d717, SW9_ROI007_ud.ome, <ItemType.Image: 1>)
Item([2, 2], 3a22843f-dd4d-4eba-b0ae-1b3f55244ccf, SW9_ROI007_ud.ome, <ItemType.Points: 2>)
Item([3], 7e878fc7-c3b2-463e-9299-a7fdee8eaa82, SW9_ROI005_ud.ome, <ItemType.Points: 2>)
Item([3, 1], 914f7d2b-232b-4a48-a2be-df88fc05a7a3, SW9_ROI005_ud.ome, <ItemType.Image: 1>)
Item([3, 2], c3a4f60d-78d2-456d-bbad-07bd20f28748, SW9_ROI005_ud.ome, <ItemTy

In [2]:
def count_cells_per_cluster(cellMeansID:str, clusterID:str, cellSegID=None):
    """ 
    Analyse the number of cells for every cluster and optionally get cell identifiers per cluster.

    Data is loaded with dataset IDs which can either be found in the mvstudio.data.Hierarchy() or by right-click the item in Manivault studio and click 'Copy dataset ID'
    cellMeansID = dataset ID of Cluster Means object obtained by creating a Mean Dataset from cell mask clusters in ManiVault Studio.
    clusterID = cluster (mean-shift) of multiple grouped Cluster Means objects of which the object given by cellMeansID should be one.
    optional:
        cellSegID = cell mask cluster object with cell names/identifiers (Generated by Analyze -> Extract Clusters on a cell mask item) 
    
    """
    
    def dprint(text):
        print(text)
        display(text)
    
    output = []
    h = mvstudio.data.Hierarchy()
    try:
        cellMeans = h.getItemByDataID(cellMeansID)
        clusters = h.getItemByDataID(clusterID)
    except SyntaxError:
        dprint('Please provide imageID and clusterID in string format')
        return None
    
    if len(cellMeans._hierarchy_id) == 1:
        dprint('It looks like the ID of the whole dataset is given, provide ID of the cell (or cluster) Means dataset')
        if len(cellMeans._children) > 0:
            alternative = False
            for i in cellMeans._children:
                if i._type.name == 'Points':
                    cellMeansID = i.datasetId
                    dprint(f'Found point set that might be cell means dataset with ID: {cellMeansID}')
                    dprint(f'Using {cellMeansID} for cell means') 
                    cellMeans = h.getItemByDataID(cellMeansID)
                    alternative = True  
            if not alternative:
                dprint('Exiting.....')
                return None
        else:
            dprint('Exiting.....')
            return None           
                
    if cellMeans._type.name == 'Image':
        dprint('It looks like the ID of the image dataset is given, provide ID of the cell (or cluster) Means dataset')
        dprint('Exiting.....')
        return None
    
    meansDatasetIndex = [clusters._hierarchy_id[0]]
    meansDataset = h.getItemByIndex(meansDatasetIndex)
    
    c_names = clusters.cluster.names
    nr_clusters = len(c_names)
    per_cluster_counts = np.zeros(nr_clusters)

    cmname = cellMeans._name
    if cellSegID:       
        per_cluster_cells = defaultdict(list)
        cellSegData = h.getItemByDataID(cellSegID)
        cSN_name = cellSegData.name
        dprint(f'Using cell segmentation in {cSN_name} for cells in {cmname}')
        cellSegNames = cellSegData.cluster.names
        
    dprint('Matching cells to clusters.....') 
    for clusterId in range(nr_clusters):
        points = meansDataset.points[clusters.cluster.indices[clusterId]]
        # point_set = {tuple(p) for p in points}
        point_set = {hash(p.tobytes()) for p in points}
        for j, p1 in enumerate(cellMeans.points):
            # compare = tuple(p1) in point_set
            compare = hash(p1.tobytes()) in point_set
            if compare:
                per_cluster_counts[clusterId] += 1
                if cellSegID:
                    cellName = cellSegNames[j]
                    per_cluster_cells[c_names[clusterId]].append(cellName)

    dprint(f'Total cells per cluster for {cmname}: ')
    for i in range(nr_clusters):
        name = c_names[i]
        count = int(per_cluster_counts[i])
        output.append(f"{name} : {count}")

    if cellSegID:
        return dict(zip(c_names, per_cluster_counts)), per_cluster_cells
    else:
        return dict(zip(c_names, per_cluster_counts))

In [6]:
# With Cell identifiers per cluster.
d = count_cells_per_cluster(cellMeansID='218f357e-be63-49ed-82f4-25bd56e56cb1', clusterID='ab1cc3a8-bcc9-43d2-a8ff-f17d9a2129b8', cellSegID='999715c1-13e2-4c86-a46a-4f53992fdb4c')
display('Cell counts: ', d[0])
display('Cell names per cluster: ', d[1])

# Without Cell identifiers
d = count_cells_per_cluster(cellMeansID='218f357e-be63-49ed-82f4-25bd56e56cb1', clusterID='ab1cc3a8-bcc9-43d2-a8ff-f17d9a2129b8')
display('Cell counts: ', d)


'Using cell segmentation in SW9_ROI008.ome_mask_1 for cells in SW9_ROI008_ud.ome'

'Matching cells to clusters.....'

'Total cells per cluster for SW9_ROI008_ud.ome: '

'Cell counts: '

{'cluster 1': 66.0,
 'cluster 2': 36.0,
 'cluster 3': 42.0,
 'cluster 4': 5.0,
 'cluster 5': 40.0,
 'cluster 6': 16.0,
 'cluster 7': 30.0,
 'cluster 8': 43.0,
 'cluster 9': 57.0,
 'cluster 10': 30.0,
 'cluster 11': 149.0,
 'cluster 12': 28.0,
 'cluster 13': 29.0,
 'cluster 14': 55.0,
 'cluster 15': 29.0,
 'cluster 16': 53.0,
 'cluster 17': 0.0,
 'cluster 18': 28.0,
 'cluster 19': 19.0,
 'cluster 20': 0.0,
 'cluster 21': 63.0,
 'cluster 22': 2.0,
 'cluster 23': 51.0,
 'cluster 24': 8.0,
 'cluster 25': 242.0,
 'cluster 26': 42.0,
 'cluster 27': 2.0,
 'cluster 28': 2.0,
 'cluster 29': 258.0,
 'cluster 30': 42.0,
 'cluster 31': 15.0,
 'cluster 32': 44.0,
 'cluster 33': 1.0,
 'cluster 34': 52.0,
 'cluster 35': 8.0,
 'cluster 36': 27.0,
 'cluster 37': 21.0,
 'cluster 38': 91.0,
 'cluster 39': 8.0,
 'cluster 40': 0.0,
 'cluster 41': 37.0,
 'cluster 42': 53.0,
 'cluster 43': 59.0,
 'cluster 44': 10.0,
 'cluster 45': 0.0,
 'cluster 46': 0.0,
 'cluster 47': 0.0,
 'cluster 48': 0.0,
 'cluster 49'

'Cell names per cluster: '

defaultdict(list,
            {'cluster 1': ['id_0',
              'id_1545',
              'id_1539',
              'id_1535',
              'id_1470',
              'id_1288',
              'id_1271',
              'id_1215',
              'id_1211',
              'id_1204',
              'id_1207',
              'id_1195',
              'id_1196',
              'id_1189',
              'id_1191',
              'id_1194',
              'id_1184',
              'id_970',
              'id_934',
              'id_857',
              'id_651',
              'id_632',
              'id_570',
              'id_563',
              'id_573',
              'id_567',
              'id_548',
              'id_521',
              'id_519',
              'id_486',
              'id_438',
              'id_400',
              'id_382',
              'id_340',
              'id_320',
              'id_262',
              'id_266',
              'id_276',
              'id_253',
              'id_2

'Matching cells to clusters.....'

'Total cells per cluster for SW9_ROI008_ud.ome: '

'Cell counts: '

{'cluster 1': 66.0,
 'cluster 2': 36.0,
 'cluster 3': 42.0,
 'cluster 4': 5.0,
 'cluster 5': 40.0,
 'cluster 6': 16.0,
 'cluster 7': 30.0,
 'cluster 8': 43.0,
 'cluster 9': 57.0,
 'cluster 10': 30.0,
 'cluster 11': 149.0,
 'cluster 12': 28.0,
 'cluster 13': 29.0,
 'cluster 14': 55.0,
 'cluster 15': 29.0,
 'cluster 16': 53.0,
 'cluster 17': 0.0,
 'cluster 18': 28.0,
 'cluster 19': 19.0,
 'cluster 20': 0.0,
 'cluster 21': 63.0,
 'cluster 22': 2.0,
 'cluster 23': 51.0,
 'cluster 24': 8.0,
 'cluster 25': 242.0,
 'cluster 26': 42.0,
 'cluster 27': 2.0,
 'cluster 28': 2.0,
 'cluster 29': 258.0,
 'cluster 30': 42.0,
 'cluster 31': 15.0,
 'cluster 32': 44.0,
 'cluster 33': 1.0,
 'cluster 34': 52.0,
 'cluster 35': 8.0,
 'cluster 36': 27.0,
 'cluster 37': 21.0,
 'cluster 38': 91.0,
 'cluster 39': 8.0,
 'cluster 40': 0.0,
 'cluster 41': 37.0,
 'cluster 42': 53.0,
 'cluster 43': 59.0,
 'cluster 44': 10.0,
 'cluster 45': 0.0,
 'cluster 46': 0.0,
 'cluster 47': 0.0,
 'cluster 48': 0.0,
 'cluster 49'