In [1]:
import os 
import re
import ast
import anndata as ad 
import numpy as np 
import pandas as pd
import scanpy as sc
import seaborn as sns
from shapely.geometry import Polygon
from tqdm import tqdm

In [2]:
Z_SLICE = 4.5

In [3]:
ddir = '/sbgenomics/project-files/HTAN-HTAPP/Merfish'

In [11]:
def extract_cell_coords(adata, z_slice = Z_SLICE):
    
    x_centroid = []
    y_centroid = []
    for i,cell in adata.obs.iterrows():

        print(i)
        
        # find index of middle slice in z-stack 
        middle_slice_index = ast.literal_eval(cell['z_height']).index(Z_SLICE)
    
        # create x,y coords for the boundary of each cell in the middle z-slice 
        coords = list(zip(ast.literal_eval(cell['x_boundary'])[middle_slice_index][0],ast.literal_eval(cell['y_boundary'])[middle_slice_index][0]))
    
        # create a polygon based on the coordinates and extract the cell centroid 
        poly = Polygon(coords)
        x_centroid.append(poly.centroid.x)
        y_centroid.append(poly.centroid.y)
    
    # set min coordinates to zero for convenience and save as new cols in adata.obs 
    x_centroid = np.array(x_centroid)
    y_centroid = np.array(y_centroid)
    adata.obs['x_centroid'] = x_centroid - x_centroid.min()
    adata.obs['y_centroid'] = y_centroid - y_centroid.min()

    return adata

In [5]:
adatas_dict = {}
for i,fh in tqdm(enumerate(os.listdir(ddir))):

    if '313' not in fh:

        print(fh)

        # read anndata files, save in dict with sample name as key, extract cell coords 
        sample_id = re.split('_',fh)[0]
        adatas_dict[sample_id] = ad.read_h5ad(os.path.join(ddir,fh))
        adatas_dict[sample_id] = extract_cell_coords(adatas_dict[sample_id], z_slice = Z_SLICE)

0it [00:00, ?it/s]

HTAPP-514_slice9.h5ad


2it [05:57, 178.57s/it]

HTAPP-812_slice4.h5ad


3it [07:19, 138.65s/it]

HTAPP-853_slice4.h5ad


4it [08:13, 107.01s/it]

HTAPP-878_slice10.h5ad


5it [10:57, 126.66s/it]

HTAPP-880_slice9.h5ad


6it [12:45, 120.69s/it]

HTAPP-917_slice9.h5ad


7it [15:57, 143.32s/it]

HTAPP-944_slice4.h5ad


8it [18:29, 146.32s/it]

HTAPP-944_slice6.h5ad


9it [21:21, 154.08s/it]

HTAPP-944_slice7.h5ad


10it [23:52, 153.28s/it]

HTAPP-982_slice4.h5ad


11it [26:41, 158.13s/it]

HTAPP-982_slice9.h5ad


11it [26:48, 146.23s/it]


IndexError: list index out of range

In [60]:
# adatas_dict['HTAPP-313'].write_h5ad('/sbgenomics/output-files/HTAPP-313_centroids.h5ad')

In [22]:
for di in adatas_dict:

    if '982' not in di:
        print(di)
        adatas_dict[di].write_h5ad(f'/sbgenomics/output-files/{di}_centroids.h5ad')

HTAPP-514
HTAPP-812
HTAPP-853
HTAPP-878
HTAPP-880
HTAPP-917
HTAPP-944


In [19]:
extract_cell_coords(adatas_dict['HTAPP-982'], z_slice = 4)

77222562704546387033025968016910513508
107010632190613325869762232800149808506
118670571700252993575379720252935728760
299524354904223737521321957033388225523
59056111567717235059026491139641235787
304893777972137204284540806924956554301
325951065251268648985275007225216544711
273141140849060041006901365969313940371
112722375769367862257437260888426649257
100595716760438345352937997140249075210
339808360999343426027983385770065989246
42872582680802174951228368951283610067
294297238318735827334812570051773171049
104260949833234354059089325438844935149
191323037641951330207427825246514666910
285744070753663736813671710765395164603
178824359795625219796953580881587789002
193383091087245051072683655023780766956
189967617850288682275188993476547656729
332631455390950219741260036646882692935
94275172489786932666268539998847394693
215592383452622242584558409915285942290
57996440773808455920239126516910995512
147753246544696137441200482795003073471
313648561667026781074092311176399059028
31890

IndexError: list index out of range

In [18]:
ast.literal_eval(adatas_dict['HTAPP-982'].obs.loc['77461362856144902671052476203926501853',]['x_boundary'])

[[],
 [],
 [],
 [],
 [[-30.485601863265046,
   -30.485601863265046,
   -30.485601863265046,
   -30.485601863265046,
   -30.485601863265046,
   -30.485601863265046,
   -30.496501863002777,
   -30.59460186064244,
   -30.59460186064244,
   -30.59460186064244,
   -30.605501860380173,
   -30.703601858019837,
   -30.703601858019837,
   -30.71450185775757,
   -30.812601855397233,
   -30.812601855397233,
   -30.823501855134964,
   -30.92160185277463,
   -30.93250185251236,
   -31.030601850152024,
   -31.041501849889755,
   -31.13960184752942,
   -31.13960184752942,
   -31.15050184726715,
   -31.259501844644547,
   -31.35760184228421,
   -31.368501842021942,
   -31.466601839661607,
   -31.477501839399338,
   -31.575601837039002,
   -31.586501836776733,
   -31.69550183415413,
   -31.793601831793794,
   -31.804501831531525,
   -31.90260182917119,
   -31.91350182890892,
   -32.022501826286316,
   -32.13150182366371,
   -32.229601821303376,
   -32.24050182104111,
   -32.3495018184185,
   -32.458501

In [16]:
adatas_dict['HTAPP-982']

Unnamed: 0_level_0,z_height,x_boundary,y_boundary,x_centroid,y_centroid
cellID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
213594801799290064269282400204395846255,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[[-2226.726492485404, -2226.726492485404, -22...","[[[-757.1859278678894, -757.2949278652668, -75...",2836.960777,4400.495189
212945178310375726568065140556431296279,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[], [], [[-1288.202491006255, -1288.202491006...","[[], [], [[-3443.6228037178516, -3443.73180371...",3773.354074,1716.472510
142099011198127048215639085290163908849,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[[-2134.745489886403, -2134.745489886403, -21...","[[[-925.0309286415577, -925.1399286389351, -92...",2928.712276,4232.097263
9122595568459967017064441135781545797,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[[-1260.9524916619062, -1260.9524916619062, -...","[[[-365.1429276764393, -365.2519276738167, -36...",3800.503040,4795.415529
324564612296767751117860273509982606338,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[], [[-1635.3374922782182, -1635.337492278218...","[[], [[223.1940802335739, 223.08508023619652, ...",3428.736031,5380.914812
...,...,...,...,...,...
71530819176895494520110819348339988570,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[], [], [], [[-1809.8314928919076, -1809.8314...","[[], [], [], [[-2203.2928046882153, -2203.4018...",3251.735982,2954.445629
308109392940485480786121403286943180834,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[], [], [[-1499.211490741372, -1499.211490741...","[[], [], [[-4064.645052164793, -4064.754052162...",3563.479192,1093.915250
39537984519373219698591956643323835933,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[[-4862.501571378112, -4862.501571378112, -48...","[[[-725.0786862671375, -725.1876862645149, -72...",199.654922,4433.476565
5223866526326819267681216195514982496,"[0.0, 1.5, 3.0, 4.5, 6.0, 7.5, 9.0]","[[], [[-1579.7474936157464, -1579.747493615746...","[[], [[13.929080456495285, 13.82008045911789, ...",3483.370553,5174.377526
