In [1]:
from SpatialQuery.spatial_query import spatial_query
from SpatialQuery.spatial_query_multiple_fov import spatial_query_multi
import os
import anndata as ad
import pandas as pd
# Raise the column-width limit so the long cell-type itemsets shown in the result
# tables are not truncated when DataFrames are displayed.
pd.set_option('display.max_colwidth', 1000)

In [2]:
# Directory holding the per-sample .h5ad files. Set the SPATIAL_QUERY_DATA_DIR
# environment variable to point the notebook at another location; when it is
# unset, the original local path is used unchanged.
data_path = os.environ.get(
    "SPATIAL_QUERY_DATA_DIR",
    "/Users/sa3520/BWH/spatial query/python/data/CZI_kidney",
)

In [3]:
# os.listdir returns entries in arbitrary order and includes every file in the
# directory (e.g. hidden files such as .DS_Store), which read_h5ad cannot open.
# Keep only .h5ad files and sort them so the sample order is reproducible.
data_files = sorted(
    name for name in os.listdir(data_path) if name.endswith('.h5ad')
)

In [4]:
# Read each file listed in data_files from data_path into an AnnData object.
adatas = []
for file_name in data_files:
    adatas.append(ad.read_h5ad(os.path.join(data_path, file_name)))

In [5]:
# Lookup keys used throughout the notebook: spatial_key is forwarded to
# spatial_query_multi, while label_key and disease_key name columns of adata.obs.
spatial_key, label_key, disease_key = 'X_spatial', 'cell_type', 'disease'

In [6]:
# Distinct disease labels across all samples. Each sample is represented by the
# first unique value of its disease column.
disease_list = list({sample.obs[disease_key].unique()[0] for sample in adatas})
print(disease_list)

['diabetic kidney disease', 'normal', 'autosomal dominant polycystic kidney disease']


In [7]:
# Split the samples into the two cohorts compared below, labelling each sample by
# the first unique value of its disease column. Samples with any other label
# (e.g. polycystic kidney disease) are left out of both lists.
disease_normal_adatas, disease_diabetic_adatas = [], []
for sample in adatas:
    sample_disease = sample.obs[disease_key].unique()[0]
    if sample_disease == 'normal':
        disease_normal_adatas.append(sample)
    elif sample_disease == 'diabetic kidney disease':
        disease_diabetic_adatas.append(sample)

In [8]:
# Number of samples in each cohort: normal first, then diabetic kidney disease.
print(len(disease_normal_adatas), len(disease_diabetic_adatas), sep='\n')

34
26


In [9]:
# One dataset label per sample, ordered like
# disease_normal_adatas + disease_diabetic_adatas.
datasets = [
    label
    for label, group in (
        ('normal', disease_normal_adatas),
        ('diabetic kidney disease', disease_diabetic_adatas),
    )
    for _ in group
]

In [10]:
# Print the number of observations (n_obs) of every sample, normal cohort first,
# followed by the grand total.
obs_counts = [
    sample.n_obs for sample in disease_normal_adatas + disease_diabetic_adatas
]
for count in obs_counts:
    print(count)
n_obs_sum = sum(obs_counts)
print(n_obs_sum)

21181
16027
25281
10888
35896
13822
34638
30182
34093
33006
35132
21894
32070
36299
20320
24438
22458
20055
34355
29914
33070
34844
15592
27044
30888
26260
30206
18568
15161
29011
27814
25817
20568
19471
12906
29986
21814
26889
25957
19156
18542
26554
31600
37742
26750
17138
27220
27173
16787
26239
20886
10250
12351
32900
14558
31954
14961
27639
22659
32706
1499580


In [11]:
# Stack the cell-type column of every sample (both cohorts) into one Series and
# tabulate how often each label occurs.
label_columns = []
for sample in disease_normal_adatas + disease_diabetic_adatas:
    label_columns.append(sample.obs[label_key])
cell_types = pd.concat(label_columns)
cell_types.value_counts()

cell_type
kidney proximal convoluted tubule epithelial cell            532273
endothelial cell                                             399542
leukocyte                                                    202569
kidney loop of Henle thick ascending limb epithelial cell    177504
macrophage                                                    48470
kidney distal convoluted tubule epithelial cell               43769
kidney collecting duct principal cell                         33842
blood vessel smooth muscle cell                               26774
kidney interstitial fibroblast                                16220
kidney collecting duct intercalated cell                       8651
podocyte                                                       6423
kidney granular cell                                           1448
mesangial cell                                                 1294
macula densa epithelial cell                                    801
Name: count, dtype: int64

In [12]:
# Build the multi-sample query object. The samples are passed normal-first, then
# diabetic, which is the same order in which `datasets` was assembled.
query_samples = disease_normal_adatas + disease_diabetic_adatas
multi_sp = spatial_query_multi(
    adatas=query_samples,
    datasets=datasets,
    spatial_key=spatial_key,
    label_key=label_key,
    leaf_size=10,
)

In [15]:
# Run find_fp_knn with leukocytes as the cell type of interest in the 'normal'
# dataset (k=20, min_support=0.4).
ct = 'leukocyte'
knn_query = dict(
    ct=ct,
    dataset='normal',
    k=20,
    dis_duplicates=False,
    min_support=0.4,
)
fp_knn = multi_sp.find_fp_knn(**knn_query)

In [16]:
# Display the find_fp_knn result (one row per itemset with its support).
fp_knn

Unnamed: 0,support,itemsets
0,0.562124,"(leukocyte, endothelial cell, kidney proximal convoluted tubule epithelial cell)"
1,0.539357,"(kidney loop of Henle thick ascending limb epithelial cell, leukocyte, endothelial cell)"
2,0.424588,"(leukocyte, macrophage)"
3,0.423078,"(macrophage, endothelial cell)"


In [17]:
# Run find_fp_dist with leukocytes as the cell type of interest in the 'normal'
# dataset (max_dist=100, min_support=0.4).
ct = 'leukocyte'
dist_query = dict(
    ct=ct,
    dataset='normal',
    max_dist=100,
    dis_duplicates=False,
    min_support=0.4,
)
fp_dist = multi_sp.find_fp_dist(**dist_query)

In [18]:
# Display the find_fp_dist result (one row per itemset with its support).
fp_dist

Unnamed: 0,support,itemsets
0,0.467528,"(kidney loop of Henle thick ascending limb epithelial cell, leukocyte, endothelial cell, kidney proximal convoluted tubule epithelial cell)"
1,0.520607,"(kidney loop of Henle thick ascending limb epithelial cell, leukocyte, macrophage, endothelial cell)"
2,0.413089,"(leukocyte, macrophage, endothelial cell, kidney proximal convoluted tubule epithelial cell)"


In [13]:
# Run motif_enrichment_knn once per candidate motif, with leukocytes as the center
# cell type in the 'normal' dataset, and keep the per-motif results in a list.
ct = 'leukocyte'
motifs = [
    ['kidney loop of Henle thick ascending limb epithelial cell', 'leukocyte', 'endothelial cell', 'kidney proximal convoluted tubule epithelial cell'],
    ['kidney loop of Henle thick ascending limb epithelial cell', 'leukocyte', 'macrophage', 'endothelial cell'],
    ['leukocyte', 'macrophage', 'endothelial cell', 'kidney proximal convoluted tubule epithelial cell'],
]
motif_enrichment_knn = [
    multi_sp.motif_enrichment_knn(ct=ct, motifs=motif, dataset='normal')
    for motif in motifs
]

In [14]:
# Combine the per-motif result frames into a single table. The same name is reused
# for the combined frame, so the guard makes the cell safe to re-run: pd.concat
# raises TypeError when handed a DataFrame instead of a list of frames.
if not isinstance(motif_enrichment_knn, pd.DataFrame):
    motif_enrichment_knn = pd.concat(motif_enrichment_knn)

In [15]:
# Display the combined motif_enrichment_knn results, one row per tested motif.
motif_enrichment_knn

Unnamed: 0,center,motifs,n_center_motif,n_center,n_motif,p-val
0,leukocyte,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte]",33931,119205,207453,0.0
0,leukocyte,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte, macrophage]",31237,119205,193242,0.0
0,leukocyte,"[endothelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte, macrophage]",27554,119205,154032,0.0


In [16]:
# Run motif_enrichment_dist once per candidate motif, with leukocytes as the center
# cell type in the 'normal' dataset.
ct = 'leukocyte'
motifs = [
    ['kidney loop of Henle thick ascending limb epithelial cell', 'leukocyte', 'endothelial cell', 'kidney proximal convoluted tubule epithelial cell'],
    ['kidney loop of Henle thick ascending limb epithelial cell', 'leukocyte', 'macrophage', 'endothelial cell'],
    ['leukocyte', 'macrophage', 'endothelial cell', 'kidney proximal convoluted tubule epithelial cell'],
]
# Keep the per-motif frames as a list; the following cell concatenates them.
# The original trailing line assigned the concat to the misspelled name
# `motif_enrichement_dist`, which only created a stray variable and repeated the
# concat, so it has been removed.
motif_enrichment_dist = [
    multi_sp.motif_enrichment_dist(ct=ct, motifs=motif, dataset='normal')
    for motif in motifs
]

In [None]:
# Combine the per-motif result frames into a single table. The same name is reused
# for the combined frame, so the guard makes the cell safe to re-run: pd.concat
# raises TypeError when handed a DataFrame instead of a list of frames.
if not isinstance(motif_enrichment_dist, pd.DataFrame):
    motif_enrichment_dist = pd.concat(motif_enrichment_dist)


In [23]:
# Display the combined motif_enrichment_dist results, one row per tested motif.
motif_enrichment_dist

Unnamed: 0,center,motifs,n_center_motif,n_center,n_motif,p-val
0,leukocyte,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte]",55720,119205,377815,2.712055e-208
0,leukocyte,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte, macrophage]",62046,119205,407282,0.0
0,leukocyte,"[endothelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte, macrophage]",49232,119205,312851,0.0


In [30]:
# Run differential_analysis_knn around leukocytes for the two cohorts. The call
# returns two tables, unpacked in the same order as the datasets are listed.
ct = 'leukocyte'
compared_datasets = ['normal', 'diabetic kidney disease']
fp0, fp1 = multi_sp.differential_analysis_knn(
    ct=ct,
    datasets=compared_datasets,
    min_support=0.1,
)

In [31]:
# Display the first table returned by differential_analysis_knn.
fp0

Unnamed: 0,itemsets,corrected_p_values


In [32]:
# Display the second table returned by differential_analysis_knn.
fp1

Unnamed: 0,itemsets,corrected_p_values
16,"(kidney loop of Henle thick ascending limb epithelial cell, kidney proximal convoluted tubule epithelial cell, endothelial cell)",2.705346e-09
17,"(kidney loop of Henle thick ascending limb epithelial cell, leukocyte, kidney proximal convoluted tubule epithelial cell)",6.276679e-12
18,"(kidney loop of Henle thick ascending limb epithelial cell, leukocyte, kidney proximal convoluted tubule epithelial cell, endothelial cell)",2.705346e-09


In [43]:
ct = 'leukocyte'
fp0_dist, fp1_dist = multi_sp.differential_analysis_dist(ct=ct,
                                               datasets=['normal', 'diabetic kidney disease'],
                                               min_support=0.1
                                              )

In [44]:
# Display the first table returned by differential_analysis_dist.
fp0_dist

Unnamed: 0,itemsets,corrected_p_values
0,"(macrophage, kidney proximal convoluted tubule epithelial cell)",1.113583e-10
4,"(leukocyte, macrophage, kidney proximal convoluted tubule epithelial cell)",2.278153e-07
5,"(macrophage, kidney proximal convoluted tubule epithelial cell, endothelial cell)",1.113583e-10
26,"(leukocyte, macrophage, kidney proximal convoluted tubule epithelial cell, endothelial cell)",2.278153e-07


In [45]:
# Display the second table returned by differential_analysis_dist.
fp1_dist

Unnamed: 0,itemsets,corrected_p_values
