In [1]:
import os
import time

import anndata as ad
import pandas as pd
import numpy as np

from SpatialQuery import spatial_query_multiple

In [2]:
# Let pandas show long strings (up to 1000 characters per cell) without truncation.
pd.set_option('display.max_colwidth', 1000)

# Directory containing the .h5ad files to load.
data_path = "/Users/sa3520/BWH/spatial query/python/data/CZI_kidney"

# os.listdir returns every directory entry (including stray files such as
# .DS_Store) in arbitrary order, so keep only .h5ad files and sort them to
# get a stable, reproducible sample order.
data_files = sorted(name for name in os.listdir(data_path) if name.lower().endswith('.h5ad'))
adatas = [ad.read_h5ad(os.path.join(data_path, name)) for name in data_files]


In [3]:
# Keys used to index each AnnData object.
spatial_key = 'X_spatial'
label_key = 'cell_type'
disease_key = 'disease'

# Disease label of every sample, read once: the first unique value of
# obs[disease_key] (assumes each file holds a single disease — TODO confirm).
sample_diseases = [adata.obs[disease_key].unique()[0] for adata in adatas]

# Sorted so the printed list does not depend on set iteration order.
disease_list = sorted(set(sample_diseases), key=str)
print(disease_list)

# Only 'normal' and 'diabetic kidney disease' samples are kept; samples with
# any other disease label are not used below.
disease_normal_adatas = [
    adata for adata, disease in zip(adatas, sample_diseases) if disease == 'normal'
]
disease_diabetic_adatas = [
    adata for adata, disease in zip(adatas, sample_diseases) if disease == 'diabetic kidney disease'
]

# One condition label per kept sample, in the same order as
# disease_normal_adatas + disease_diabetic_adatas.
datasets = ['normal'] * len(disease_normal_adatas) + ['diabetic kidney disease'] * len(disease_diabetic_adatas)

# Drop the combined list; the two per-condition lists still reference the data.
del adatas

# Cell-type annotations of all kept samples, concatenated into one Series.
cell_types = pd.concat([adata.obs[label_key] for adata in disease_normal_adatas + disease_diabetic_adatas])
# cell_types.value_counts()


# Build data: 0.33s for ~1.5M cells
# time.perf_counter() is a monotonic clock, so the measured interval cannot be
# distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()
multi_sp = spatial_query_multiple.spatial_query_multiple(
    adatas=disease_normal_adatas + disease_diabetic_adatas,
    datasets=datasets,
    spatial_key=spatial_key,
    label_key=label_key,
)
end_time = time.perf_counter()
print(f"time of initializing data and building kd tree is {end_time - start_time} seconds.")

# Number of cells (n_obs) in each sample, per condition.
n_normal = [sample.n_obs for sample in disease_normal_adatas]
n_dkd = [sample.n_obs for sample in disease_diabetic_adatas]

total_normal = np.sum(n_normal)
total_dkd = np.sum(n_dkd)
print(f"total number of normal data: {total_normal}")
print(f"total number of dkd data: {total_dkd}")

# The per-condition AnnData lists are no longer referenced by name after this point.
del disease_normal_adatas, disease_diabetic_adatas


['diabetic kidney disease', 'autosomal dominant polycystic kidney disease', 'normal']
time of initializing data and building kd tree is 0.3379359245300293 seconds.
total number of normal data: 886263
total number of dkd data: 613317


In [4]:
import ipywidgets as widgets
from IPython.display import display

def display_parameters(k, radius, min_support, is_duplicate):
    """Print the current parameter values and the labels of the ticked dataset boxes.

    Args:
        k: value printed as "k".
        radius: value printed as "radius".
        min_support: value printed with two decimals as "min_support".
        is_duplicate: truthy prints "Yes", falsy prints "No".

    Reads the module-level ``dataset_checkboxes`` list; the description of
    every checkbox whose value is truthy is included, joined with ", ".
    """
    ticked_labels = []
    for box in dataset_checkboxes:
        if box.value:
            ticked_labels.append(box.description)
    selected_datasets = ', '.join(ticked_labels)

    print(f"Current value of k: {k}")
    print(f"Current value of radius: {radius}")
    print(f"Current value of min_support: {min_support:.2f}")
    print(f"Is duplicate considered: {'Yes' if is_duplicate else 'No'}")
    print(f"Selected datasets: {selected_datasets}")

# Sliders and checkboxes for parameters.
# description_width='initial' lets each label take its natural width, so long
# descriptions such as 'k (Number of Neighbors):' are not cut off.
wide_label = {'description_width': 'initial'}
k_slider = widgets.IntSlider(
    value=30, min=1, max=500, step=1,
    description='k (Number of Neighbors):',
    continuous_update=False,
    style=wide_label,
)
min_support_slider = widgets.FloatSlider(
    value=0.5, min=0.0, max=1.0, step=0.01,
    description='Min Support:',
    continuous_update=False,
    style=wide_label,
)
radius_slider = widgets.FloatSlider(
    value=100, min=1, max=500, step=1,
    description='Radius:',
    continuous_update=False,
    style=wide_label,
)
is_duplicate_checkbox = widgets.Checkbox(value=False, description='Consider Duplicates:', disabled=False)

# Create one checkbox per option. The first entry ('All') is a select-all
# toggle; the remaining entries are dataset names. A dedicated name is used
# so the existing `datasets` variable is not overwritten with widget labels.
dataset_options = ['All', 'diabetic kidney disease', 'normal']
dataset_checkboxes = [widgets.Checkbox(value=False, description=option) for option in dataset_options]

def handle_all_datasets_checkbox_change(change):
    """React to a value change of the "All" checkbox.

    Args:
        change: change dictionary; only ``change['new']`` is read.

    When the new value is truthy, every checkbox after the first one in
    ``dataset_checkboxes`` is ticked. When it is falsy, those checkboxes are
    unticked only if all of them are currently ticked; otherwise they are
    left as they are.
    """
    individual_boxes = dataset_checkboxes[1:]

    if change['new']:
        target_value = True
    elif all(box.value for box in individual_boxes):
        # Only clear the individual boxes when every one of them is ticked.
        target_value = False
    else:
        return

    for box in individual_boxes:
        box.value = target_value

# Call the handler whenever the value of the first ("All") checkbox changes.
dataset_checkboxes[0].observe(handle_all_datasets_checkbox_change, names='value')

# Put the dataset checkboxes into a single vertical container.
dataset_checkbox_container = widgets.VBox(dataset_checkboxes)

# Bind display_parameters to the parameter widgets; the dataset checkboxes
# are not passed here because they are handled separately.
parameter_controls = widgets.interactive(
    display_parameters,
    k=k_slider,
    radius=radius_slider,
    min_support=min_support_slider,
    is_duplicate=is_duplicate_checkbox,
)

# Show the parameter controls followed by the dataset checkboxes.
display(parameter_controls, dataset_checkbox_container)


interactive(children=(IntSlider(value=30, continuous_update=False, description='k (Number of Neighbors):', max…

VBox(children=(Checkbox(value=False, description='All'), Checkbox(value=False, description='diabetic kidney di…

In [5]:
# Snapshot the current widget values into plain variables for the analysis cells.
k = k_slider.value
min_support = min_support_slider.value
is_duplicate = is_duplicate_checkbox.value
radius = radius_slider.value

# Skip the first checkbox: "All" is a select-all toggle, not a dataset name,
# so its label must not end up in the list passed to the queries.
datasets = [checkbox.description for checkbox in dataset_checkboxes[1:] if checkbox.value]

print(k)
print(min_support)
print(is_duplicate)
print(radius)
print(datasets)

30
0.5
False
100.0
['diabetic kidney disease', 'normal']


In [6]:
# Query multi_sp.find_fp_knn for the chosen center cell type using the
# selected datasets, k and min_support; the result is displayed below.
ct = 'kidney interstitial fibroblast'
fp_knn = multi_sp.find_fp_knn(ct=ct, dataset=datasets, k=k, min_support=min_support)
fp_knn

Unnamed: 0,itemsets,support
0,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte]",0.645191
1,"[endothelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte]",0.599383
2,"[endothelial cell, leukocyte, macrophage]",0.50783
3,"[endothelial cell, kidney interstitial fibroblast]",0.502528


In [7]:
# Run motif_enrichment_knn once for every itemset found by find_fp_knn and
# stack the per-motif result tables into one DataFrame.
motifs = fp_knn['itemsets'].tolist()

# Collect the tables first and concatenate once: calling pd.concat inside the
# loop re-copies all accumulated rows on every iteration.
enrichment_tables = [
    multi_sp.motif_enrichment_knn(
        ct=ct,
        motifs=motif,
        dataset=datasets,
        k=k,
        min_support=min_support,
        dis_duplicates=is_duplicate,
        max_dist=200,
    )
    for motif in motifs
]

# pd.concat raises on an empty list, so fall back to an empty frame when
# there are no motifs.
motif_enrich_knn = (
    pd.concat(enrichment_tables, ignore_index=True) if enrichment_tables else pd.DataFrame()
)
motif_enrich_knn

Unnamed: 0,center,motifs,n_center_motif,n_center,n_motif,p-values,corrected p-values,if_significant
0,kidney interstitial fibroblast,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte]",10455,16220,878335,4.945481e-54,4.945481e-54,True
1,kidney interstitial fibroblast,"[endothelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte]",9711,16220,906702,0.938801,0.938801,False
2,kidney interstitial fibroblast,"[endothelial cell, leukocyte, macrophage]",8231,16220,654298,4.3625460000000005e-75,4.3625460000000005e-75,True
3,kidney interstitial fibroblast,"[endothelial cell, kidney interstitial fibroblast]",8137,16220,339695,0.0,0.0,True


In [8]:
# Run multi_sp.differential_analysis_knn for the chosen center cell type.
# The call returns two results, unpacked as d1 and d2
# (presumably one per selected dataset — TODO confirm).
ct = 'kidney interstitial fibroblast'

# NOTE: this rebinds the notebook-level min_support to 0.3, replacing the
# value read from the slider; any cell run afterwards that reads
# min_support sees 0.3.
min_support = 0.3

d1, d2 = multi_sp.differential_analysis_knn(ct=ct, datasets=datasets, k=k, min_support=min_support)


In [9]:
# Show only the raw and corrected p-value columns of d1.
d1.loc[:, ['p-values', 'corrected p-values']]

Unnamed: 0,p-values,corrected p-values


In [10]:
# Show only the raw and corrected p-value columns of d2.
d2.loc[:, ['p-values', 'corrected p-values']]

Unnamed: 0,p-values,corrected p-values
"endothelial cell, kidney loop of Henle thick ascending limb epithelial cell",6.363334e-12,1.113583e-11
"endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte",6.363334e-12,1.113583e-11
kidney loop of Henle thick ascending limb epithelial cell,6.363334e-12,1.113583e-11
"kidney loop of Henle thick ascending limb epithelial cell, leukocyte",6.363334e-12,1.113583e-11


In [11]:
# Using radius-based neighborhood: query multi_sp.find_fp_dist for the chosen
# center cell type with max_dist taken from the radius value.
# min_support is whatever the notebook-level variable currently holds.
ct = 'kidney interstitial fibroblast'
fp_dist = multi_sp.find_fp_dist(
    ct=ct, dataset=datasets, max_dist=radius,
    min_support=min_support, dis_duplicates=is_duplicate, min_size=0,
)
fp_dist

Unnamed: 0,itemsets,support
0,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte, macrophage]",0.496146
1,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte]",0.44404
2,"[endothelial cell, kidney interstitial fibroblast, kidney loop of Henle thick ascending limb epithelial cell, leukocyte]",0.434482
3,"[endothelial cell, kidney interstitial fibroblast, kidney proximal convoluted tubule epithelial cell, leukocyte]",0.392119
4,"[endothelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte, macrophage]",0.385336
5,"[endothelial cell, kidney interstitial fibroblast, leukocyte, macrophage]",0.372264
6,"[blood vessel smooth muscle cell, endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte]",0.34544
7,"[blood vessel smooth muscle cell, endothelial cell, leukocyte, macrophage]",0.314731
8,"[blood vessel smooth muscle cell, endothelial cell, kidney interstitial fibroblast]",0.31319
9,"[endothelial cell, kidney distal convoluted tubule epithelial cell]",0.3069


In [12]:
# Run motif_enrichment_dist once for every itemset found by find_fp_dist and
# stack the per-motif result tables into one DataFrame.
motifs = fp_dist['itemsets'].tolist()

# Collect the tables first and concatenate once: calling pd.concat inside the
# loop re-copies all accumulated rows on every iteration.
enrichment_tables = [
    multi_sp.motif_enrichment_dist(
        ct=ct,
        motifs=motif,
        dataset=datasets,
        max_dist=radius,
        min_support=min_support,
        dis_duplicates=is_duplicate,
        min_size=0,
    )
    for motif in motifs
]

# pd.concat raises on an empty list, so fall back to an empty frame when
# there are no motifs.
motif_enrich_dist = (
    pd.concat(enrichment_tables, ignore_index=True) if enrichment_tables else pd.DataFrame()
)
motif_enrich_dist

Unnamed: 0,center,motifs,n_center_motif,n_center,n_motif,p-values,corrected p-values,if_significant
0,kidney interstitial fibroblast,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte, macrophage]",8046,16220,664898,4.4243989999999995e-42,4.4243989999999995e-42,True
1,kidney interstitial fibroblast,"[endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte]",7201,16220,679595,0.9910593,0.9910593,False
2,kidney interstitial fibroblast,"[endothelial cell, kidney interstitial fibroblast, kidney loop of Henle thick ascending limb epithelial cell, leukocyte]",7046,16220,332146,0.0,0.0,True
3,kidney interstitial fibroblast,"[endothelial cell, kidney interstitial fibroblast, kidney proximal convoluted tubule epithelial cell, leukocyte]",6359,16220,291527,0.0,0.0,True
4,kidney interstitial fibroblast,"[endothelial cell, kidney proximal convoluted tubule epithelial cell, leukocyte, macrophage]",6249,16220,544616,2.23781e-09,2.23781e-09,True
5,kidney interstitial fibroblast,"[endothelial cell, kidney interstitial fibroblast, leukocyte, macrophage]",6037,16220,284645,0.0,0.0,True
6,kidney interstitial fibroblast,"[blood vessel smooth muscle cell, endothelial cell, kidney loop of Henle thick ascending limb epithelial cell, leukocyte]",5602,16220,380533,2.826037e-150,2.826037e-150,True
7,kidney interstitial fibroblast,"[blood vessel smooth muscle cell, endothelial cell, leukocyte, macrophage]",5104,16220,326808,1.205926e-181,1.205926e-181,True
8,kidney interstitial fibroblast,"[blood vessel smooth muscle cell, endothelial cell, kidney interstitial fibroblast]",5079,16220,201555,0.0,0.0,True
9,kidney interstitial fibroblast,"[endothelial cell, kidney distal convoluted tubule epithelial cell]",4977,16220,391585,1.4484250000000001e-39,1.4484250000000001e-39,True


In [13]:
# Run multi_sp.differential_analysis_dist (radius-based) for the chosen
# center cell type. The call returns two results, unpacked as d1 and d2,
# which overwrite any earlier d1 / d2.
ct = 'kidney interstitial fibroblast'

# NOTE: rebinds the notebook-level min_support to 0.3.
min_support = 0.3

d1, d2 = multi_sp.differential_analysis_dist(
    ct=ct, datasets=datasets, max_dist=radius, min_support=min_support, min_size=0,
)
