## Analsyis of FF lymph node with SpaGCN

In [1]:
import os,csv,re
import pandas as pd
import numpy as np
import numba
import scanpy as sc
import math
import SpaGCN as spg
from scipy.sparse import issparse
import random, torch
import warnings
warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import SpaGCN as spg
#In order to read in image data, we need to install some package. Here we recommend package "opencv"
#inatll opencv in python
#!pip3 install opencv-python
import cv2

In [2]:
##Read in preprocessed adata
#Read in gene expression and spatial location
adata=sc.read("ff_lymph_node.h5ad")

#read in spatial positions
spatial=pd.read_csv("V1_Human_Lymph_Node/spatial/tissue_positions_list.csv",sep=",",header=None,na_filter=False,index_col=0,names=["barcode", "in_tissue", "array_row", "array_col","pxl_row_in_fullres","pxl_col_in_fullres"]) 

#Read in hitology image
img=cv2.imread("V1_Human_Lymph_Node/spatial/tissue_hires_image.tiff")


In [3]:
##re index spatial table according to barcodes in adata
sorted_barcodes = list(adata.obs_names)

spatial = spatial.reindex(sorted_barcodes)

In [4]:
#Set coordinates
x_array=adata.obs["array_row"].tolist()
y_array=adata.obs["array_col"].tolist()
x_pixel=spatial["pxl_row_in_fullres"].tolist()
y_pixel=spatial["pxl_col_in_fullres"].tolist()

print(img)
#Test coordinates on the image
img_new=img.copy()
for i in range(len(x_pixel)):
    x=x_pixel[i]
    y=y_pixel[i]
    img[int(x-20):int(x+20), int(y-20):int(y+20),:]=0

cv2.imwrite('ff_lymph_node_scanpy_processed_image_map.jpg', img_new)

[[[147 153 150]
  [145 153 149]
  [145 153 150]
  ...
  [146 151 148]
  [147 152 148]
  [147 152 150]]

 [[146 153 150]
  [147 153 150]
  [147 153 149]
  ...
  [148 152 149]
  [148 152 149]
  [146 153 150]]

 [[148 153 149]
  [147 153 149]
  [146 152 149]
  ...
  [147 152 149]
  [147 153 148]
  [147 152 149]]

 ...

 [[147 152 149]
  [146 153 150]
  [147 152 150]
  ...
  [148 153 151]
  [147 153 152]
  [147 153 152]]

 [[147 152 150]
  [147 152 149]
  [148 152 150]
  ...
  [147 153 151]
  [147 153 151]
  [147 153 152]]

 [[147 152 149]
  [147 153 149]
  [147 152 149]
  ...
  [147 153 150]
  [147 153 150]
  [147 153 151]]]


True

In [5]:
#Calculate adjacent matrix
s=1
b=49
#If histlogy image is not available, SpaGCN can calculate the adjacent matrix using the fnction below
adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)
print(adj)
np.savetxt('ff_lymph_node_adj.csv', adj, delimiter=',')

Calculateing adj matrix using xy only...
[[   0.     6941.681  4346.4863 ... 5198.136  5022.596  6233.6777]
 [6941.681     0.     3749.8904 ... 5149.467  8384.816   729.543 ]
 [4346.4863 3749.8904    0.     ... 5920.7925 7952.3447 3076.6326]
 ...
 [5198.136  5149.467  5920.7925 ...    0.     3489.1238 4842.522 ]
 [5022.596  8384.816  7952.3447 ... 3489.1238    0.     7935.251 ]
 [6233.6777  729.543  3076.6326 ... 4842.522  7935.251     0.    ]]


In [6]:
#set hyper-parameters
p=0.5 
#Find the l value given p
l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)
print(l)

Run 1: l [0.01, 1000], p [0.0, 294.99892573530207]
Run 2: l [0.01, 500.005], p [0.0, 80.76814270019531]
Run 3: l [0.01, 250.0075], p [0.0, 20.529651641845703]
Run 4: l [0.01, 125.00874999999999], p [0.0, 4.572876930236816]
recommended l =  62.509375
62.509375


In [7]:
#Going with 9 clusters as this is corresponds to scanpy clustering
n_clusters=9
#Set seed
r_seed=t_seed=n_seed=100
#Search for suitable resolution
res=spg.search_res(adata, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  9
recommended res =  0.7


In [8]:
clf=spg.SpaGCN()
clf.set_l(l)
#Set seed
random.seed(r_seed)
torch.manual_seed(t_seed)
np.random.seed(n_seed)
#Run
clf.train(adata,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
y_pred, prob=clf.predict()
adata.obs["pred"]= y_pred
adata.obs["pred"]=adata.obs["pred"].astype('category')
#Do cluster refinement(optional)
#shape="hexagon" for Visium data, "square" for ST data.
adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
refined_pred=spg.refine(sample_id=adata.obs.index.tolist(), pred=adata.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
adata.obs["refined_pred"]=refined_pred
adata.obs["refined_pred"]=adata.obs["refined_pred"].astype('category')

Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Epoch  20
Epoch  30
Epoch  40
delta_label  0.003437334743521946 < tol  0.005
Reach tolerance threshold. Stopping training.
Total epoch: 40
Calculateing adj matrix using xy only...


In [9]:
#Plot spatial domains
#Set colors used
plot_color=["#F56867","#FEB915","#C798EE","#59BE86","#7495D3","#D1D1D1","#6D1A9C","#15821E","#3A84E6","#997273","#787878","#DB4C6C","#9E7A7A","#554236","#AF5F3C","#93796C","#F9BD3F","#DAB370","#877F6C","#268785"]
#Plot spatial domains
domains="pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
#ax=sc.pl.scatter(adata,alpha=1,x="y_pixel",y="x_pixel",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
#change above line so it comes from adata input
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_lymph_node_spagcn_pred.png", dpi=600)
plt.close()

#Plot refined spatial domains
domains="refined_pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_lymph_node_spagcn_refined_pred.png", dpi=600)
plt.close()

In [10]:
#Use domain 0 as an example
target=0
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.429495472186288, 383.3065976714101]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.429495472186288, 127.27296248382923]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.429495472186288, 44.65459249676585]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.429495472186288, 19.459249676584736]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=12.103492884864165
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 12.103492884864165
 Cluster 0 has neighbors:
Dmain  3 :  1136
Dmain  1 :  986
Dmain  6 :  904
Dmain  4 :  798
Dmain  7 :  674
Dmain  5 :  361
Dmain  8 :  39
[3, 1, 6, 4, 7, 5, 8]
SVGs for domain  0 : []


In [11]:
target=1
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.456772334293948, 367.06340057636885]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.456772334293948, 121.27089337175792]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.456772334293948, 43.283861671469744]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.456772334293948, 19.203170028818445]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=12.044668587896254
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 12.044668587896254
 Cluster 1 has neighbors:
Dmain  4 :  1385
Dmain  5 :  1010
Dmain  0 :  986
Dmain  2 :  202
Dmain  7 :  155
Dmain  8 :  31
[4, 5, 0, 2, 7, 8]
SVGs for domain  1 : ['FDCSP', 'CXCL13',

In [12]:
target=2
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [7.615384615384615, 341.0615384615385]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [7.615384615384615, 110.13846153846154]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [7.615384615384615, 38.343589743589746]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [7.615384615384615, 16.96923076923077]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=10.707692307692307
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 10.707692307692307
 Cluster 2 has neighbors:
Dmain  4 :  299
[4]
SVGs for domain  2 : ['MT-ND2', 'IGHG2', 'IGHGP', 'MZB1', 'TAGLN', 'MYL9', 'MGP', 'ACTA2']


In [13]:
target=3
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.6991341991342, 377.23809523809524]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.6991341991342, 127.21861471861472]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.6991341991342, 45.40909090909091]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.6991341991342, 19.93073593073593]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=12.441558441558442
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 12.441558441558442
 Cluster 3 has neighbors:
Dmain  0 :  1136
Dmain  8 :  45
[0, 8]
SVGs for domain  3 : ['TRAC', 'CCR7', 'TRBC1', 'PIK3IP1', 'CD3D', 'TCF7', 'CD6', 'IL7R', 'LAT', 'ZAP70', 'SPOCK2', 'CD3E', 'CD2

In [14]:
target=4
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.294589178356713, 376.2024048096192]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.294589178356713, 124.35270541082164]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.294589178356713, 43.53507014028056]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.294589178356713, 19.016032064128257]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=11.891783567134269
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 11.891783567134269
 Cluster 4 has neighbors:
Dmain  1 :  1385
Dmain  0 :  798
Dmain  7 :  308
Dmain  2 :  299
Dmain  5 :  267
Dmain  8 :  60
[1, 0, 7, 2, 5, 8]
SVGs for domain  4 : ['SOD2', 'IGFBP7', 'LY

In [15]:
target=5
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.231805929919137, 356.50134770889485]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.231805929919137, 118.11859838274933]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.231805929919137, 41.943396226415096]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.231805929919137, 18.619946091644206]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=11.679245283018869
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 11.679245283018869
 Cluster 5 has neighbors:
Dmain  1 :  1010
Dmain  4 :  267
Dmain  8 :  62
[1, 4, 8]
SVGs for domain  5 : ['EZR', 'CR2', 'HMGB2', 'ATP5MG', 'POU2AF1', 'H2AFV', 'H2AFZ', 'TCL1A', 'MARC

In [16]:
target=6
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.52892561983471, 399.2169421487603]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.52892561983471, 128.53512396694214]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.52892561983471, 44.79545454545455]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.52892561983471, 19.574380165289256]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=12.204545454545455
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 12.204545454545455
 Cluster 6 has neighbors:
Dmain  0 :  904
Dmain  7 :  728
Dmain  2 :  180
[0, 7, 2]
SVGs for domain  6 : ['XBP1', 'IGLC3', 'MZB1', 'IGHGP', 'BTG2']


In [17]:
target=7
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.492063492063492, 396.984126984127]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.492063492063492, 130.80555555555554]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.492063492063492, 45.28968253968254]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.492063492063492, 19.599206349206348]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=12.214285714285714
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 12.214285714285714
 Cluster 7 has neighbors:
Dmain  6 :  728
Dmain  0 :  674
Dmain  4 :  308
Dmain  2 :  119
[6, 0, 4, 2]
SVGs for domain  7 : ['MMP9', 'EGFL7', 'CEMIP', 'STAB1', 'C1QC', 'TIMP3', 'CLEC4M'

In [19]:
target=8
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.262676239013672], num_nbr [8.807692307692308, 440.59615384615387]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.631338119506836], num_nbr [8.807692307692308, 141.57692307692307]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.815669059753418], num_nbr [8.807692307692308, 48.03846153846154]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.907834529876709], num_nbr [8.807692307692308, 20.615384615384617]
Calculateing adj matrix using xy only...
recommended radius =  2.9539172649383545 num_nbr=12.75
2.9539172649383545
radius= 2.9539172649383545 average number of neighbors for each spot is 12.75
 Cluster 8 has neighbors:
No neighbor domain found, try bigger radius or smaller ratio.
None


TypeError: 'NoneType' object is not subscriptable