## Analyse scanpy pre-processed data with SpaGCN

In [15]:
import os,csv,re
import pandas as pd
import numpy as np
import numba
import scanpy as sc
import math
import SpaGCN as spg
from scipy.sparse import issparse
import random, torch
import warnings
warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import SpaGCN as spg
import cv2

In [16]:
##Read in preprocessed adata
#Read in gene expression and spatial location
adata=sc.read("ffpe_human_prostate.h5ad")

#read in spatial positions
spatial=pd.read_csv("FFPE_Visium_Human_Prostate/spatial/tissue_positions_list.csv",sep=",",header=None,na_filter=False,index_col=0,names=["barcode", "in_tissue", "array_row", "array_col","pxl_row_in_fullres","pxl_col_in_fullres"]) 

#Read in hitology image
img=cv2.imread("FFPE_Visium_Human_Prostate/spatial/tissue_hires_image.tiff")


In [17]:
##re index spatial table according to barcodes in adata
sorted_barcodes = list(adata.obs_names)

spatial = spatial.reindex(sorted_barcodes)

In [18]:
#Set coordinates
x_array=adata.obs["array_row"].tolist()
y_array=adata.obs["array_col"].tolist()
x_pixel=spatial["pxl_row_in_fullres"].tolist()
y_pixel=spatial["pxl_col_in_fullres"].tolist()

print(img)
#Test coordinates on the image
img_new=img.copy()
for i in range(len(x_pixel)):
    x=x_pixel[i]
    y=y_pixel[i]
    img[int(x-20):int(x+20), int(y-20):int(y+20),:]=0

cv2.imwrite('ffpe_prostate_scanpy_processed_image_map.jpg', img_new)

[[[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [251 254 252]
  [251 255 250]
  [251 255 250]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [250 254 249]
  [251 255 250]
  [250 254 249]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [250 254 249]
  [250 254 249]
  [250 254 249]]

 ...

 [[252 254 254]
  [252 255 253]
  [252 255 253]
  ...
  [252 255 253]
  [255 255 255]
  [254 254 254]]

 [[254 254 254]
  [254 254 254]
  [255 255 255]
  ...
  [252 255 253]
  [255 255 255]
  [254 254 254]]

 [[255 255 255]
  [255 255 255]
  [255 255 255]
  ...
  [252 255 253]
  [255 255 255]
  [254 254 254]]]


True

In [19]:
#Calculate adjacent matrix
s=1
b=49
#print(img)
#adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, x_pixel=x_pixel, y_pixel=y_pixel, image=img, beta=b, alpha=s, histology=True)
#If histlogy image is not available, SpaGCN can calculate the adjacent matrix using the fnction below
adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)
print(adj)
np.savetxt('ffpe_prostate_adj.csv', adj, delimiter=',')

Calculateing adj matrix using xy only...
[[    0.      9159.846  13606.126  ...  8569.658   6012.218  10953.049 ]
 [ 9159.846      0.     14339.632  ...  5030.9067  4949.651  12477.826 ]
 [13606.126  14339.632      0.     ...  9309.119  10318.396   2659.1692]
 ...
 [ 8569.658   5030.9067  9309.119  ...     0.      2583.568   7543.66  ]
 [ 6012.218   4949.651  10318.396  ...  2583.568      0.      8067.8696]
 [10953.049  12477.826   2659.1692 ...  7543.66    8067.8696     0.    ]]


In [20]:
#set hyper-parameters
p=0.5 
#Find the l value given p
l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)
print(l)

Run 1: l [0.01, 1000], p [0.0, 61.544431831095096]
Run 2: l [0.01, 500.005], p [0.0, 16.13810157775879]
Run 3: l [0.01, 250.0075], p [0.0, 3.5948777198791504]
Run 4: l [125.00874999999999, 250.0075], p [0.3360424041748047, 3.5948777198791504]
Run 5: l [125.00874999999999, 187.508125], p [0.3360424041748047, 1.6759705543518066]
Run 6: l [125.00874999999999, 156.2584375], p [0.3360424041748047, 0.9174325466156006]
Run 7: l [125.00874999999999, 140.63359375], p [0.3360424041748047, 0.5990318059921265]
Run 8: l [132.821171875, 140.63359375], p [0.4595450162887573, 0.5990318059921265]
Run 9: l [132.821171875, 136.7273828125], p [0.4595450162887573, 0.5274378061294556]
recommended l =  134.77427734375
134.77427734375


In [21]:
#Going with 14 clusters as this is what I got from previous data - this has 0 index
n_clusters=11
#Set seed
r_seed=t_seed=n_seed=100
#Search for suitable resolution
res=spg.search_res(adata, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  8
Initializing cluster centers with louvain, resolution =  0.7999999999999999
Epoch  0
Epoch  10
Res =  0.7999999999999999 Num of clusters =  8
Res changed to 0.7999999999999999
Initializing cluster centers with louvain, resolution =  0.8999999999999999
Epoch  0
Epoch  10
Res =  0.8999999999999999 Num of clusters =  10
Res changed to 0.8999999999999999
Initializing cluster centers with louvain, resolution =  0.9999999999999999
Epoch  0
Epoch  10
Res =  0.9999999999999999 Num of clusters =  10
Res changed to 0.9999999999999999
Initializing cluster centers with louvain, resolution =  1.0999999999999999
Epoch  0
Epoch  10
Res =  1.0999999999999999 Num of clusters =  10
Res changed to 1.0999999999999999
Initializing cluster centers with louvain, resolution =  1.2
Epoch  0
Epoch  10
Res =  1.2 Num of clusters =  12
Step changed to 0.05
Initializing cl

In [22]:
clf=spg.SpaGCN()
clf.set_l(l)
#Set seed
random.seed(r_seed)
torch.manual_seed(t_seed)
np.random.seed(n_seed)
#Run
clf.train(adata,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
y_pred, prob=clf.predict()
adata.obs["pred"]= y_pred
adata.obs["pred"]=adata.obs["pred"].astype('category')
#Do cluster refinement(optional)
#shape="hexagon" for Visium data, "square" for ST data.
adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
refined_pred=spg.refine(sample_id=adata.obs.index.tolist(), pred=adata.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
adata.obs["refined_pred"]=refined_pred
adata.obs["refined_pred"]=adata.obs["refined_pred"].astype('category')

Initializing cluster centers with louvain, resolution =  1.1078124999999999
Epoch  0
Epoch  10
Epoch  20
Epoch  30
delta_label  0.0039005363237445147 < tol  0.005
Reach tolerance threshold. Stopping training.
Total epoch: 31
Calculateing adj matrix using xy only...


In [23]:
#Plot spatial domains
#Set colors used
plot_color=["#F56867","#FEB915","#C798EE","#59BE86","#7495D3","#D1D1D1","#6D1A9C","#15821E","#3A84E6","#997273","#787878","#DB4C6C","#9E7A7A","#554236","#AF5F3C","#93796C","#F9BD3F","#DAB370","#877F6C","#268785"]
#Plot spatial domains
domains="pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
#ax=sc.pl.scatter(adata,alpha=1,x="y_pixel",y="x_pixel",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
#change above line so it comes from adata input
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("/Users/natalie.charitakis/Documents/Documents - MCRI-U0197/Work/Projects/PhD Project/Benchmarking SRT/ffpe_prostate/spagcn_pred.png", dpi=600)
plt.close()

#Plot refined spatial domains
domains="refined_pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ffpe_prostate_spagcn_refined_pred.png", dpi=600)
plt.close()

In [24]:
target=0
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 235.26536312849163]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 78.05027932960894]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 30.58659217877095]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=10.966480446927374
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 17.550279329608937
 Cluster 0 has neighbors:
Dmain  1 :  700
Dmain  2 :  389
Dmain  4 :  296
Dmain  5 :  290
Dmain  8 :  168
[1, 2, 4, 5, 8]
SVGs for domain  0 : ['MYLK', 'DES', 'SYNM', 'CNN1', 'SYNPO2', 'SORBS1', 'SMTN', 'PCP4', 'C12orf75']


In [26]:
target=1
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 190.83984375]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 67.9375]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 27.16015625]
Calculateing adj matrix using xy only...
Run 4: radius [3.0052037984132767, 4.5961940586566925], num_nbr [9.9296875, 27.16015625]
Calculateing adj matrix using xy only...
Run 5: radius [3.0052037984132767, 3.8006989285349846], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 6: radius [3.0052037984132767, 3.4029513634741306], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 7: radius [3.0052037984132767, 3.2040775809437037], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using 

Run 64: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 65: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 66: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 67: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 68: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 69: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 70: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
Calculateing adj matrix using xy only...
Run 71: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.9296875, 15.69921875]
C

In [27]:
target=2
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 194.7012195121951]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 67.92682926829268]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 27.23170731707317]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=10.079268292682928
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 15.951219512195122
 Cluster 2 has neighbors:
Dmain  0 :  389
Dmain  5 :  360
Dmain  8 :  326
Dmain  1 :  301
Dmain  7 :  183
Dmain  4 :  118
[0, 5, 8, 1, 7, 4]
SVGs for domain  2 : ['IGFBP5', 'TIMP3', 'VIM', 'CD74', 'A2M', 'HLA-E', 'TAGLN2', 'BGN']


In [28]:
target=3
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 188.70247933884298]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 68.74793388429752]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 28.429752066115704]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=10.764462809917354
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 16.859504132231404
 Cluster 3 has neighbors:
Dmain  1 :  600
Dmain  7 :  105
[1, 7]
SVGs for domain  3 : ['MSMB', 'AZGP1', 'FKBP5', 'KLK2', 'NR4A1', 'XBP1', 'VEGFA', 'CD9', 'HERPUD1', 'RDH11', 'NKX3-1', 'FASN', 'SLC45A3', 'SGK1', 'AMD1', 'STEAP2', 'PLPP1', 'DBI', 'NCAPD3', 'CPE', 'TPD52', 'ELL2', 'SPINT2', 'GREB1', 'KLK4', 'N

In [29]:
target=4
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 155.14798206278027]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 58.771300448430495]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 25.650224215246638]
Calculateing adj matrix using xy only...
Run 4: radius [3.0052037984132767, 4.5961940586566925], num_nbr [9.834080717488789, 25.650224215246638]
Calculateing adj matrix using xy only...
Run 5: radius [3.0052037984132767, 3.8006989285349846], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 6: radius [3.0052037984132767, 3.4029513634741306], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 7: radius [3.0052037984132767, 3.2040775809437

Run 58: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 59: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 60: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 61: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 62: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 63: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix using xy only...
Run 64: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.834080717488789, 15.385650224215247]
Calculateing adj matrix us

In [30]:
target=5
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 240.95]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 82.85454545454546]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 32.95454545454545]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=11.854545454545455
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 18.972727272727273
 Cluster 5 has neighbors:
Dmain  2 :  360
Dmain  8 :  341
Dmain  0 :  290
Dmain  6 :  268
Dmain  9 :  157
Dmain  4 :  123
[2, 8, 0, 6, 9, 4]
SVGs for domain  5 : ['IGKC', 'IGFBP6', 'CCN2']


In [31]:
target=6
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 205.04054054054055]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 75.01801801801801]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 30.743243243243242]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=11.337837837837839
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 18.013513513513512
 Cluster 6 has neighbors:
Dmain  9 :  873
Dmain  5 :  268
Dmain  8 :  154
[9, 5, 8]
SVGs for domain  6 : ['IGFBP7', 'SPARC', 'CLU', 'DES', 'CNN1', 'CCN2', 'PTGDS', 'LUM', 'MASP1', 'FILIP1L', 'ELN', 'FRZB']


In [32]:
target=7
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 195.27184466019418]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 71.77669902912622]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 29.58252427184466]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=10.922330097087379
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 17.33009708737864
 Cluster 7 has neighbors:
Dmain  2 :  183
Dmain  1 :  169
Dmain  8 :  94
Dmain  9 :  93
[2, 1, 8, 9]
SVGs for domain  7 : ['AZGP1', 'KLK2', 'ACPP', 'TSC22D3', 'MSMB', 'CD9', 'DDIT4', 'SGK1', 'PABPC1', 'KLK3', 'FKBP5', 'XBP1', 'GLUL', 'EEF1G', 'VEGFA', 'ATP1A1', 'TXNIP', 'TFCP2L1', 'KRT15', 'TACSTD2', 'HERPUD1

In [33]:
target=8
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 192.11811023622047]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 70.56692913385827]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 29.078740157480315]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=10.874015748031496
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 17.212598425196852
 Cluster 8 has neighbors:
Dmain  5 :  341
Dmain  2 :  326
Dmain  6 :  154
Dmain  7 :  94
Dmain  9 :  72
[5, 2, 6, 7, 9]
SVGs for domain  8 : ['XBP1', 'CD74', 'WFDC2', 'ZFP36L1', 'AZGP1', 'MT1X', 'DHRS3', 'HLA-DRA', 'KRT15', 'SAT1', 'ANXA2', 'NPC2', 'TSC22D1', 'NFIB', 'CD9', 'S100A11', 'SOD2', 'SYNE2', 'TOB1

In [34]:
target=9
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 14.142135620117188], num_nbr [1.0, 204.63235294117646]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.778174579143524], num_nbr [1.0, 75.88970588235294]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.5961940586566925], num_nbr [1.0, 31.272058823529413]
Calculateing adj matrix using xy only...
recommended radius =  3.0052037984132767 num_nbr=11.551470588235293
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 18.279411764705884
 Cluster 9 has neighbors:
Dmain  6 :  873
Dmain  5 :  157
Dmain  7 :  93
Dmain  8 :  72
[6, 5, 7, 8]
SVGs for domain  9 : ['AZGP1', 'MSMB', 'ACPP', 'KLK2', 'SGK1', 'MT1X', 'HERPUD1', 'CPE', 'VEGFA', 'SLC45A3', 'KRT5', 'AMD1', 'RDH11', 'PLPP1', 'DBI', 'STEAP2', 'KLK4', 'NKX3-1', 'FXYD3', 'NCAPD3', 'NEFH', 'T