## Identify SVGs in FF Invasive Ductal Carcinoma Breast Tissue using SpaGCN

In [1]:
import os,csv,re
import pandas as pd
import numpy as np
import numba
import scanpy as sc
import math
import SpaGCN as spg
from scipy.sparse import issparse
import random, torch
import warnings
warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import SpaGCN as spg
import cv2

In [2]:
##Read in preprocessed adata
#Read in gene expression and spatial location
adata=sc.read("ff_breast_cancer.h5ad")

#read in spatial positions
spatial=pd.read_csv("V1_Breast_Cancer_Block_A_Section_1/spatial/tissue_positions_list.csv",sep=",",header=None,na_filter=False,index_col=0,names=["barcode", "in_tissue", "array_row", "array_col","pxl_row_in_fullres","pxl_col_in_fullres"]) 

#Read in hitology image
img=cv2.imread("V1_Breast_Cancer_Block_A_Section_1/spatial/tissue_hires_image.tiff")


In [3]:
##re index spatial table according to barcodes in adata
sorted_barcodes = list(adata.obs_names)

spatial = spatial.reindex(sorted_barcodes)

In [4]:
#Set coordinates
x_array=adata.obs["array_row"].tolist()
y_array=adata.obs["array_col"].tolist()
x_pixel=spatial["pxl_row_in_fullres"].tolist()
y_pixel=spatial["pxl_col_in_fullres"].tolist()

print(img)
#Test coordinates on the image
img_new=img.copy()
for i in range(len(x_pixel)):
    x=x_pixel[i]
    y=y_pixel[i]
    img[int(x-20):int(x+20), int(y-20):int(y+20),:]=0

cv2.imwrite('ff_human_breast_cancer_scanpy_processed_image_map.jpg', img_new)

[[[188 191 189]
  [188 192 190]
  [189 191 189]
  ...
  [188 192 191]
  [186 191 189]
  [189 193 190]]

 [[188 191 190]
  [188 192 189]
  [188 192 190]
  ...
  [188 192 191]
  [188 191 189]
  [188 193 190]]

 [[188 192 190]
  [188 192 190]
  [188 192 190]
  ...
  [188 192 191]
  [188 192 191]
  [189 192 190]]

 ...

 [[189 191 189]
  [188 191 189]
  [188 191 189]
  ...
  [187 190 187]
  [186 189 187]
  [186 189 187]]

 [[188 191 188]
  [188 190 189]
  [188 191 189]
  ...
  [186 190 187]
  [186 189 187]
  [186 190 187]]

 [[188 190 188]
  [188 190 188]
  [189 191 188]
  ...
  [186 190 188]
  [186 190 187]
  [186 189 187]]]


True

In [5]:
#Calculate adjacent matrix
s=1
b=49
#print(img)
#adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, x_pixel=x_pixel, y_pixel=y_pixel, image=img, beta=b, alpha=s, histology=True)
#If histlogy image is not available, SpaGCN can calculate the adjacent matrix using the fnction below
adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)
print(adj)
np.savetxt('ff_human_breast_cancer_adj.csv', adj, delimiter=',')

Calculateing adj matrix using xy only...
[[    0.     11531.981  12172.211  ...  8408.998  10113.489  10308.903 ]
 [11531.981      0.      2965.8894 ...  3149.5564  1520.0662  3500.6543]
 [12172.211   2965.8894     0.     ...  4744.0938  3862.6013  1969.3978]
 ...
 [ 8408.998   3149.5564  4744.0938 ...     0.      1706.5708  3705.9834]
 [10113.489   1520.0662  3862.6013 ...  1706.5708     0.      3586.7124]
 [10308.903   3500.6543  1969.3978 ...  3705.9834  3586.7124     0.    ]]


In [6]:
#set hyper-parameters
p=0.5 
#Find the l value given p
l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)
print(l)

Run 1: l [0.01, 1000], p [0.0, 74.49452308851099]
Run 2: l [0.01, 500.005], p [0.0, 19.475013732910156]
Run 3: l [0.01, 250.0075], p [0.0, 4.418020248413086]
Run 4: l [125.00874999999999, 250.0075], p [0.4886806011199951, 4.418020248413086]
Run 5: l [125.00874999999999, 187.508125], p [0.4886806011199951, 2.122544765472412]
Run 6: l [125.00874999999999, 156.2584375], p [0.4886806011199951, 1.2107300758361816]
Run 7: l [125.00874999999999, 140.63359375], p [0.4886806011199951, 0.8212234973907471]
Run 8: l [125.00874999999999, 132.821171875], p [0.4886806011199951, 0.6467751264572144]
Run 9: l [125.00874999999999, 128.91496093749998], p [0.4886806011199951, 0.5655173063278198]
Run 10: l [125.00874999999999, 126.96185546874999], p [0.4886806011199951, 0.5265238285064697]
recommended l =  125.98530273437498
125.98530273437498


In [7]:
#Going with 14 clusters as this is what I got from previous data - this has 0 index
n_clusters=14
#Set seed
r_seed=t_seed=n_seed=100
#Search for suitable resolution
res=spg.search_res(adata, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  13
Initializing cluster centers with louvain, resolution =  0.7999999999999999
Epoch  0
Epoch  10
Res =  0.7999999999999999 Num of clusters =  14
recommended res =  0.7999999999999999


In [8]:
clf=spg.SpaGCN()
clf.set_l(l)
#Set seed
random.seed(r_seed)
torch.manual_seed(t_seed)
np.random.seed(n_seed)
#Run
clf.train(adata,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
y_pred, prob=clf.predict()
adata.obs["pred"]= y_pred
adata.obs["pred"]=adata.obs["pred"].astype('category')
#Do cluster refinement(optional)
#shape="hexagon" for Visium data, "square" for ST data.
adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
refined_pred=spg.refine(sample_id=adata.obs.index.tolist(), pred=adata.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
adata.obs["refined_pred"]=refined_pred
adata.obs["refined_pred"]=adata.obs["refined_pred"].astype('category')

Initializing cluster centers with louvain, resolution =  0.7999999999999999
Epoch  0
Epoch  10
Epoch  20
Epoch  30
delta_label  0.0046204620462046205 < tol  0.005
Reach tolerance threshold. Stopping training.
Total epoch: 34
Calculateing adj matrix using xy only...


In [9]:
#Plot spatial domains
#Set colors used
plot_color=["#F56867","#FEB915","#C798EE","#59BE86","#7495D3","#D1D1D1","#6D1A9C","#15821E","#3A84E6","#997273","#787878","#DB4C6C","#9E7A7A","#554236","#AF5F3C","#93796C","#F9BD3F","#DAB370","#877F6C","#268785"]
#Plot spatial domains
domains="pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
#ax=sc.pl.scatter(adata,alpha=1,x="y_pixel",y="x_pixel",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
#change above line so it comes from adata input
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_human_breast_cancer_spagcn_pred.png", dpi=600)
plt.close()

#Plot refined spatial domains
domains="refined_pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_human_breast_cancer_spagcn_refined_pred.png", dpi=600)
plt.close()

In [11]:
#Use domain 0 as an example
target=0
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 258.84939759036143]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 83.24096385542168]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 31.18975903614458]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [7.081325301204819, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [7.081325301204819, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 7: radius [3.063392600044608, 3.2989896088838577], num_nbr [9.8

Run 58: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.891566265060241, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 59: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.891566265060241, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 60: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.891566265060241, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 61: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.891566265060241, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 62: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.891566265060241, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 63: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.891566265060241, 15.487951807228916]
Calculateing adj matrix using xy only...
Run 64: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.891566265060241, 15.487951807228916]
Calculateing adj matrix us

In [13]:
target=1
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 235.49279538904898]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 84.9135446685879]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 33.10374639769452]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 16.371757925072046]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [7.412103746397695, 16.371757925072046]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [7.412103746397695, 16.371757925072046]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=10.39193083573487
ra

In [14]:
target=2
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 311.531914893617]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 105.12765957446808]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 38.54609929078014]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 18.46808510638298]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.131205673758865, 18.46808510638298]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.131205673758865, 18.46808510638298]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=11.581560283687944
radi

In [15]:
target=3
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 344.2432432432432]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 111.13243243243244]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 40.851351351351354]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 19.45945945945946]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.475675675675676, 19.45945945945946]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.475675675675676, 19.45945945945946]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=12.162162162162161
ra

In [16]:
target=4
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 263.64312267657994]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 99.85130111524164]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 38.014869888475836]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 18.304832713754646]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.066914498141264, 18.304832713754646]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.066914498141264, 18.304832713754646]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=11.486988847583643

In [17]:
target=5
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 312.10877192982457]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 98.25614035087719]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 34.95789473684211]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 16.856140350877194]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [7.585964912280701, 16.856140350877194]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [7.585964912280701, 16.856140350877194]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=10.670175438596491


In [18]:
target=6
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 273.0047619047619]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 100.49047619047619]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 38.352380952380955]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 18.62857142857143]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.266666666666667, 18.62857142857143]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.266666666666667, 18.62857142857143]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=11.728571428571428
ra

In [19]:
target=7
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 338.68279569892474]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 115.98924731182795]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 42.36021505376344]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 20.091397849462364]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.725806451612904, 20.091397849462364]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.725806451612904, 20.091397849462364]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=12.53225806451613


In [20]:
target=8
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 337.0285714285714]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 113.08163265306122]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 41.10612244897959]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 19.559183673469388]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.518367346938776, 19.559183673469388]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.518367346938776, 19.559183673469388]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=12.2
radius= 3.1622

In [21]:
target=9
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 378.6054054054054]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 123.17837837837838]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 43.6972972972973]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 20.556756756756755]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.881081081081081, 20.556756756756755]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.881081081081081, 20.556756756756755]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=12.783783783783784
r

In [22]:
target=10
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 286.9158878504673]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 98.5607476635514]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 36.691588785046726]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 17.785046728971963]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [7.850467289719626, 17.785046728971963]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [7.850467289719626, 17.785046728971963]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=11.205607476635514
r

In [23]:
target=11
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 285.9734513274336]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 109.93805309734513]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 42.16814159292036]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 20.123893805309734]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [8.716814159292035, 20.123893805309734]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [8.716814159292035, 20.123893805309734]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=12.530973451327434


In [24]:
target=12
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 303.4691358024691]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 96.65432098765432]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 34.34567901234568]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 16.679012345679013]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [7.592592592592593, 16.679012345679013]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [7.592592592592593, 16.679012345679013]
Calculateing adj matrix using xy only...
recommended radius =  3.063392600044608 num_nbr=10.592592592592593
r

In [26]:
target=14
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.492422103881836], num_nbr [1.0, 189.27777777777777]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.953317821025848], num_nbr [1.0, 64.72222222222223]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.183765679597855], num_nbr [1.0, 27.38888888888889]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2989896088838577], num_nbr [1.0, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 5: radius [2.3566015735268593, 3.2989896088838577], num_nbr [7.277777777777778, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 6: radius [2.8277955912053585, 3.2989896088838577], num_nbr [7.277777777777778, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 7: radius [3.063392600044608, 3.2989896088838577], num_nbr [9.5555

Run 66: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.555555555555555, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 67: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.555555555555555, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 68: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.555555555555555, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 69: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.555555555555555, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 70: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.555555555555555, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 71: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.555555555555555, 14.38888888888889]
Calculateing adj matrix using xy only...
Run 72: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.555555555555555, 14.38888888888889]
Calculateing adj matrix using xy 

TypeError: 'NoneType' object is not subscriptable