# Analysis of FF Endometrial Adenocaricnoma Data with SpaGCN

In [21]:
import os,csv,re
import pandas as pd
import numpy as np
import numba
import scanpy as sc
import math
import SpaGCN as spg
from scipy.sparse import issparse
import random, torch
import warnings
warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import SpaGCN as spg
import cv2

In [22]:
##Read in preprocessed adata
#Read in gene expression and spatial location
adata=sc.read("ff_ovarian_cancer_procesed.h5ad")

#read in spatial positions - this file will have been automatically pulled when downloading the dataset with scanpy
spatial=pd.read_csv("data/Parent_Visium_Human_OvarianCancer/spatial/tissue_positions_list.csv",sep=",",header=None,na_filter=False,index_col=0,names=["barcode", "in_tissue", "array_row", "array_col","pxl_row_in_fullres","pxl_col_in_fullres"]) 

#Read in hitology image
img=cv2.imread("/data/Parent_Visium_Human_OvarianCancer/spatial/tissue_hires_image.tiff")


In [23]:
##re index spatial table according to barcodes in adata
sorted_barcodes = list(adata.obs_names)

spatial = spatial.reindex(sorted_barcodes)

In [24]:
#Set coordinates
x_array=adata.obs["array_row"].tolist()
y_array=adata.obs["array_col"].tolist()
x_pixel=spatial["pxl_row_in_fullres"].tolist()
y_pixel=spatial["pxl_col_in_fullres"].tolist()

print(img)
#Test coordinates on the image
img_new=img.copy()
for i in range(len(x_pixel)):
    x=x_pixel[i]
    y=y_pixel[i]
    img[int(x-20):int(x+20), int(y-20):int(y+20),:]=0

cv2.imwrite('ff_ovarian_cancer_processed_image_map.jpg', img_new)

[[[ 0  0  0]
  [ 0  0  0]
  [ 0  0  0]
  ...
  [ 5  4  4]
  [ 5  4  4]
  [ 5  4  4]]

 [[ 4  2  4]
  [ 4  2  4]
  [ 4  2  4]
  ...
  [ 5  4  4]
  [ 6  4  4]
  [ 5  4  4]]

 [[ 4  2  4]
  [ 4  2  4]
  [ 4  2  4]
  ...
  [ 6  4  4]
  [ 6  4  4]
  [ 5  4  4]]

 ...

 [[17  2  4]
  [17  2  4]
  [17  2  4]
  ...
  [ 7  6  4]
  [ 7  6  4]
  [ 7  6  4]]

 [[18  2  4]
  [18  2  4]
  [18  2  4]
  ...
  [ 7  6  4]
  [ 7  6  4]
  [ 7  6  4]]

 [[18  2  4]
  [18  2  4]
  [18  2  4]
  ...
  [ 7  6  4]
  [ 7  6  4]
  [ 7  6  4]]]


True

In [25]:
#Calculate adjacent matrix
s=1
b=49
#print(img)
#adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, x_pixel=x_pixel, y_pixel=y_pixel, image=img, beta=b, alpha=s, histology=True)
#If histlogy image is not available, SpaGCN can calculate the adjacent matrix using the fnction below
adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)
print(adj)
np.savetxt('ff_ovarian_cancer_adj.csv', adj, delimiter=',')

Calculateing adj matrix using xy only...
[[    0.     11589.354  12232.861  ...  8451.345  10163.626  10359.438 ]
 [11589.354      0.      2980.9993 ...  3164.737   1528.1237  3518.0513]
 [12232.861   2980.9993     0.     ...  4767.6763  3883.0002  1979.8254]
 ...
 [ 8451.345   3164.737   4767.6763 ...     0.      1714.4052  3723.7283]
 [10163.626   1528.1237  3883.0002 ...  1714.4052     0.      3605.0483]
 [10359.438   3518.0513  1979.8254 ...  3723.7283  3605.0483     0.    ]]


In [26]:
#set hyper-parameters
p=0.5 
#Find the l value given p
l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)
print(l)

Run 1: l [0.01, 1000], p [0.0, 71.15180728966719]
Run 2: l [0.01, 500.005], p [0.0, 18.592342376708984]
Run 3: l [0.01, 250.0075], p [0.0, 4.235416412353516]
Run 4: l [125.00874999999999, 250.0075], p [0.46411025524139404, 4.235416412353516]
Run 5: l [125.00874999999999, 187.508125], p [0.46411025524139404, 2.03464674949646]
Run 6: l [125.00874999999999, 156.2584375], p [0.46411025524139404, 1.1579639911651611]
Run 7: l [125.00874999999999, 140.63359375], p [0.46411025524139404, 0.7833660840988159]
Run 8: l [125.00874999999999, 132.821171875], p [0.46411025524139404, 0.615770697593689]
Run 9: l [125.00874999999999, 128.91496093749998], p [0.46411025524139404, 0.5377852916717529]
recommended l =  126.96185546874999
126.96185546874999


In [27]:
#Going with 14 clusters as this is what I got from previous data - this has 0 index
n_clusters=11
#Set seed
r_seed=t_seed=n_seed=100
#Search for suitable resolution
res=spg.search_res(adata, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  9
Initializing cluster centers with louvain, resolution =  0.7999999999999999
Epoch  0
Epoch  10
Res =  0.7999999999999999 Num of clusters =  10
Res changed to 0.7999999999999999
Initializing cluster centers with louvain, resolution =  0.8999999999999999
Epoch  0
Epoch  10
Res =  0.8999999999999999 Num of clusters =  11
recommended res =  0.8999999999999999


In [28]:
clf=spg.SpaGCN()
clf.set_l(l)
#Set seed
random.seed(r_seed)
torch.manual_seed(t_seed)
np.random.seed(n_seed)
#Run
clf.train(adata,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
y_pred, prob=clf.predict()
adata.obs["pred"]= y_pred
adata.obs["pred"]=adata.obs["pred"].astype('category')
#Do cluster refinement(optional)
#shape="hexagon" for Visium data, "square" for ST data.
adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
refined_pred=spg.refine(sample_id=adata.obs.index.tolist(), pred=adata.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
adata.obs["refined_pred"]=refined_pred
adata.obs["refined_pred"]=adata.obs["refined_pred"].astype('category')

Initializing cluster centers with louvain, resolution =  0.8999999999999999
Epoch  0
Epoch  10
Epoch  20
Epoch  30
Epoch  40
delta_label  0.004934790271413465 < tol  0.005
Reach tolerance threshold. Stopping training.
Total epoch: 43
Calculateing adj matrix using xy only...


In [29]:
#Set colors used
plot_color=["#F56867","#FEB915","#C798EE","#59BE86","#7495D3","#D1D1D1","#6D1A9C","#15821E","#3A84E6","#997273","#787878","#DB4C6C","#9E7A7A","#554236","#AF5F3C","#93796C","#F9BD3F","#DAB370","#877F6C","#268785"]
#Plot spatial domains
domains="pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
#ax=sc.pl.scatter(adata,alpha=1,x="y_pixel",y="x_pixel",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
#change above line so it comes from adata input
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_ovarian_cancer_spagcn_pred.png", dpi=600)
plt.close()

#Plot refined spatial domains
domains="refined_pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_ovarian_cancer_spagcn_refined_pred.png", dpi=600)
plt.close()

In [30]:
#Use number of domains as number of clusters found from scanpy analysis
target=0
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 276.9784482758621]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 85.31896551724138]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 27.54741379310345]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 15.935344827586206]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.163793103448276, 15.935344827586206]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.163793103448276, 15.935344827586206]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=10.051724137931034
3

In [31]:
target=1
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 326.08988764044943]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 102.6685393258427]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 33.15449438202247]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 19.176966292134832]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [8.379213483146067, 19.176966292134832]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [8.379213483146067, 19.176966292134832]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=12.025280898876405


In [32]:
target=2
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 307.7352941176471]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 95.56372549019608]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 30.80392156862745]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 17.845588235294116]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.997549019607843, 17.845588235294116]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.997549019607843, 17.845588235294116]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=11.27450980392157
3.

In [33]:
target=3
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 253.91849529780563]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 82.65830721003135]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 27.144200626959247]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 16.03448275862069]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.35423197492163, 16.03448275862069]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.35423197492163, 16.03448275862069]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=10.244514106583072
3.16

In [34]:
target=4
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 289.50602409638554]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 90.75502008032129]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 29.27309236947791]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 17.160642570281123]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.738955823293173, 17.160642570281123]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.738955823293173, 17.160642570281123]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=10.8714859437751
3.

In [35]:
target=5
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 322.82089552238807]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 98.65298507462687]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 31.205223880597014]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 17.94402985074627]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.83955223880597, 17.94402985074627]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.83955223880597, 17.94402985074627]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=11.197761194029852
3.16

In [36]:
target=6
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 185.953125]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 68.20703125]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 24.40625]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 14.78125]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.01171875, 14.78125]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.01171875, 14.78125]
Calculateing adj matrix using xy only...
Run 7: radius [3.023152818903327, 3.2530012875795364], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 8: rad

Run 67: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 68: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 69: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 70: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 71: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 72: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 73: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...
Run 74: radius [3.162277660168379, 3.1622776601683795], num_nbr [9.59375, 14.78125]
Calculateing adj matrix using xy only...


In [37]:
target=7
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 252.05527638190955]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 89.19597989949749]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 30.326633165829147]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 17.894472361809044]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [8.06532663316583, 17.894472361809044]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [8.06532663316583, 17.894472361809044]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=11.366834170854272
3

In [38]:
target=8
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 336.66013071895424]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 107.8529411764706]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 34.97712418300654]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 20.166666666666668]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [8.777777777777779, 20.166666666666668]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [8.777777777777779, 20.166666666666668]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=12.588235294117647


In [39]:
target=9
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 347.5483870967742]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 99.25806451612904]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 30.483870967741936]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 17.580645161290324]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.763440860215054, 17.580645161290324]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.763440860215054, 17.580645161290324]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=11.075268817204302


In [40]:
target=10
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 16.124515533447266], num_nbr [1.0, 281.0331125827815]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 8.769364535808563], num_nbr [1.0, 93.21192052980132]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 5.091789036989212], num_nbr [1.0, 30.443708609271525]
Calculateing adj matrix using xy only...
Run 4: radius [1.4142135381698608, 3.2530012875795364], num_nbr [1.0, 17.589403973509935]
Calculateing adj matrix using xy only...
Run 5: radius [2.3336074128746986, 3.2530012875795364], num_nbr [7.754966887417218, 17.589403973509935]
Calculateing adj matrix using xy only...
Run 6: radius [2.7933043502271175, 3.2530012875795364], num_nbr [7.754966887417218, 17.589403973509935]
Calculateing adj matrix using xy only...
recommended radius =  3.023152818903327 num_nbr=11.019867549668874
