## Analyse FFPE Invasive Ductal Carcinoma Breast Tissue to identify SVGs with SpaGCN

In [1]:
import os,csv,re
import pandas as pd
import numpy as np
import numba
import scanpy as sc
import math
import SpaGCN as spg
from scipy.sparse import issparse
import random, torch
import warnings
warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import SpaGCN as spg
#In order to read in image data, we need to install some package. Here we recommend package "opencv"
#inatll opencv in python
#!pip3 install opencv-python
import cv2

In [3]:
##Read in preprocessed adata
#Read in gene expression and spatial location
adata=sc.read("ffpe_human_breast_cancer_procesed.h5ad")

#read in spatial positions
spatial=pd.read_csv("spatial/tissue_positions_list.csv",sep=",",header=None,na_filter=False,index_col=0,names=["barcode", "in_tissue", "array_row", "array_col","pxl_row_in_fullres","pxl_col_in_fullres"]) 

#Read in hitology image
img=cv2.imread("spatial/tissue_hires_image.tiff")


In [4]:
##re index spatial table according to barcodes in adata
sorted_barcodes = list(adata.obs_names)

spatial = spatial.reindex(sorted_barcodes)

In [5]:
#Set coordinates
x_array=adata.obs["array_row"].tolist()
y_array=adata.obs["array_col"].tolist()
x_pixel=spatial["pxl_row_in_fullres"].tolist()
y_pixel=spatial["pxl_col_in_fullres"].tolist()

print(img)
#Test coordinates on the image
img_new=img.copy()
for i in range(len(x_pixel)):
    x=x_pixel[i]
    y=y_pixel[i]
    img[int(x-20):int(x+20), int(y-20):int(y+20),:]=0

cv2.imwrite('ffpe_breast_cancer_scanpy_processed_image_map.jpg', img_new)

[[[255 255 255]
  [254 254 254]
  [253 254 253]
  ...
  [253 254 251]
  [253 254 252]
  [254 254 254]]

 [[255 255 255]
  [254 254 254]
  [253 254 253]
  ...
  [252 253 251]
  [253 253 252]
  [254 254 254]]

 [[255 255 255]
  [254 254 254]
  [253 254 253]
  ...
  [252 253 251]
  [252 253 251]
  [254 254 253]]

 ...

 [[254 254 254]
  [254 254 254]
  [254 254 254]
  ...
  [254 255 254]
  [255 255 255]
  [254 254 254]]

 [[254 254 254]
  [254 254 254]
  [254 254 254]
  ...
  [254 254 254]
  [254 254 254]
  [253 253 253]]

 [[254 254 254]
  [254 254 254]
  [254 254 254]
  ...
  [254 254 254]
  [254 254 254]
  [253 253 253]]]


True

In [6]:
#Calculate adjacent matrix
s=1
b=49
#If histlogy image is not available, SpaGCN can calculate the adjacent matrix using the fnction below
adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)
print(adj)
np.savetxt('ffpe_breast_cancer_adj.csv', adj, delimiter=',')

Calculateing adj matrix using xy only...
[[    0.     12251.44    9159.731  ...  8569.803   6011.6294 10744.493 ]
 [12251.44       0.     15727.283  ... 10983.615  10986.767   1615.0978]
 [ 9159.731  15727.283      0.     ...  5030.6772  4949.3857 14865.559 ]
 ...
 [ 8569.803  10983.615   5030.6772 ...     0.      2584.3408 10338.584 ]
 [ 6011.6294 10986.767   4949.3857 ...  2584.3408     0.     10002.347 ]
 [10744.493   1615.0978 14865.559  ... 10338.584  10002.347      0.    ]]


In [7]:
#set hyper-parameters
p=0.5 
#Find the l value given p
l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)
print(l)

Run 1: l [0.01, 1000], p [0.0, 63.98147318506756]
Run 2: l [0.01, 500.005], p [0.0, 16.82515525817871]
Run 3: l [0.01, 250.0075], p [0.0, 3.776993751525879]
Run 4: l [125.00874999999999, 250.0075], p [0.3542438745498657, 3.776993751525879]
Run 5: l [125.00874999999999, 187.508125], p [0.3542438745498657, 1.764664888381958]
Run 6: l [125.00874999999999, 156.2584375], p [0.3542438745498657, 0.966727614402771]
Run 7: l [125.00874999999999, 140.63359375], p [0.3542438745498657, 0.6313732862472534]
Run 8: l [132.821171875, 140.63359375], p [0.48439955711364746, 0.6313732862472534]
Run 9: l [132.821171875, 136.7273828125], p [0.48439955711364746, 0.5559405088424683]
Run 10: l [132.821171875, 134.77427734375], p [0.48439955711364746, 0.5196648836135864]
recommended l =  133.797724609375
133.797724609375


In [8]:
#Going with 9 clusters as this is what I got from previous data - this has 0 index
n_clusters=15
#Set seed
r_seed=t_seed=n_seed=100
#Search for suitable resolution
res=spg.search_res(adata, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  10
Initializing cluster centers with louvain, resolution =  0.7999999999999999
Epoch  0
Epoch  10
Res =  0.7999999999999999 Num of clusters =  12
Res changed to 0.7999999999999999
Initializing cluster centers with louvain, resolution =  0.8999999999999999
Epoch  0
Epoch  10
Res =  0.8999999999999999 Num of clusters =  14
Res changed to 0.8999999999999999
Initializing cluster centers with louvain, resolution =  0.9999999999999999
Epoch  0
Epoch  10
Res =  0.9999999999999999 Num of clusters =  14
Res changed to 0.9999999999999999
Initializing cluster centers with louvain, resolution =  1.0999999999999999
Epoch  0
Epoch  10
Res =  1.0999999999999999 Num of clusters =  15
recommended res =  1.0999999999999999


In [9]:
clf=spg.SpaGCN()
clf.set_l(l)
#Set seed
random.seed(r_seed)
torch.manual_seed(t_seed)
np.random.seed(n_seed)
#Run
clf.train(adata,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
y_pred, prob=clf.predict()
adata.obs["pred"]= y_pred
adata.obs["pred"]=adata.obs["pred"].astype('category')
#Do cluster refinement(optional)
#shape="hexagon" for Visium data, "square" for ST data.
adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
refined_pred=spg.refine(sample_id=adata.obs.index.tolist(), pred=adata.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
adata.obs["refined_pred"]=refined_pred
adata.obs["refined_pred"]=adata.obs["refined_pred"].astype('category')

Initializing cluster centers with louvain, resolution =  1.0999999999999999
Epoch  0
Epoch  10
Epoch  20
Epoch  30
delta_label  0.004775549188156638 < tol  0.005
Reach tolerance threshold. Stopping training.
Total epoch: 31
Calculateing adj matrix using xy only...


In [10]:
#Plot spatial domains
#Set colors used
plot_color=["#F56867","#FEB915","#C798EE","#59BE86","#7495D3","#D1D1D1","#6D1A9C","#15821E","#3A84E6","#997273","#787878","#DB4C6C","#9E7A7A","#554236","#AF5F3C","#93796C","#F9BD3F","#DAB370","#877F6C","#268785"]
#Plot spatial domains
domains="pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
#ax=sc.pl.scatter(adata,alpha=1,x="y_pixel",y="x_pixel",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
#change above line so it comes from adata input
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ffpe_breast_cancer_spagcn_pred.png", dpi=600)
plt.close()

#Plot refined spatial domains
domains="refined_pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ffpe_breast_cancer_spagcn_refined_pred.png", dpi=600)
plt.close()

In [11]:
#Use domain 0 as an example
target=0
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 242.28458498023716]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 83.0909090909091]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 32.91304347826087]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=11.928853754940711
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 11.928853754940711
 Cluster 0 has neighbors:
Dmain  4 :  438
Dmain  5 :  232
Dmain  7 :  153
Dmain  1 :  148
Dmain  2 :  121
[4, 5, 7, 1, 2]
SVGs for domain  0 : ['MMP2', 'DCN', 'IGLC1', 'IGHA1', 'IGHG1', 'IGHG3', 'JCHAIN', 'IGHM', 'CCL19']


In [12]:
target=1
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 196.91954022988506]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 73.43103448275862]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 30.775862068965516]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=11.494252873563218
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 11.494252873563218
 Cluster 1 has neighbors:
Dmain  4 :  185
Dmain  5 :  152
Dmain  0 :  148
Dmain  11 :  68
Dmain  12 :  55
Dmain  14 :  31
[4, 5, 0, 11, 12, 14]
SVGs for domain  1 : ['CXCR4', 'ZFP36L2', 'HLA-DPB1', 'HLA-DRA', 'IFI44L', 'LSP1', 'TXNIP', 'TRBC2', 'ARHGDIB', 'WIPF1', 'IL32', 'TRAC', 'IFITM1', 'TAP1', 'LYZ', 'CORO1A',

In [13]:
target=2
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 187.02459016393442]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 65.81147540983606]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 27.598360655737704]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=10.385245901639344
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 10.385245901639344
 Cluster 2 has neighbors:
Dmain  5 :  196
Dmain  4 :  173
Dmain  13 :  77
[5, 4, 13]
SVGs for domain  2 : ['SERPINF1', 'TAGLN', 'SFRP4']


In [14]:
target=3
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 191.15172413793104]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 68.89655172413794]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 28.489655172413794]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=10.820689655172414
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 10.820689655172414
 Cluster 3 has neighbors:
Dmain  4 :  249
[4]
SVGs for domain  3 : ['MAL2', 'NFIB', 'PTK2', 'SELENOM', 'ELF3', 'HINT1', 'HDGF', 'ARHGEF38', 'TMEM9', 'H3F3B', 'SUMO1', 'GLO1', 'ENAH', 'WDR45B', 'USP36', 'SLC38A1', 'MARCKSL1', 'ASS1', 'TMEM123', 'CSF3R', 'TSPAN15', 'NAMPT', 'BSPRY', 'TFAP2B', 'STT3B', 'VEGFA', 'OBSL

In [15]:
target=4
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 222.7843137254902]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 77.20392156862745]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 31.29019607843137]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=11.525490196078431
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 11.525490196078431
 Cluster 4 has neighbors:
Dmain  0 :  438
Dmain  7 :  300
Dmain  6 :  289
Dmain  3 :  249
Dmain  1 :  185
Dmain  2 :  173
Dmain  5 :  129
Dmain  11 :  88
Dmain  12 :  77
Dmain  13 :  45
[0, 7, 6, 3, 1, 2, 5, 11, 12, 13]
SVGs for domain  4 : ['POSTN', 'CST3', 'VIM', 'IGFBP7', 'LUM', 'DCN', 'COL6A3', 'MMP2', 'MYL9', '

In [16]:
target=5
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 170.89300411522635]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 62.333333333333336]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 26.48971193415638]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=10.08230452674897
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 10.08230452674897
 Cluster 5 has neighbors:
Dmain  0 :  232
Dmain  2 :  196
Dmain  1 :  152
Dmain  4 :  129
Dmain  9 :  126
Dmain  8 :  108
[0, 2, 1, 4, 9, 8]
SVGs for domain  5 : ['SFRP2', 'SFRP4', 'JCHAIN', 'IGHG4', 'HMOX1', 'IGKV4-1']


In [17]:
target=6
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 236.51295336787564]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 80.50777202072538]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 32.145077720207254]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=11.901554404145077
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 11.901554404145077
 Cluster 6 has neighbors:
Dmain  7 :  298
Dmain  4 :  289
Dmain  11 :  137
Dmain  12 :  48
[7, 4, 11, 12]
SVGs for domain  6 : ['SCGB2A2', 'APOD', 'LLGL2', 'DHCR24', 'LSR', 'DDR1', 'SELENBP1', 'PTPRF', 'FOXA1', 'MGST1', 'EPCAM', 'SCGB1D2', 'TMEM9', 'SLPI', 'CLDN4', 'CPD', 'DEGS1', 'PIGR', 'MUC1', 'PDZK1IP1', 'HILP

In [18]:
target=7
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 244.79329608938548]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 81.96648044692738]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 31.75418994413408]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=11.335195530726256
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 11.335195530726256
 Cluster 7 has neighbors:
Dmain  4 :  300
Dmain  6 :  298
Dmain  10 :  179
Dmain  0 :  153
Dmain  11 :  114
[4, 6, 10, 0, 11]
SVGs for domain  7 : ['RARRES1', 'LTF', 'FGB', 'FGG', 'CAPN13']


In [19]:
target=8
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 194.77611940298507]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 75.85074626865672]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 31.37313432835821]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=11.64179104477612
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 11.64179104477612
 Cluster 8 has neighbors:
Dmain  9 :  98
Dmain  10 :  81
[9, 10]
SVGs for domain  8 : ['BGN', 'COL6A2', 'HSPG2', 'TPM2', 'POSTN', 'IGFBP7', 'MYL9', 'COL6A3', 'TPM1', 'COL6A1', 'COL5A2', 'ACTA2', 'TIMP3', 'SULF1', 'A2M', 'CDH11', 'COL4A1', 'PRRX1', 'TGM2', 'COL4A2', 'PMEPA1', 'ADAMTS2', 'LAMA4', 'PALLD', 'MYLK']


In [21]:
target=9
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 209.27659574468086]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 79.02127659574468]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 32.751773049645394]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=12.049645390070921
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 12.049645390070921
 Cluster 9 has neighbors:
Dmain  5 :  126
Dmain  8 :  98
[5, 8]
SVGs for domain  9 : ['APOE', 'LAPTM5', 'TYROBP', 'CSTB', 'TGFBI', 'SDCBP', 'GPNMB', 'APOC1', 'LYZ', 'SAT1', 'IFI30', 'ELL2', 'CD163', 'C1QB', 'CTSL', 'IGHG3', 'ASAH1', 'MFSD1', 'CAPG', 'HMOX1', 'CTSA', 'LITAF', 'CD68', 'LGMN', 'CTSS', 'CREG1', 'SGK1'

In [22]:
target=10
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 245.82758620689654]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 81.72413793103448]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 30.74712643678161]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=10.804597701149426
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 10.804597701149426
 Cluster 10 has neighbors:
Dmain  7 :  179
Dmain  8 :  81
[7, 8]
SVGs for domain  10 : ['CD68', 'ISG15', 'APOE', 'GPNMB', 'TGFBI', 'IFIT3', 'FCER1G', 'SAT1', 'OAS1', 'CAPG', 'PLSCR1', 'ACP5', 'TXNIP', 'GRN', 'CREG1', 'TYMP', 'LYZ', 'CTSS', 'CTSZ', 'HEXB', 'LGMN', 'RNASE1', 'SLC11A1', 'LAPTM5', 'FPR3', 'IFI30', 'FCG

In [23]:
target=11
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 207.65151515151516]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 80.13636363636364]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 33.24242424242424]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=12.303030303030303
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 12.303030303030303
 Cluster 11 has neighbors:
Dmain  6 :  137
Dmain  7 :  114
[6, 7]
SVGs for domain  11 : ['IFITM1', 'LCN2', 'OAS2', 'ISG15', 'SOD2', 'STAT1', 'FXYD3', 'SLPI', 'IRF7', 'RBCK1', 'HLA-E', 'S100A8', 'CHI3L2', 'OAS3', 'OAS1', 'ART3', 'PDZK1IP1', 'OPTN', 'GPRC5A', 'GTPBP1', 'CEBPD', 'C3', 'NCOA7', 'MT2A', 'RARRES1', 'PARP

In [24]:
target=12
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 246.71014492753622]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 80.15942028985508]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 31.753623188405797]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=11.63768115942029
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 11.63768115942029
 Cluster 12 has neighbors:
No neighbor domain found, try bigger radius or smaller ratio.
None


TypeError: 'NoneType' object is not subscriptable

In [25]:
target=13
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 158.3709677419355]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 59.17741935483871]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 25.983870967741936]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=10.370967741935484
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 10.370967741935484
 Cluster 13 has neighbors:
Dmain  2 :  77
[2]
SVGs for domain  13 : ['LTF', 'CREB3L1', 'CRYBG1', 'COMT', 'MARCKSL1', 'GRINA', 'RAB5B', 'PILRB', 'PITX1', 'USP36', 'CSF3R', 'ARF3', 'SHISA5', 'TSPAN15', 'MTDH', 'PATJ', 'ERGIC3', 'ATP5PF', 'TBC1D16', 'ATP1A1', 'THRA', 'ATP6AP1', 'SOD1', 'RAB5IF', 'SEC63', 'RNF187', 'KI

In [26]:
target=14
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [1.4142135381698608, 13.928388595581055], num_nbr [1.0, 134.21052631578948]
Calculateing adj matrix using xy only...
Run 2: radius [1.4142135381698608, 7.671301066875458], num_nbr [1.0, 54.44736842105263]
Calculateing adj matrix using xy only...
Run 3: radius [1.4142135381698608, 4.542757302522659], num_nbr [1.0, 24.86842105263158]
Calculateing adj matrix using xy only...
recommended radius =  2.97848542034626 num_nbr=10.052631578947368
2.97848542034626
radius= 2.97848542034626 average number of neighbors for each spot is 10.052631578947368
 Cluster 14 has neighbors:
No neighbor domain found, try bigger radius or smaller ratio.
None


TypeError: 'NoneType' object is not subscriptable