## Identify SVGs in FF Left Ventricle using SpaGCN

In [1]:
import os,csv,re
import pandas as pd
import numpy as np
import numba
import scanpy as sc
import math
import SpaGCN as spg
from scipy.sparse import issparse
import random, torch
import warnings
warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import SpaGCN as spg
import cv2

  from .autonotebook import tqdm as notebook_tqdm
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [9]:
##Read in preprocessed adata
#Read in gene expression and spatial location
adata=sc.read("ff_left_ventricle.h5ad")

#read in spatial positions
spatial=pd.read_csv("data/V1_Human_Heart/spatial/tissue_positions_list.csv",sep=",",header=None,na_filter=False,index_col=0,names=["barcode", "in_tissue", "array_row", "array_col","pxl_row_in_fullres","pxl_col_in_fullres"]) 

#Read in hitology image
img=cv2.imread("data/V1_Human_Heart/spatial/tissue_hires_image.tiff")


In [10]:
##re index spatial table according to barcodes in adata
sorted_barcodes = list(adata.obs_names)

spatial = spatial.reindex(sorted_barcodes)

In [18]:
# Manually edit the 'log1p' values
adata.uns['log1p'] = {'base': None}

In [11]:
#Set coordinates
x_array=adata.obs["array_row"].tolist()
y_array=adata.obs["array_col"].tolist()
x_pixel=spatial["pxl_row_in_fullres"].tolist()
y_pixel=spatial["pxl_col_in_fullres"].tolist()

print(img)
#Test coordinates on the image
img_new=img.copy()
for i in range(len(x_pixel)):
    x=x_pixel[i]
    y=y_pixel[i]
    img[int(x-20):int(x+20), int(y-20):int(y+20),:]=0

cv2.imwrite('ff_left_ventricle_scanpy_processed_image_map.jpg', img_new)

[[[176 180 178]
  [177 180 177]
  [177 180 177]
  ...
  [177 180 178]
  [177 181 179]
  [177 181 178]]

 [[177 179 177]
  [177 179 177]
  [176 180 177]
  ...
  [177 180 179]
  [176 180 178]
  [176 180 178]]

 [[177 179 178]
  [177 179 178]
  [177 180 178]
  ...
  [177 180 178]
  [176 180 177]
  [176 180 177]]

 ...

 [[177 179 178]
  [176 179 178]
  [177 179 178]
  ...
  [177 180 179]
  [177 180 178]
  [176 180 178]]

 [[177 179 178]
  [177 179 178]
  [176 179 178]
  ...
  [178 180 179]
  [176 179 179]
  [176 180 179]]

 [[177 179 178]
  [177 179 178]
  [177 179 178]
  ...
  [177 180 179]
  [176 180 178]
  [178 180 178]]]


True

In [12]:
#Calculate adjacent matrix
s=1
b=49
#If histlogy image is not available, SpaGCN can calculate the adjacent matrix using the fnction below
adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)
print(adj)
np.savetxt('ff_human_breast_cancer_adj.csv', adj, delimiter=',')

Calculateing adj matrix using xy only...
[[   0.     5810.5347 4344.374  ... 5095.743  5194.049  6228.6826]
 [5810.5347    0.     7459.3105 ...  766.1416 1763.6454 6602.3857]
 [4344.374  7459.3105    0.     ... 7050.6484 5917.8315 3074.1665]
 ...
 [5095.743   766.1416 7050.6484 ...    0.     1807.3589 6506.8535]
 [5194.049  1763.6454 5917.8315 ... 1807.3589    0.     4839.3604]
 [6228.6826 6602.3857 3074.1665 ... 6506.8535 4839.3604    0.    ]]


In [13]:
#set hyper-parameters
p=0.5 
#Find the l value given p
l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)
print(l)

Run 1: l [0.01, 1000], p [0.0, 292.986848583757]
Run 2: l [0.01, 500.005], p [0.0, 80.82018280029297]
Run 3: l [0.01, 250.0075], p [0.0, 20.635290145874023]
Run 4: l [0.01, 125.00874999999999], p [0.0, 4.6028547286987305]
recommended l =  62.509375
62.509375


In [14]:
#Going with 8 clusters as this is what I got from previous data - this has 0 index
n_clusters=8
#Set seed
r_seed=t_seed=n_seed=100
#Search for suitable resolution
res=spg.search_res(adata, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

Start at res =  0.7 step =  0.1
Initializing cluster centers with louvain, resolution =  0.7
Epoch  0
Epoch  10
Res =  0.7 Num of clusters =  6
Initializing cluster centers with louvain, resolution =  0.7999999999999999
Epoch  0
Epoch  10
Res =  0.7999999999999999 Num of clusters =  6
Res changed to 0.7999999999999999
Initializing cluster centers with louvain, resolution =  0.8999999999999999
Epoch  0
Epoch  10
Res =  0.8999999999999999 Num of clusters =  7
Res changed to 0.8999999999999999
Initializing cluster centers with louvain, resolution =  0.9999999999999999
Epoch  0
Epoch  10
Res =  0.9999999999999999 Num of clusters =  8
recommended res =  0.9999999999999999


In [15]:
clf=spg.SpaGCN()
clf.set_l(l)
#Set seed
random.seed(r_seed)
torch.manual_seed(t_seed)
np.random.seed(n_seed)
#Run
clf.train(adata,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
y_pred, prob=clf.predict()
adata.obs["pred"]= y_pred
adata.obs["pred"]=adata.obs["pred"].astype('category')
#Do cluster refinement(optional)
#shape="hexagon" for Visium data, "square" for ST data.
adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
refined_pred=spg.refine(sample_id=adata.obs.index.tolist(), pred=adata.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
adata.obs["refined_pred"]=refined_pred
adata.obs["refined_pred"]=adata.obs["refined_pred"].astype('category')

Initializing cluster centers with louvain, resolution =  0.9999999999999999
Epoch  0
Epoch  10
Epoch  20
Epoch  30
Epoch  40
Epoch  50
Epoch  60
Epoch  70
Epoch  80
Epoch  90
Epoch  100
Epoch  110
Epoch  120
Epoch  130
Epoch  140
Epoch  150
Epoch  160
Epoch  170
Epoch  180
Epoch  190
Calculateing adj matrix using xy only...


In [16]:
#Plot spatial domains
#Set colors used
plot_color=["#F56867","#FEB915","#C798EE","#59BE86","#7495D3","#D1D1D1","#6D1A9C","#15821E","#3A84E6","#997273","#787878","#DB4C6C","#9E7A7A","#554236","#AF5F3C","#93796C","#F9BD3F","#DAB370","#877F6C","#268785"]
#Plot spatial domains
domains="pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
#ax=sc.pl.scatter(adata,alpha=1,x="y_pixel",y="x_pixel",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
#change above line so it comes from adata input
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_left_ventricle_spagcn_pred.png", dpi=600)
plt.close()

#Plot refined spatial domains
domains="refined_pred"
num_celltype=len(adata.obs[domains].unique())
adata.uns[domains+"_colors"]=list(plot_color[:num_celltype])
ax=sc.pl.scatter(adata,alpha=1,x="array_row",y="array_col",color=domains,title=domains,color_map=plot_color,show=False,size=100000/adata.shape[0])
ax.set_aspect('equal', 'box')
ax.axes.invert_yaxis()
plt.savefig("ff_left_ventricle_spagcn_refined_pred.png", dpi=600)
plt.close()

In [19]:
#Use domain 0 as an example
target=0
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.8885440826416], num_nbr [8.608050847457626, 400.61228813559325]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.9442720413208], num_nbr [8.608050847457626, 129.385593220339]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.9721360206604], num_nbr [8.608050847457626, 51.389830508474574]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.9860680103302], num_nbr [8.608050847457626, 19.62076271186441]
Calculateing adj matrix using xy only...
recommended radius =  2.9930340051651 num_nbr=12.307203389830509
3.1622776601683795
radius= 3.1622776601683795 average number of neighbors for each spot is 19.62076271186441
 Cluster 0 has neighbors:
Dmain  4 :  1712
Dmain  5 :  1428
Dmain  2 :  1201
Dmain  1 :  922
Dmain  6 :  545
Dmain  7 :  472
Dmain  3 :  225
Dmain  8 :  6
[4, 5, 2, 1, 6, 7, 3, 8]
SVGs for domain  

In [21]:
target=1
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.8885440826416], num_nbr [8.593856655290102, 400.23549488054607]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.9442720413208], num_nbr [8.593856655290102, 129.34129692832764]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.9721360206604], num_nbr [8.593856655290102, 51.7542662116041]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.9860680103302], num_nbr [8.593856655290102, 19.7098976109215]
Calculateing adj matrix using xy only...
recommended radius =  2.9930340051651 num_nbr=12.293515358361775
radius= 3.1622776601683795 average number of neighbors for each spot is 19.7098976109215
 Cluster 1 has neighbors:
Dmain  4 :  1022
Dmain  0 :  922
Dmain  5 :  804
Dmain  2 :  738
Dmain  6 :  572
Dmain  7 :  301
Dmain  3 :  174
Dmain  8 :  3
[4, 0, 5, 2, 6, 7, 3, 8]
SVGs for domain  1 : ['HBB', 'HBA2', 'HB

In [22]:
target=2
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.8885440826416], num_nbr [8.378491620111731, 387.3994413407821]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.9442720413208], num_nbr [8.378491620111731, 125.69413407821229]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.9721360206604], num_nbr [8.378491620111731, 50.34357541899441]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.9860680103302], num_nbr [8.378491620111731, 19.124301675977655]
Calculateing adj matrix using xy only...
recommended radius =  2.9930340051651 num_nbr=11.962290502793296
radius= 3.1622776601683795 average number of neighbors for each spot is 19.124301675977655
 Cluster 2 has neighbors:
Dmain  5 :  2887
Dmain  4 :  1458
Dmain  6 :  1256
Dmain  0 :  1201
Dmain  7 :  1149
Dmain  1 :  738
Dmain  3 :  230
Dmain  8 :  2
[5, 4, 6, 0, 7, 1, 3, 8]
SVGs for domain  2 : ['EEF1A1', 

In [23]:
target=3
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.8885440826416], num_nbr [8.42, 397.55]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.9442720413208], num_nbr [8.42, 129.02]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.9721360206604], num_nbr [8.42, 51.27]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.9860680103302], num_nbr [8.42, 19.46]
Calculateing adj matrix using xy only...
recommended radius =  2.9930340051651 num_nbr=12.13
radius= 3.1622776601683795 average number of neighbors for each spot is 19.46
 Cluster 3 has neighbors:
Dmain  4 :  413
Dmain  6 :  218
Dmain  1 :  174
Dmain  8 :  4
[4, 6, 1, 8]
SVGs for domain  3 : []


In [24]:
target=4
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.8885440826416], num_nbr [8.37566844919786, 395.8449197860963]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.9442720413208], num_nbr [8.37566844919786, 127.46256684491979]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.9721360206604], num_nbr [8.37566844919786, 50.526737967914436]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.9860680103302], num_nbr [8.37566844919786, 19.1524064171123]
Calculateing adj matrix using xy only...
recommended radius =  2.9930340051651 num_nbr=11.987967914438503
radius= 3.1622776601683795 average number of neighbors for each spot is 19.1524064171123
 Cluster 4 has neighbors:
Dmain  5 :  2126
Dmain  7 :  1804
Dmain  0 :  1712
Dmain  2 :  1458
Dmain  6 :  1142
Dmain  1 :  1022
Dmain  3 :  413
Dmain  8 :  9
[5, 7, 0, 2, 6, 1, 3, 8]
SVGs for domain  4 : []


In [25]:
target=5
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.8885440826416], num_nbr [8.474930362116991, 388.7590529247911]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.9442720413208], num_nbr [8.474930362116991, 127.10445682451254]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.9721360206604], num_nbr [8.474930362116991, 50.930362116991645]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.9860680103302], num_nbr [8.474930362116991, 19.370473537604457]
Calculateing adj matrix using xy only...
recommended radius =  2.9930340051651 num_nbr=12.096100278551532
radius= 3.1622776601683795 average number of neighbors for each spot is 19.370473537604457
 Cluster 5 has neighbors:
Dmain  2 :  2887
Dmain  4 :  2126
Dmain  0 :  1428
Dmain  7 :  1261
Dmain  6 :  1181
Dmain  1 :  804
Dmain  3 :  318
Dmain  8 :  7
[2, 4, 0, 7, 6, 1, 3, 8]
SVGs for domain  5 : []


In [None]:
target=6
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Calculateing adj matrix using xy only...
Run 1: radius [2.0, 17.8885440826416], num_nbr [8.472636815920398, 394.9950248756219]
Calculateing adj matrix using xy only...
Run 2: radius [2.0, 9.9442720413208], num_nbr [8.472636815920398, 126.92537313432835]
Calculateing adj matrix using xy only...
Run 3: radius [2.0, 5.9721360206604], num_nbr [8.472636815920398, 50.89054726368159]
Calculateing adj matrix using xy only...
Run 4: radius [2.0, 3.9860680103302], num_nbr [8.472636815920398, 19.398009950248756]
Calculateing adj matrix using xy only...
recommended radius =  2.9930340051651 num_nbr=12.121890547263682
radius= 3.1622776601683795 average number of neighbors for each spot is 19.398009950248756
 Cluster 6 has neighbors:
Dmain  2 :  1256
Dmain  5 :  1181
Dmain  4 :  1142
Dmain  7 :  1066
Dmain  1 :  572
Dmain  0 :  545
Dmain  3 :  218
Dmain  8 :  6
[2, 5, 4, 7, 1, 0, 3, 8]


In [None]:
target=7
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())

In [None]:
target=8
#Set filtering criterials
min_in_group_fraction=0.8
min_in_out_group_ratio=1
min_fold_change=1.5
#Search radius such that each spot in the target domain has approximately 10 neighbors on average
adj_2d=spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)
start, end= np.quantile(adj_2d[adj_2d!=0],q=0.001), np.quantile(adj_2d[adj_2d!=0],q=0.1)
r=spg.search_radius(target_cluster=target, cell_id=adata.obs.index.tolist(), x=x_array, y=y_array, pred=adata.obs["pred"].tolist(), start=start, end=end, num_min=10, num_max=14,  max_run=100)
r = 3.1622776601683795
#Detect neighboring domains
#print(r)
nbr_domians=spg.find_neighbor_clusters(target_cluster=target,
                                   cell_id=adata.obs.index.tolist(), 
                                   x=adata.obs["array_row"].tolist(), 
                                   y=adata.obs["array_col"].tolist(), 
                                   pred=adata.obs["pred"].tolist(),
                                   radius=r,
                                   ratio=1/2)


print(nbr_domians)
nbr_domians=nbr_domians[0:3]

de_genes_info=spg.rank_genes_groups(input_adata=adata,
                                target_cluster=target,
                                nbr_list=nbr_domians, 
                                label_col="pred", 
                                adj_nbr=True, 
                                log=True)
#Filter genes
de_genes_info=de_genes_info[(de_genes_info["pvals_adj"]<0.05)]
filtered_info_domain_0=de_genes_info
filtered_info_domain_0=filtered_info_domain_0[(filtered_info_domain_0["pvals_adj"]<0.05) &
                            (filtered_info_domain_0["in_out_group_ratio"]>min_in_out_group_ratio) &
                            (filtered_info_domain_0["in_group_fraction"]>min_in_group_fraction) &
                            (filtered_info_domain_0["fold_change"]>min_fold_change)]
filtered_info_domain_0=filtered_info_domain_0.sort_values(by="in_group_fraction", ascending=False)
filtered_info_domain_0["target_dmain"]=target
filtered_info_domain_0["neighbors"]=str(nbr_domians)
print("SVGs for domain ", str(target),":", filtered_info_domain_0["genes"].tolist())