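Load the required libraries. The CellPie module (`cellpie_main`, which provides `intNMF`) must be in the same directory; if it is not, its location can be added to Python's path, as done below with `sys.path.append`.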

In [None]:
import anndata as ad
import scanpy as sc
import numpy as np
import scipy
import sys
sys.path.append("../../CellPie/") # go to parent dir
import cellpie_main as cp
from cellpie_main import intNMF
import anndata as ad
from matplotlib import pyplot as plt
import scipy.io
import pandas as pd
import squidpy as sq
from PIL import Image
from sklearn.cluster import KMeans
from kneed import KneeLocator
import seaborn as sns

In [None]:
# Global scanpy figure defaults: 250 dpi for displayed figures, 100 dpi for saved ones.
sc.set_figure_params(scanpy=True, dpi=250, dpi_save=100)

# CellPie

In [None]:
# Histology annotation table; rows containing any missing value are dropped.
# Later cells use its 'Barcode' and 'Histology' columns.
histo_2 = pd.read_csv('Data/Histology_Visium_FFPE_Human_Prostate_Cancer_cloupe.csv').dropna()

In [None]:
# Read the Visium spatial-transcriptomics data (count matrix plus tissue images) from Data/
adata = sc.read_visium(path = 'Data/', 
                       count_file='Visium_FFPE_Human_Prostate_Cancer_filtered_feature_bc_matrix.h5', 
                       library_id='A1_spot',                        
                       load_images=True)
# De-duplicate gene names, then keep them in a SYMBOL column as well
adata.var_names_make_unique()
adata.var['SYMBOL'] = adata.var_names

In [None]:
# Keep only the spots that have a histology annotation.  The explicit copy turns
# the AnnData view produced by the slice into a standalone object, so the later
# writes to .uns / .obs / .obsm do not depend on implicit view-to-copy conversion.
adata = adata[adata.obs_names.isin(histo_2['Barcode']), :].copy()

In [None]:
# adata.obs['histo_10x'] = histo_10x['Pathology'].values

In [None]:
# Wrap the first three channels of the hi-res tissue image in a squidpy
# ImageContainer, passing the hi-res scale factor as its scale.
img = sq.im.ImageContainer(
adata.uns["spatial"]['A1_spot']["images"]["hires"][:,:,0:3],
scale=adata.uns["spatial"]['A1_spot']["scalefactors"]["tissue_hires_scalef"])

In [None]:
# Show each channel of the container separately.
img.show(channelwise=True)

In [None]:
# The image contains one extra channel that looks empty, so we remove it before extracting the features:
# only the first three channels are kept in adata.uns.
adata.uns["spatial"]['A1_spot']["images"]["hires"]=adata.uns["spatial"]['A1_spot']["images"]["hires"][:,:,0:3]

In [None]:
# Compute squidpy "histogram" image features for every spot at three spot
# scales (1, 2, 3).  Each result is stored in adata.obsm under
# "features_summary_scale<spot_scale>".
for spot_scale in [1,2,3]:
    feature_name = f"features_summary_scale{spot_scale}"
    sq.im.calculate_image_features(
        adata,
        img.compute(),
        features="histogram",
        key_added=feature_name,
        n_jobs=4,
        scale = 1,
        spot_scale=spot_scale,
    )

In [None]:
# Combine the per-scale feature tables (every obsm entry whose key contains
# "features_summary") column-wise into a single dataframe.
adata.obsm["features"] = pd.concat(
    [adata.obsm[f] for f in adata.obsm.keys() if "features_summary" in f],
    axis="columns",
)
# Make sure that we have no duplicated feature names in the combined table.
adata.obsm["features"].columns = ad.utils.make_index_unique(
    adata.obsm["features"].columns
)

In [None]:
# Normalise total counts per spot (scanpy defaults).
sc.pp.normalize_total(adata)

In [None]:
# Drop image-feature columns that are zero for every spot.
feat = adata.obsm['features'].loc[:, (adata.obsm['features'] != 0).any(axis=0)]
adata.obsm['features'] = feat

In [None]:
# NOTE: plain assignment -- adata_ms is a second name for the same object as
# adata, not a copy, so anything written to adata_ms is also written to adata.
adata_ms = adata

In [None]:
import pickle as pkl


def do_model_selection(ks: list, rna, atac, method="bic",
                       out_path='sweep_res_intNMF.pickle'):
    """Fit one intNMF model per candidate topic count and pick the best one.

    For every ``k`` in ``ks`` a model is built with
    ``intNMF(rna, k, lam=0, epochs=50, init='NNDSVD', mod1_skew=1.6)`` and
    fitted with ``fit(rna, tf_transf=False)``.  Its score is
    ``log(loss[-1] ** 2)``, i.e. it is derived from the last recorded loss
    only; no model-complexity term is added.  The model with the lowest score
    is reported as the best one.

    Parameters
    ----------
    ks : list
        Candidate numbers of topics.
    rna
        Data object handed to ``intNMF`` and to ``fit``.
    atac
        Not used by the computation; kept so existing calls keep working.
    method : str
        Only ``"bic"`` is accepted.
    out_path : str
        File the pickled sweep results are written to.

    Returns
    -------
    tuple or None
        ``(best_model, sweep_res)`` where ``sweep_res`` maps each ``k`` to a
        ``(model, score)`` tuple and ``best_model`` is the entry with the
        lowest score (``(None, 1e20)`` if no score is below ``1e20``).
        ``None`` is returned, after printing a message and without writing
        any file, when ``method`` is not ``"bic"``.
    """
    if method != "bic":
        print("incorrect selection of sweep method")
        return

    sweep_res = {}
    best_model = (None, 1e20)

    for k in ks:
        # Fit the data that was passed in, not a module-level global.
        model = intNMF(rna, k, lam=0, epochs=50, init='NNDSVD', mod1_skew=1.6)
        model.fit(rna, tf_transf=False)

        score = np.log(np.square(model.loss[-1]))
        sweep_res[k] = (model, score)
        if score < best_model[1]:
            best_model = sweep_res[k]

    # Persist the whole sweep (fitted models included) for later inspection.
    with open(out_path, 'wb') as handle:
        pkl.dump(sweep_res, handle, protocol=pkl.HIGHEST_PROTOCOL)

    return (best_model, sweep_res)

In [None]:
# Candidate topic counts 1..n (inclusive) for the model-selection sweep.
n = 30
ks = list(range(1, n + 1))

In [None]:
# Run the sweep over all candidate k; mod is (best_model, sweep_res).
mod=do_model_selection(ks=ks,rna=adata_ms,atac=adata_ms.obsm['features'])

In [None]:
from kneed import KneeLocator

# Row 1 of the sweep table holds the score of each k (row 0 holds the fitted
# models).  Locate the knee of that curve, treated as convex and decreasing.
kn = KneeLocator(ks, np.float64(pd.DataFrame.from_dict(mod[1]).iloc[1,:].values),interp_method="polynomial",curve='convex', direction='decreasing')
print(kn.knee)

In [None]:
# Score versus k, with the detected knee marked by a red vertical line.
plt.scatter(ks,pd.DataFrame.from_dict(mod[1]).iloc[1,:].values)
plt.axvline(kn.knee, linewidth=2, color='r')

In [None]:
from sklearn import metrics
from sklearn.cluster import KMeans

# Histology labels re-ordered to follow adata.obs_names, so that every spot is
# scored against its own annotation regardless of the row order of histo_2.
histology = histo_2.set_index('Barcode')['Histology'].reindex(adata.obs_names).values

res = []

# Sweep the modality weight (mod1_skew) from 0 to 2 in steps of 0.1.  For each
# weight: fit an 11-topic model, cluster the spots with k-means and score the
# clustering against the histology annotation.
# NOTE(review): fit() is called with its default tf_transf here, while the
# final model further down is fitted with tf_transf=False -- confirm intended.
for i in np.arange(0, 2.01,0.1):

    nmf_model = intNMF(adata,11,lam=0,epochs = 50, init = 'NNDSVD',mod1_skew=i)
    nmf_model.fit(adata)
    # Columns 3..13 of adata.obs are the clustering input
    # (presumably the 11 topic columns written by fit -- verify).
    X = adata.obs.iloc[:,3:14].values

    kmeans = KMeans(n_clusters=5, init='k-means++',max_iter=500,random_state=2).fit(X)
    adata.obs['kmeans_t']=kmeans.labels_.astype(str)
    fm_score=metrics.fowlkes_mallows_score(adata.obs['kmeans_t'],histology)
    adj_rand=metrics.adjusted_rand_score(adata.obs['kmeans_t'],histology)
    adj_mut_info=metrics.adjusted_mutual_info_score(adata.obs['kmeans_t'],histology)
    # One row per weight: (weight, Fowlkes-Mallows, adjusted Rand, adjusted mutual information)
    res.append((i,fm_score,adj_rand,adj_mut_info))

In [None]:
# Columns of score: 0 = weight, 1 = Fowlkes-Mallows, 2 = adjusted Rand,
# 3 = adjusted mutual information.  Plot all three scores against the weight.
score = pd.DataFrame(res)   
plt.plot(score[0],score[1],color="green",label='Fowlkes Mallows Score')
plt.plot(score[0],score[2],color="red",label='Adjusted Rand Score')
plt.plot(score[0],score[3],color="blue",label='Adjusted Mutual Info Score')
plt.xlabel("Weight")
plt.ylabel("Score")
plt.legend(prop={'size': 9})

In [None]:
# Row label of the highest adjusted Rand score; the corresponding weight is
# the value of score[0] in that row.
score[2].idxmax()

In [None]:
# Final model: 11 topics, mod1_skew=1.7, fitted with tf_transf=False.
nmf_model = intNMF(adata,11,lam=0,epochs = 50, init = 'NNDSVD',mod1_skew=1.7)
nmf_model.fit(adata,tf_transf=False)

In [None]:
# Plot the topic proportions with CellPie's helper (11 topics).
cp.plot_topic_proportions(adata,11)

### extract gene loading matrix

In [None]:
# Gene table returned by CellPie for the fitted model's phi_expr.
l=cp.get_genes_topic(adata,nmf_model.phi_expr)

In [None]:
# Save the transposed gene table as CSV.
l.T.to_csv('Results/marker_genes_prostate_reproduce_11topics_w17.csv')

In [None]:
# Save the annotated data object.
adata.write('Results/prostate_reproduce_11topics_w17.h5ad')

In [None]:
# Wrap the gene table in an AnnData object so it can be written as .h5ad.
adata_l=sc.AnnData(l)

In [None]:
adata_l.write('Results/genes_prostate_reproduce_11topics_w17.h5ad')

In [None]:
# PCA of the expression matrix; the next cell clusters adata.obsm['X_pca'].
sc.pp.pca(adata)

In [None]:
from sklearn.cluster import KMeans
# Baseline: k-means (5 clusters, fixed seed) on the PCA embedding.
X = adata.obsm['X_pca']
kmeans = KMeans(n_clusters=5, init='k-means++',max_iter=500,random_state=2).fit(X)
adata.obs['kmeans']=kmeans.labels_.astype(str)

In [None]:
from sklearn.cluster import KMeans
# Same k-means settings on columns 3..13 of adata.obs
# (presumably the 11 topic columns of the fitted model -- verify).
X = adata.obs.iloc[:,3:14].values
kmeans = KMeans(n_clusters=5, init='k-means++',max_iter=500,random_state=2).fit(X)
adata.obs['kmeans_CellPie_int']=kmeans.labels_.astype(str)

In [None]:
from sklearn import metrics
# Adjusted Rand index of the CellPie clustering against the histology
# annotation.  The annotation is re-ordered by barcode to follow
# adata.obs_names, so the two label vectors are paired spot by spot.
histology = histo_2.set_index('Barcode')['Histology'].reindex(adata.obs_names).values
adj_rand=metrics.adjusted_rand_score(adata.obs['kmeans_CellPie_int'],histology)
adj_rand

In [None]:
# Spatial plot of the spots coloured by the 'kmeans_CellPie_int' cluster label.
sc.pl.spatial(adata,color='kmeans_CellPie_int')

CellPie clustering for single NMF (weight=2)

In [None]:
# Work on an independent copy so the mod1_skew=2 fit does not overwrite adata.
adata_0 = adata.copy()

In [None]:
# Same model settings as before, but with mod1_skew=2 and fit()'s default tf_transf.
nmf_model = intNMF(adata_0,11,lam=0,epochs = 50, init = 'NNDSVD',mod1_skew=2)
nmf_model.fit(adata_0)

In [None]:
# k-means (5 clusters, fixed seed) on columns 3..13 of adata_0.obs
# (presumably the 11 topic columns -- verify).
X = adata_0.obs.iloc[:,3:14].values
kmeans = KMeans(n_clusters=5, init='k-means++',max_iter=500,random_state=2).fit(X)
adata_0.obs['kmeans_CellPie_0']=kmeans.labels_.astype(str)

In [None]:
# Adjusted Rand index of the weight=2 clustering against the histology
# annotation, with the annotation re-ordered by barcode to follow adata_0.obs_names.
histology_0 = histo_2.set_index('Barcode')['Histology'].reindex(adata_0.obs_names).values
adj_rand=metrics.adjusted_rand_score(adata_0.obs['kmeans_CellPie_0'],histology_0)
adj_rand

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Heatmap of histology class (rows) versus CellPie cluster (columns); each
# row is normalised to sum to 1 (normalize='index').
# NOTE(review): the cluster labels are passed as a bare array, so they are
# paired with histo_2 rows by position -- assumes histo_2 and adata share the
# same spot order; confirm.
plt.figure(figsize=(10, 8))
sns.heatmap(
    pd.crosstab(
        histo_2['Histology'],adata.obs['kmeans_CellPie_int'].values,
        normalize ='index'
    ), 
    cmap=sns.color_palette("crest", as_cmap=True),
    annot = True
)
plt.grid(None)
plt.show()

In [None]:
# Spatial plot of the CellPie clusters with enlarged spots.
sc.pl.spatial(adata,color='kmeans_CellPie_int',size=1.5)

In [None]:
# Bring the weight=2 cluster labels from adata_0 over to adata.
adata.obs['kmeans_CellPie_0'] = adata_0.obs['kmeans_CellPie_0']

In [None]:
# Attach the histology annotation to every spot, matched by barcode rather
# than by row position, so it stays correct whatever the row order of histo_2.
adata.obs['path_anot'] = histo_2.set_index('Barcode')['Histology'].reindex(adata.obs_names).values

In [None]:
# Fixed colour lists for the cluster labels, stored under '<label>_colors' in adata.uns.
adata.uns['kmeans_CellPie_int_colors'] =['#279e68', '#1f77b4', '#e377c2' ,'#d62728', '#aa40fc']

In [None]:
adata.uns['kmeans_CellPie_0_colors'] = ['#279e68', '#1f77b4','#e377c2','#d62728' ,'#aa40fc']   

In [None]:
adata.uns['kmeans_colors'] = ['#279e68', '#1f77b4', '#aa40fc',  '#e377c2','#d62728']

In [None]:
# NOTE(review): no cell above assigns 'path_anot_colors'; presumably it only
# exists once 'path_anot' has been plotted (next cell) -- check the execution order.
adata.uns['path_anot_colors']

In [None]:
# Spatial plot of the histology annotation.
sc.pl.spatial(adata,color='path_anot',size=1.5)

# SPAGCN

In [None]:
import SpaGCN as spg
import cv2

In [None]:
# Read in gene expression and spot images for SpaGCN
adata_spg = sc.read_visium(path='Data/',
                           count_file='Visium_FFPE_Human_Prostate_Cancer_filtered_feature_bc_matrix.h5',
                           library_id='A1_spot',
                           load_images=True)
adata_spg.var_names_make_unique()
adata_spg.var['SYMBOL'] = adata_spg.var_names
# Keep only the histology-annotated spots.  Copy, so that the obs columns
# added below are written to a real AnnData object rather than to a view.
adata_spg = adata_spg[adata_spg.obs_names.isin(histo_2['Barcode']), :].copy()
# Read in the spot positions (no header row; first column is the index,
# the remaining columns are addressed as 1..5)
spatial = pd.read_csv("Data/spatial/tissue_positions_list.csv", sep=",", header=None, na_filter=False, index_col=0)
adata_spg.obs["x1"] = spatial[1]
adata_spg.obs["x2"] = spatial[2]
adata_spg.obs["x3"] = spatial[3]
adata_spg.obs["x4"] = spatial[4]
adata_spg.obs["x5"] = spatial[5]
adata_spg.obs["x_array"] = adata_spg.obs["x2"]
adata_spg.obs["y_array"] = adata_spg.obs["x3"]
adata_spg.obs["x_pixel"] = adata_spg.obs["x4"]
adata_spg.obs["y_pixel"] = adata_spg.obs["x5"]
# Select captured samples (x1 == 1); copy again because var is modified next
adata_spg = adata_spg[adata_spg.obs["x1"] == 1].copy()
adata_spg.var_names = [i.upper() for i in list(adata_spg.var_names)]
adata_spg.var["genename"] = adata_spg.var.index.astype("str")
# Round-trip through disk: write the prepared object and read it back
adata_spg.write_h5ad("sample_data.h5ad")

adata_spg = sc.read("sample_data.h5ad")


In [None]:
# Load the histology image for SpaGCN.
# NOTE: this rebinds `img`, which earlier held the squidpy ImageContainer.
img_path = "/Users/user/Prostate_cancer_invasive/Visium_FFPE_Human_Prostate_Cancer_image.tif"
img=cv2.imread(img_path)
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than later on img.copy().
if img is None:
    raise FileNotFoundError(f"Could not read histology image: {img_path}")

In [None]:
#Set coordinates
x_array=adata_spg.obs["x_array"].tolist()
y_array=adata_spg.obs["y_array"].tolist()
x_pixel=adata_spg.obs["x_pixel"].tolist()
y_pixel=adata_spg.obs["y_pixel"].tolist()

#Test coordinates on the image: black out a 40x40 pixel square centred on
#every spot and save the result for visual inspection
img_new=img.copy()
for x, y in zip(x_pixel, y_pixel):
    # Clamp the lower bounds at 0: a negative slice start would wrap around
    # to the far side of the image and leave a border spot unmarked.
    img_new[max(int(x-20), 0):int(x+20), max(int(y-20), 0):int(y+20),:]=0

cv2.imwrite('ic_map.jpg', img_new)

In [None]:
#Calculate adjacent matrix from the pixel coordinates and the histology image
#(s is passed as alpha, b as beta)
s=1
b=49
adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, x_pixel=x_pixel, y_pixel=y_pixel, image=img, beta=b, alpha=s, histology=True)
#If histology image is not available, SpaGCN can calculate the adjacent matrix using the function below
# adj=spg.calculate_adj_matrix(x=x_pixel,y=y_pixel, histology=False)
#Save the matrix as CSV so it can be reloaded without recomputation
np.savetxt('ic_adj.csv', adj, delimiter=',')

In [None]:
# Reload the saved adjacency matrix
adj=np.loadtxt('ic_adj.csv', delimiter=',')
# Gene pre-filtering must act on the SpaGCN object (adata_spg), which is the
# one normalised and clustered below -- not on the CellPie object `adata`.
spg.prefilter_genes(adata_spg,min_cells=3) # avoiding all genes are zeros
spg.prefilter_specialgenes(adata_spg)
#Normalize and take log for UMI
sc.pp.normalize_per_cell(adata_spg)
sc.pp.log1p(adata_spg)

In [None]:
p=0.5 
#Find the l value given p
l=spg.search_l(p, adj, start=0.01, end=1000, tol=0.01, max_run=100)

In [None]:
# One shared seed value; a later cell passes these to random, torch and numpy.
r_seed=t_seed=n_seed=100

In [None]:
#If the number of clusters is known, we can use the spg.search_res() function to search for a suitable resolution (optional)
#For this toy data, we set the number of clusters=7 since this tissue has 7 layers
# NOTE(review): every other clustering in this notebook uses 5 clusters, and the
# training cell assigns res a hard-coded value -- confirm that 7 is intended here.
n_clusters=7
#Set seed
r_seed=t_seed=n_seed=100
#Search for suitable resolution
res=spg.search_res(adata_spg, adj, l, n_clusters, start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20, r_seed=r_seed, t_seed=t_seed, n_seed=n_seed)

In [None]:
import random, torch
clf = spg.SpaGCN()
# Hard-coded resolution; replaces whatever value res held before this cell.
res = 0.20000000000000004
clf.set_l(l)
#Set seed
random.seed(r_seed)
torch.manual_seed(t_seed)
np.random.seed(n_seed)
#Run
clf.train(adata_spg,adj,init_spa=True,init="louvain",res=res, tol=5e-3, lr=0.05, max_epochs=200)
y_pred, prob=clf.predict()
# Store the raw predictions as a categorical obs column
adata_spg.obs["pred"]= y_pred
adata_spg.obs["pred"]=adata_spg.obs["pred"].astype('category')
#Do cluster refinement(optional)
#shape="hexagon" for Visium data, "square" for ST data.
# The refinement uses an adjacency matrix built from the array coordinates only (histology=False)
adj_2d=spg.calculate_adj_matrix(x=x_array,y=y_array, histology=False)
refined_pred=spg.refine(sample_id=adata_spg.obs.index.tolist(), pred=adata_spg.obs["pred"].tolist(), dis=adj_2d, shape="hexagon")
adata_spg.obs["SpaGCN_clusters"]=refined_pred
adata_spg.obs["SpaGCN_clusters"]=adata_spg.obs["SpaGCN_clusters"].astype('category')
#Save results
adata_spg.write_h5ad("ic_spagcn_results.h5ad")

In [None]:
# Reload the saved SpaGCN results and draw one scatter plot per label column.
adata_spg=sc.read("ic_spagcn_results.h5ad")
#Set colors used
plot_color=["#F56867","#FEB915","#C798EE","#59BE86","#7495D3","#D1D1D1","#6D1A9C","#15821E","#3A84E6","#997273","#787878","#DB4C6C","#9E7A7A","#554236","#AF5F3C","#93796C","#F9BD3F","#DAB370","#877F6C","#268785"]

# (label column, output file): the raw predictions first, then the refined domains.
for domains, out_png in (
    ("pred", "ic_spagcn_pred.png"),
    ("SpaGCN_clusters", "ic_spagcn_refined_pred.png"),
):
    # One palette entry per distinct label
    num_celltype = len(adata_spg.obs[domains].unique())
    adata_spg.uns[domains + "_colors"] = list(plot_color[:num_celltype])
    ax = sc.pl.scatter(
        adata_spg,
        alpha=1,
        x="y_pixel",
        y="x_pixel",
        color=domains,
        title=domains,
        color_map=plot_color,
        show=False,
        size=100000 / adata_spg.shape[0],
    )
    ax.set_aspect('equal', 'box')
    ax.axes.invert_yaxis()
    plt.savefig(out_png, dpi=600)
    plt.close()

In [None]:
# Fixed colour list for the refined SpaGCN clusters.
adata_spg.uns['SpaGCN_clusters_colors'] = ['#359c62','#d32929','#1f77b4','#e377c2','#aa40fc']

In [None]:
# Spatial plot of the refined SpaGCN clusters.
sc.pl.spatial(adata_spg,color='SpaGCN_clusters',size=1.5)

In [None]:
# Score the refined SpaGCN clusters against the histology annotation.  The
# annotation is re-ordered by barcode to follow adata_spg.obs_names, so both
# label vectors describe the same spots in the same order.
histology_spg = histo_2.set_index('Barcode')['Histology'].reindex(adata_spg.obs_names).values
fm_score=metrics.fowlkes_mallows_score(adata_spg.obs['SpaGCN_clusters'],histology_spg)
adj_rand=metrics.adjusted_rand_score(adata_spg.obs['SpaGCN_clusters'],histology_spg)
adj_mut_info=metrics.adjusted_mutual_info_score(adata_spg.obs['SpaGCN_clusters'],histology_spg)
# Printed order: Fowlkes-Mallows, adjusted Rand, adjusted mutual information
print(fm_score,adj_rand,adj_mut_info)

# stLearn

In [None]:
import stlearn as st
from pathlib import Path

In [None]:
# specify PATH to data
BASE_PATH = Path("Data/")

# spot tile is the intermediate result of image pre-processing
TILE_PATH = Path("/tmp/tiles")
TILE_PATH.mkdir(parents=True, exist_ok=True)

# output path
# NOTE(review): OUT_PATH is "/", i.e. the filesystem root, and it is not
# referenced again in the cells visible here -- confirm the intended output directory.
OUT_PATH = Path("/")
OUT_PATH.mkdir(parents=True, exist_ok=True)

In [None]:
# load data
data = st.Read10X(BASE_PATH)

In [None]:
# Keep only the histology-annotated spots.  Copy, so that the in-place
# preprocessing steps that follow operate on a real object rather than a view.
data = data[data.obs_names.isin(histo_2['Barcode']), :].copy()

In [None]:
# pre-processing for gene count table:
# drop genes seen in no spot, normalise total counts, then log1p-transform
st.pp.filter_genes(data,min_cells=1)
st.pp.normalize_total(data)
st.pp.log1p(data)

In [None]:
# pre-processing for spot image: cut one tile per spot into TILE_PATH
st.pp.tiling(data, TILE_PATH)

# this step uses deep learning model to extract high-level features from tile images
# may need few minutes to be completed
st.pp.extract_feature(data)

In [None]:
# run PCA for gene expression data (50 components)
st.em.run_pca(data,n_comps=50)

In [None]:
# Work on a copy so `data` keeps its un-adjusted matrix
data_SME = data.copy()
# apply stSME to normalise log transformed data
st.spatial.SME.SME_normalize(data_SME, use_data="raw")
# Replace X by the SME-normalised matrix, then scale and redo the PCA on it
data_SME.X = data_SME.obsm['raw_SME_normalized']
st.pp.scale(data_SME)
st.em.run_pca(data_SME,n_comps=50)

In [None]:
# K-means clustering on stSME normalised PCA (5 clusters), stored as 'kmeans_stlearn'
st.tl.clustering.kmeans(data_SME,n_clusters=5, use_data="X_pca", key_added="kmeans_stlearn")
st.pl.cluster_plot(data_SME, use_label="kmeans_stlearn")

In [None]:
# Fixed colour list for the stLearn clusters.
data_SME.uns['kmeans_stlearn_colors'] =['#d32929','#359c62','#1f77b4','#e377c2','#8c564b']

In [None]:
# Collect the stLearn and SpaGCN labels and their colour lists on adata so
# all methods can be plotted from one object.
# Note the key is spelled 'spaGCN_clusters' on adata and 'SpaGCN_clusters' on adata_spg.
adata.obs['kmeans_stlearn'] = data_SME.obs['kmeans_stlearn']
adata.uns['kmeans_stlearn_colors'] = data_SME.uns['kmeans_stlearn_colors']
adata.obs['spaGCN_clusters'] = adata_spg.obs['SpaGCN_clusters']
adata.uns['spaGCN_clusters_colors'] = adata_spg.uns['SpaGCN_clusters_colors']

In [None]:
# Display the summary of the combined object.
adata

In [None]:
sc.pl.spatial(adata,color=['kmeans_CellPie_int'])

In [None]:
# Side-by-side spatial plots (3 per row) of the annotation and of every clustering.
sc.pl.spatial(adata,color=['path_anot','kmeans_CellPie_int','kmeans_CellPie_0','kmeans','spaGCN_clusters','kmeans_stlearn'],
              size=1.5,ncols=3,legend_loc = 'left margin')

In [None]:
# Score each clustering stored on adata_0 against the histology annotation.
# The annotation is re-ordered by barcode to follow adata_0.obs_names.
histology_0 = histo_2.set_index('Barcode')['Histology'].reindex(adata_0.obs_names).values
label_keys = ['kmeans','kmeans_CellPie_int','kmeans_CellPie_0']
# Distinct names for the list and the loop variable, so the loop does not
# overwrite the list it iterates over.
for key in label_keys:
    fm_score=metrics.fowlkes_mallows_score(adata_0.obs[key],histology_0)
    adj_rand=metrics.adjusted_rand_score(adata_0.obs[key],histology_0)
    adj_mut_info=metrics.adjusted_mutual_info_score(adata_0.obs[key],histology_0)
    # Printed order: label, Fowlkes-Mallows, adjusted Rand, adjusted mutual information
    print(key,fm_score,adj_rand,adj_mut_info)

In [None]:
# SpaGCN scores against the histology annotation, with the annotation
# re-ordered by barcode to follow adata_spg.obs_names.
histology_spg = histo_2.set_index('Barcode')['Histology'].reindex(adata_spg.obs_names).values
fm_score=metrics.fowlkes_mallows_score(adata_spg.obs['SpaGCN_clusters'],histology_spg)
adj_rand=metrics.adjusted_rand_score(adata_spg.obs['SpaGCN_clusters'],histology_spg)
adj_mut_info=metrics.adjusted_mutual_info_score(adata_spg.obs['SpaGCN_clusters'],histology_spg)
# Printed order: Fowlkes-Mallows, adjusted Rand, adjusted mutual information
print(fm_score,adj_rand,adj_mut_info)

In [None]:
# stLearn scores against the histology annotation, with the annotation
# re-ordered by barcode to follow data_SME.obs_names.
histology_sme = histo_2.set_index('Barcode')['Histology'].reindex(data_SME.obs_names).values
fm_score=metrics.fowlkes_mallows_score(data_SME.obs['kmeans_stlearn'],histology_sme)
adj_rand=metrics.adjusted_rand_score(data_SME.obs['kmeans_stlearn'],histology_sme)
adj_mut_info=metrics.adjusted_mutual_info_score(data_SME.obs['kmeans_stlearn'],histology_sme)
# Printed order: Fowlkes-Mallows, adjusted Rand, adjusted mutual information
print(fm_score,adj_rand,adj_mut_info)

In [None]:
# ARI per method, entered as literal numbers.
# NOTE(review): these values are hard-coded rather than taken from the
# adj_rand results computed above -- re-check them after any re-run.
f={'CellPie_int':[0.3182037696596412],'CellPie_0':[0.2452324137345626],'kmeans':[0.2667040151343367],
'SpaGCN':[0.25655188260699413],
 'stLearn':[0.39077121051876296]}

In [None]:
# One-row table (row label 'ARI') with one column per method
score_comp = pd.DataFrame(data=f)
score_comp.index = ['ARI']

In [None]:
score_comp

In [None]:
import seaborn as sns

# Horizontal bar chart of the ARI of each method
plot=sns.barplot(data=score_comp,orient='h')
plot.set_xlabel( "ARI")
plt.grid(False)

In [None]:
# Annotation rows whose Histology text contains 'Gleason 3' / 'Gleason 4'
histo_3_new = histo_2[histo_2['Histology'].str.contains('Gleason 3')]
histo_4_new = histo_2[histo_2['Histology'].str.contains('Gleason 4')]

In [None]:
# obs rows of the cluster taken to represent Gleason 4 (g4) and Gleason 3 (g3)
# for SpaGCN, stLearn and plain k-means.
# NOTE(review): for each of these three methods the SAME cluster label is
# used for both g4 and g3 -- confirm this is intended.
adata_spa_new_g4 = adata_spg.obs[(adata_spg.obs['SpaGCN_clusters']==0)]
adata_spa_new_g3= adata_spg.obs[(adata_spg.obs['SpaGCN_clusters']==0)]
adata_stl_new_g4 = data_SME.obs[(data_SME.obs['kmeans_stlearn']=='1')]
adata_stl_new_g3= data_SME.obs[(data_SME.obs['kmeans_stlearn']=='1')]
adata_km_new_g4 = adata.obs[(adata.obs['kmeans']=='0')]
adata_km_new_g3= adata.obs[(adata.obs['kmeans']=='0')]

In [None]:
# For the two CellPie variants, cluster '3' is used for g4 and cluster '0' for g3
adata_int_new_g4 = adata.obs[(adata.obs['kmeans_CellPie_int']=='3')]
adata_int_new_g3= adata.obs[(adata.obs['kmeans_CellPie_int']=='0')]
adata_0_new_g4 = adata_0.obs[(adata_0.obs['kmeans_CellPie_0']=='3')]
adata_0_new_g3= adata_0.obs[(adata_0.obs['kmeans_CellPie_0']=='0')]

In [None]:
# Barcodes annotated as Gleason 3 / Gleason 4.  They are taken from the
# 'Barcode' column because the row index of histo_3_new / histo_4_new is not
# guaranteed to hold barcodes at this point; intersecting a non-barcode index
# with the barcode-indexed obs tables would always give an empty result.
g3_barcodes = pd.Index(histo_3_new['Barcode'])
g4_barcodes = pd.Index(histo_4_new['Barcode'])

# Spots of each method's cluster that carry the matching Gleason annotation
g3_spa_overl = g3_barcodes.intersection(adata_spa_new_g3.index)
g4_spa_overl = g4_barcodes.intersection(adata_spa_new_g4.index)
g3_stl_overl = g3_barcodes.intersection(adata_stl_new_g3.index)
g4_stl_overl = g4_barcodes.intersection(adata_stl_new_g4.index)
g3_km_overl = g3_barcodes.intersection(adata_km_new_g3.index)
g4_km_overl = g4_barcodes.intersection(adata_km_new_g4.index)

In [None]:
# Display the obs rows of the CellPie cluster used for Gleason 3.
adata_int_new_g3

In [None]:
# NOTE(review): g3_int_overl is first assigned in a later cell, so this
# display only works if that cell has already been executed.
g3_int_overl

In [None]:
# Barcodes annotated as Gleason 3 / Gleason 4.  They are taken from the
# 'Barcode' column because the row index of histo_3_new / histo_4_new is not
# guaranteed to hold barcodes at this point; otherwise every spot of a
# cluster would be counted as a difference.
g3_barcodes = pd.Index(histo_3_new['Barcode'])
g4_barcodes = pd.Index(histo_4_new['Barcode'])

# Spots of each method's cluster that do NOT carry the matching annotation
g3_spa_dif = adata_spa_new_g3.index.difference(g3_barcodes)
g4_spa_dif = adata_spa_new_g4.index.difference(g4_barcodes)
g3_stl_dif = adata_stl_new_g3.index.difference(g3_barcodes)
g4_stl_dif = adata_stl_new_g4.index.difference(g4_barcodes)
g3_km_dif = adata_km_new_g3.index.difference(g3_barcodes)
g4_km_dif = adata_km_new_g4.index.difference(g4_barcodes)

In [None]:
# NOTE(review): g3_int_dif is first assigned in a later cell, so this
# display only works if that cell has already been executed.
g3_int_dif

In [None]:
# Use the barcode as row index of the two Gleason tables, so their index can
# be compared with the barcode-indexed obs tables.
histo_3_new.index = histo_3_new['Barcode']
histo_4_new.index = histo_4_new['Barcode']

In [None]:
# Spots of each CellPie cluster that carry the matching Gleason annotation
# (relies on the Gleason tables being indexed by barcode, as set in the previous cell)
g3_int_overl = histo_3_new.index.intersection(adata_int_new_g3.index)
g4_int_overl = histo_4_new.index.intersection(adata_int_new_g4.index)
g3_0_overl = histo_3_new.index.intersection(adata_0_new_g3.index)
g4_0_overl = histo_4_new.index.intersection(adata_0_new_g4.index)

In [None]:
# Spots of each CellPie cluster that do NOT carry the matching annotation
g3_int_dif =adata_int_new_g3.index.difference(histo_3_new.index)
g4_int_dif = adata_int_new_g4.index.difference(histo_4_new.index)
g3_0_dif = adata_0_new_g3.index.difference(histo_3_new.index)
g4_0_dif = adata_0_new_g4.index.difference(histo_4_new.index)

In [None]:
# Fraction of each CellPie cluster's spots that carry the matching Gleason
# annotation: overlap size divided by cluster size.
ac_g3_int = g3_int_overl.shape[0]/(adata_int_new_g3.shape[0])
ac_g4_int = g4_int_overl.shape[0]/(adata_int_new_g4.shape[0])

ac_g3_0 = g3_0_overl.shape[0]/(adata_0_new_g3.shape[0])

ac_g4_0 = g4_0_overl.shape[0]/(adata_0_new_g4.shape[0])
print(ac_g3_int,ac_g4_int,ac_g3_0,ac_g4_0)

In [None]:
# Precision as overlap / (overlap + difference).  Overlap and difference
# together make up the cluster, so this should equal the ac_* values above
# (assuming unique barcodes -- verify).
pre_g3_int = g3_int_overl.shape[0]/(g3_int_overl.shape[0]+g3_int_dif.shape[0])
pre_g4_int = g4_int_overl.shape[0]/(g4_int_overl.shape[0]+g4_int_dif.shape[0])

pre_g3_0 = g3_0_overl.shape[0]/(g3_0_overl.shape[0]+g3_0_dif.shape[0])

pre_g4_0 = g4_0_overl.shape[0]/(g4_0_overl.shape[0]+g4_0_dif.shape[0])
print(pre_g3_int,pre_g4_int,pre_g3_0,pre_g4_0)

In [None]:
# Same overlap / cluster-size fraction for SpaGCN (spa), stLearn (stl) and k-means (km)
ratio_g3_spa = g3_spa_overl.shape[0]/(adata_spa_new_g3.shape[0])
ratio_g4_spa = g4_spa_overl.shape[0]/(adata_spa_new_g4.shape[0])

ratio_g3_stl = g3_stl_overl.shape[0]/(adata_stl_new_g3.shape[0])

ratio_g4_stl = g4_stl_overl.shape[0]/(adata_stl_new_g4.shape[0])
ratio_g3_km = g3_km_overl.shape[0]/(adata_km_new_g3.shape[0])

ratio_g4_km = g4_km_overl.shape[0]/(adata_km_new_g4.shape[0])
print(ratio_g3_spa,ratio_g4_spa,ratio_g3_stl,ratio_g4_stl,ratio_g3_km,ratio_g4_km)

In [None]:
# Precision as overlap / (overlap + difference) for the same three methods
pre_g3_spa = g3_spa_overl.shape[0]/(g3_spa_overl.shape[0]+g3_spa_dif.shape[0])
pre_g4_spa = g4_spa_overl.shape[0]/(g4_spa_overl.shape[0]+g4_spa_dif.shape[0])

pre_g3_stl = g3_stl_overl.shape[0]/(g3_stl_overl.shape[0]+g3_stl_dif.shape[0])

pre_g4_stl = g4_stl_overl.shape[0]/(g4_stl_overl.shape[0]+g4_stl_dif.shape[0])
pre_g3_km = g3_km_overl.shape[0]/(g3_km_overl.shape[0]+g3_km_dif.shape[0])

pre_g4_km = g4_km_overl.shape[0]/(g4_km_overl.shape[0]+g4_km_dif.shape[0])
print(pre_g3_spa,pre_g4_spa,pre_g3_stl,pre_g4_stl,pre_g3_km,pre_g4_km)

In [None]:
# Precision per method for Gleason 3 (g3) and Gleason 4 (g4), entered as literal numbers.
# NOTE(review): these values are hard-coded rather than taken from the pre_*
# variables computed above, and the g4 entries for 'CellPie_0' and 'SpaGCN'
# are identical -- check them against pre_g4_0 and pre_g4_spa.
g3 ={'CellPie_int':[0.625],'CellPie_0':[0.5624178712220762],'kmeans':[0.4297159504734159],'SpaGCN':[0.45419254658385094],'stLearn':[0.3992042440318302] }
g4 ={'CellPie_int':[ 0.5338474721508141],'CellPie_0':[ 0.45729813664596275],'kmeans':[0.5142024763292061],'SpaGCN':[ 0.45729813664596275],'stLearn':[0.5358090185676393]}

In [None]:
score_g3 = pd.DataFrame(data=g3)
score_g4 = pd.DataFrame(data=g4)
# Horizontal bar chart of the Gleason 3 values
plot=sns.barplot(data=score_g3,orient='h')
plot.set_xlabel("Precision")
plt.grid(False)

In [None]:
# Horizontal bar chart of the Gleason 4 values
plot=sns.barplot(data=score_g4,orient='h')
plot.set_xlabel( "Precision")
plt.grid(False)

In [None]:
# Save the combined object, with the labels of every method, to disk.
adata.write('adata_prostate_repro.h5ad')