Load the required libraries. int_nmf_model must be in the same directory; if it is not, it can be added to Python's path.

In [None]:
import anndata as ad
import scanpy as sc
import numpy as np
import scipy
import sys
sys.path.append("../../../CellPie/") # go to parent dir
import cellpie_main as cp
from cellpie_main import intNMF
import anndata as ad
from matplotlib import pyplot as plt
import pandas as pd
import squidpy as sq
from sklearn.cluster import KMeans
import seaborn as sns

In [None]:
# Global scanpy figure defaults: 250 dpi for rendered figures, 100 dpi for saved ones.
sc.set_figure_params(scanpy=True, dpi=250, dpi_save=100)

In [None]:
# Seed passed to every stochastic step in this notebook (model fits, neighbour graphs,
# Leiden clustering, factor analysis).
random_state=80

# CellPie

In [None]:
# Histology annotation table (the file name suggests a Loupe Browser export — confirm).
# Rows containing any missing value are discarded.
annotation_csv = 'Histology_Visium_FFPE_Human_Prostate_Cancer_cloupe.csv'
histo_2 = pd.read_csv(annotation_csv).dropna()
# Index the table by barcode while keeping 'Barcode' available as a regular column.
histo_2 = histo_2.set_index('Barcode', drop=False)

In [None]:
%%sh
# Download the 10x Genomics Visium FFPE human prostate cancer sample into
# invasive_prostate_visium/ and unpack the spatial archive there.
#
# `cd` replaces pushd/popd: those are bash builtins and do not exist in POSIX
# shells such as dash, which `sh` resolves to on many systems. The cell runs in
# its own subprocess, so nothing needs to change back to the original directory.
set -e  # stop at the first failing command instead of continuing with partial data
mkdir -p invasive_prostate_visium
cd invasive_prostate_visium/
wget https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Prostate_Cancer/Visium_FFPE_Human_Prostate_Cancer_image.tif
wget https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Prostate_Cancer/Visium_FFPE_Human_Prostate_Cancer_filtered_feature_bc_matrix.h5
wget https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Prostate_Cancer/Visium_FFPE_Human_Prostate_Cancer_spatial.tar.gz
tar -xzf Visium_FFPE_Human_Prostate_Cancer_spatial.tar.gz
rm Visium_FFPE_Human_Prostate_Cancer_spatial.tar.gz

In [None]:
# Load the spatial transcriptomics data (count matrix plus tissue images).
visium_dir = 'invasive_prostate_visium/'
counts_h5 = 'Visium_FFPE_Human_Prostate_Cancer_filtered_feature_bc_matrix.h5'
adata = sc.read_visium(path=visium_dir, count_file=counts_h5, load_images=True)

# Make gene names unique first, then keep a copy of them in a SYMBOL column.
adata.var_names_make_unique()
adata.var['SYMBOL'] = adata.var_names

In [None]:
# Keep only the spots whose barcode appears in the histology annotation table.
# NOTE(review): slicing an AnnData yields a view; if later in-place steps warn about
# implicit modification, appending .copy() here would make the subset explicit — confirm.
adata = adata[adata.obs_names.isin(histo_2['Barcode']),:]

In [None]:
from feature_extr import extract_features

# Spot scales from 0.1 upwards in steps of 0.1, stopping before 3 (half-open np.arange).
scale_range = np.arange(0.1, 3, 0.1)
image_path = 'invasive_prostate_visium/Visium_FFPE_Human_Prostate_Cancer_image.tif'
# Image features computed by the project helper from the full-resolution tissue image.
features = extract_features(
    adata,
    img_path=image_path,
    scale=1,
    spot_scale=scale_range,
)

In [None]:
from cp_utils import preprocess_data
# Project preprocessing helper. Its return value is not used here, so it presumably
# modifies adata in place — TODO confirm against cp_utils.
preprocess_data(adata,min_cells=100)

In [None]:
# Attach the histology label to every spot; pandas aligns the values on the barcode
# index (histo_2 is indexed by its 'Barcode' column).
adata.obs['Histology']=histo_2['Histology']

In [None]:
from cp_utils import model_selection
# Candidate numbers of factors 1..29 handed to the project's model-selection helper.
# Note that k is re-bound to a single value (14) further down the notebook.
k = np.arange(1,30)
mod_sel = model_selection(adata,k,random_state=random_state)

In [None]:
from sklearn import metrics

# Sweep the modality weight (mod1_skew) over np.arange(0, 2.01, 0.1). For every weight,
# fit a 14-factor model, Leiden-cluster the spots on its theta matrix and score the
# clustering against the histology annotation.
# Each row of `res` is (weight, Fowlkes-Mallows, Adjusted Rand, Adjusted Mutual Info).
# A KMeans clustering (6 clusters on nmf_model.theta) was tried as an alternative to
# Leiden and is disabled.
res = []
for skew in np.arange(0, 2.01, 0.1):
    nmf_model = intNMF(adata, 14, epochs=50, init='nndsvd',
                       random_state=random_state, mod1_skew=skew)
    nmf_model.fit(adata)

    adata.obsm['nmf_cellpie'] = nmf_model.theta
    sc.pp.neighbors(adata, n_neighbors=90, random_state=random_state,
                    use_rep='nmf_cellpie', key_added='nmf_cellpie')
    sc.tl.leiden(adata, resolution=0.35, random_state=random_state,
                 neighbors_key='nmf_cellpie', key_added='CellPie')

    predicted = adata.obs['CellPie']
    reference = adata.obs['Histology']
    res.append((
        skew,
        metrics.fowlkes_mallows_score(predicted, reference),
        metrics.adjusted_rand_score(predicted, reference),
        metrics.adjusted_mutual_info_score(predicted, reference),
    ))

In [None]:
# Plot the three agreement scores against the modality weight.
# Columns of `score`: 0 = weight, 1 = Fowlkes-Mallows, 2 = Adjusted Rand, 3 = Adjusted Mutual Info.
score = pd.DataFrame(res)
curves = [
    (1, "green", 'Fowlkes Mallows Score'),
    (2, "red", 'Adjusted Rand Index Score'),
    (3, "blue", 'Adjusted Mutual Info Score'),
]
for column, colour, legend_label in curves:
    plt.plot(score[0], score[column], color=colour, label=legend_label)
plt.xlabel("Modality Weight")
plt.ylabel("Score")
plt.legend(prop={'size': 9})

In [None]:
# Row label of the highest Adjusted Rand Index (column 2). This is the position in the
# sweep, not the weight itself; the weight sits in column 0 of that row.
score[2].idxmax()

In [None]:
# Number of factors used by the model fits, factor analysis and plots below.
k = 14

In [None]:
# CellPie fit with mod1_skew=2.0, kept separately as nmf_model_0; its theta feeds the
# 'cellpie_0' clustering later on.
# NOTE(review): which modality a given mod1_skew value favours is defined by intNMF — confirm there.
nmf_model_0 = intNMF(adata,k,epochs = 50,init = 'nndsvd',random_state=random_state,mod1_skew=2.0)
nmf_model_0.fit(adata)

In [None]:
# Main CellPie fit (mod1_skew=1.8). Later cells read its theta (clustering) and
# phi_expr (gene loadings) attributes.
nmf_model = intNMF(adata,k,epochs = 50,init = 'nndsvd',random_state=random_state,mod1_skew=1.8)
nmf_model.fit(adata)

In [None]:
from re import sub  # not used in this cell
import matplotlib as mpl

# Spatial maps of Factor_1 ... Factor_k (assumed to be present in adata.obs), drawn on
# a black background with the tissue image made fully transparent (alpha_img=0).
sel_clust = [f'Factor_{idx}' for idx in range(1, k + 1)]
dark_panels = {'figure.figsize': (5, 6), 'axes.facecolor': 'black'}
with mpl.rc_context(dark_panels):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        ncols=3,
        size=1,
        img_key='hires',
        alpha_img=0,
    )

In [None]:
# adata.obsm['nmf_cellpie'] = nmf_model.theta
# sc.pp.neighbors(adata,n_neighbors=90,random_state=random_state,use_rep='nmf_cellpie',key_added='nmf_cellpie')
# sc.tl.leiden(adata,resolution=0.35,random_state=random_state,neighbors_key='nmf_cellpie',key_added = 'CellPie')

In [None]:
# from sklearn import metrics
# adj_rand=metrics.adjusted_rand_score(adata.obs['CellPie'],adata.obs['Histology'])
# adj_rand

In [None]:
# Pre-computed factor tables of the benchmark methods, one CSV per method.
def _read_factors(csv_path):
    """Read one benchmark factor table, using its first column as the row index."""
    return pd.read_csv(csv_path, index_col=0)


mefisto = _read_factors('Benchmark/factors_mefisto_prostate_poi_14.csv')
nsf = _read_factors('Benchmark/factors_nsf_prostate_poi_14.csv')
nsfh_s = _read_factors('Benchmark/spatialfactors_nsfh_prostate_poi_14.csv')
nsfh_ns = _read_factors('Benchmark/nonspatialfactors_nsfh_prostate_poi_14.csv')
pnmf = _read_factors('Benchmark/factors_pnmf_prostate_poi_14.csv')


In [None]:
# Shift the spatial-factor column labels "0".."6" to "7".."13", presumably so they do
# not collide with the non-spatial factor labels when both tables are concatenated.
spatial_relabel = {str(old): str(old + 7) for old in range(7)}
nsfh_s = nsfh_s.rename(columns=spatial_relabel)

In [None]:
# Combine the NSFH factors column-wise: non-spatial factors first, spatial factors after.
nsfh = pd.concat([nsfh_ns,nsfh_s], axis=1)
# Display the combined table.
nsfh

In [None]:
from scipy.sparse import issparse
from sklearn.decomposition import FactorAnalysis

# Factor-analysis baseline with the same number of components (k) as the other methods.
transformer = FactorAnalysis(n_components=k, random_state=random_state)

# FactorAnalysis is given a dense array. `.toarray()` replaces the sparse `.A`
# shorthand, which recent SciPy releases deprecate or drop, and an expression matrix
# that is already dense is passed through instead of raising AttributeError.
expr_matrix = adata.X
if issparse(expr_matrix):
    expr_matrix = expr_matrix.toarray()
else:
    expr_matrix = np.asarray(expr_matrix)
X_transformed = transformer.fit_transform(expr_matrix)

In [None]:
# Copy the first k factors of every method into adata.obs as <prefix>_1 ... <prefix>_k.
# DataFrame columns are aligned on the index by pandas; the factor-analysis scores are
# a plain array and are assigned positionally.
def _store_factors(prefix, column_of):
    """Write column_of(0..k-1) into adata.obs under '<prefix>_<1..k>'."""
    for idx in range(k):
        adata.obs[f"{prefix}_{idx+1}"] = column_of(idx)


_store_factors("nsfh", lambda idx: nsfh.iloc[:, idx])
_store_factors("nsf", lambda idx: nsf.iloc[:, idx])
_store_factors("mefisto", lambda idx: mefisto.iloc[:, idx])
_store_factors("FA", lambda idx: X_transformed[:, idx])
_store_factors("pnmf", lambda idx: pnmf.iloc[:, idx])

In [None]:
from re import sub  # not used in this cell
import matplotlib as mpl

# Spatial maps of the obs columns mefisto_1 ... mefisto_k on a black background,
# with the tissue image made fully transparent.
sel_clust = [f'mefisto_{idx}' for idx in range(1, k + 1)]
dark_panels = {'figure.figsize': (5, 6), 'axes.facecolor': 'black'}
with mpl.rc_context(dark_panels):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        ncols=3,
        size=1,
        img_key='hires',
        alpha_img=0,
    )

In [None]:
from re import sub  # not used in this cell
import matplotlib as mpl

# Spatial maps of the obs columns nsfh_1 ... nsfh_k on a black background,
# with the tissue image made fully transparent.
sel_clust = [f'nsfh_{idx}' for idx in range(1, k + 1)]
dark_panels = {'figure.figsize': (5, 6), 'axes.facecolor': 'black'}
with mpl.rc_context(dark_panels):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        ncols=3,
        size=1,
        img_key='hires',
        alpha_img=0,
    )

In [None]:
from re import sub  # not used in this cell
import matplotlib as mpl

# Spatial maps of the obs columns FA_1 ... FA_k on a black background,
# with the tissue image made fully transparent.
sel_clust = [f'FA_{idx}' for idx in range(1, k + 1)]
dark_panels = {'figure.figsize': (5, 6), 'axes.facecolor': 'black'}
with mpl.rc_context(dark_panels):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        ncols=3,
        size=1,
        img_key='hires',
        alpha_img=0,
    )

In [None]:
from re import sub  # not used in this cell
import matplotlib as mpl

# Spatial maps of the obs columns pnmf_1 ... pnmf_k on a black background,
# with the tissue image made fully transparent.
sel_clust = [f'pnmf_{idx}' for idx in range(1, k + 1)]
dark_panels = {'figure.figsize': (5, 6), 'axes.facecolor': 'black'}
with mpl.rc_context(dark_panels):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        ncols=3,
        size=1,
        img_key='hires',
        alpha_img=0,
    )

In [None]:
from re import sub  # not used in this cell
import matplotlib as mpl

# Spatial maps of the obs columns nsf_1 ... nsf_k on a black background,
# with the tissue image made fully transparent.
sel_clust = [f'nsf_{idx}' for idx in range(1, k + 1)]
dark_panels = {'figure.figsize': (5, 6), 'axes.facecolor': 'black'}
with mpl.rc_context(dark_panels):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        ncols=3,
        size=1,
        img_key='hires',
        alpha_img=0,
    )

### extract gene loading matrix

In [None]:
# Gene loading table of the main CellPie model, built by the project helper from phi_expr.
# NOTE(review): later cells read l.T[<n>] and plot the resulting index entries as genes,
# so rows are presumably factors and columns genes — confirm against get_genes_topic.
l = cp.get_genes_topic(adata,nmf_model.phi_expr)

In [None]:
# adata.write('prostate_reproduce_14Factors_paper.h5ad')

In [None]:
# adata_l=sc.AnnData(l)

In [None]:
# adata_l.write('prostate_loadings_reproduce_14Factors_papers.h5ad')

In [None]:
# Cluster the spots once per method: store the method's factors in adata.obsm, build a
# 90-neighbour graph on that representation, then run Leiden with a per-method resolution.
def _leiden_on(rep_key, embedding, resolution, cluster_key):
    """Store `embedding` under obsm[rep_key], then write Leiden labels to obs[cluster_key]."""
    adata.obsm[rep_key] = embedding
    sc.pp.neighbors(adata, n_neighbors=90, random_state=random_state,
                    use_rep=rep_key, key_added=rep_key)
    sc.tl.leiden(adata, resolution=resolution, random_state=random_state,
                 neighbors_key=rep_key, key_added=cluster_key)


def _obs_factors(fragment):
    """Select the obs columns whose name contains `fragment`."""
    return adata.obs.loc[:, adata.obs.columns.str.contains(fragment)]


# Benchmark methods: factors were stored in adata.obs as <prefix>_<n> columns.
_leiden_on('nsf', _obs_factors('nsf_'), 0.35, 'nsf')
_leiden_on('nsfh', _obs_factors('nsfh_'), 0.35, 'nsfh')
_leiden_on('mefisto', _obs_factors('mefisto_'), 0.33, 'mefisto')
_leiden_on('fa', _obs_factors('FA_'), 0.38, 'fa')
_leiden_on('pnmf', _obs_factors('pnmf_'), 0.39, 'pnmf')

# CellPie fits: the theta matrix of each model is used directly.
_leiden_on('cellpie_0', nmf_model_0.theta, 0.135, 'cellpie_0')
_leiden_on('nmf_cellpie', nmf_model.theta, 0.35, 'CellPie')

In [None]:
# Spatial maps of the histology annotation next to every method's Leiden clusters.
cluster_columns = ['Histology', 'CellPie', 'nsf', 'nsfh', 'mefisto', 'fa',
                   'pnmf', 'cellpie_0']
sc.pl.spatial(
    adata,
    color=cluster_columns,
    # save='invasive_prostate_new',
    size=1.4,
    ncols=4,
)

In [None]:
from sklearn import metrics

# Score every method's Leiden clustering against the histology annotation.
l_t = ['CellPie','nsf','nsfh','mefisto','fa','pnmf','cellpie_0']
histology = adata.obs['Histology']
res = []
# A dedicated loop variable keeps l_t bound to the list of methods; the previous
# `for l_t in l_t` left it holding the last method name after the loop.
for method in l_t:
    clusters = adata.obs[method]
    res.append((
        method,
        metrics.fowlkes_mallows_score(clusters, histology),
        metrics.adjusted_rand_score(clusters, histology),
        metrics.adjusted_mutual_info_score(clusters, histology),
    ))
# The first score comes from fowlkes_mallows_score, so the column is labelled
# accordingly rather than 'Mutual Info'.
res_df = pd.DataFrame(
    res,
    columns=['Method', 'Fowlkes-Mallows', 'Adjusted Rand Index', 'Adjusted Mutual Info'],
)

In [None]:
# Display the per-method score table.
res_df

In [None]:
# Display the per-method score table (same output as the previous cell).
res_df

In [None]:
import seaborn as sns

# Bar chart of the Adjusted Rand Index per method; bars are coloured by their ARI value
# and the legend is suppressed.
plot = sns.barplot(
    data=res_df,
    x='Method',
    y='Adjusted Rand Index',
    hue='Adjusted Rand Index',
    width=0.6,
    legend=False,
)
tick_labels = plot.get_xticklabels()
plot.set_xticklabels(tick_labels, fontsize=10)
plt.title('ARI-Leiden Clustering')
plt.grid(False)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Cross-tabulate histology classes (rows) against CellPie clusters (columns).
# normalize='columns' turns every cluster column into fractions that sum to 1.
composition = pd.crosstab(
    adata.obs['Histology'],
    adata.obs['CellPie'].values,
    normalize='columns',
)

plt.figure(figsize=(10, 8))
sns.heatmap(
    composition,
    cmap=sns.color_palette("crest", as_cmap=True),
    annot=True,
)
# Switch the grid off explicitly: grid(None) only toggles whatever state the axes are
# in, so the result depended on the active rcParams.
plt.grid(False)
plt.show()

In [None]:
# Spatial expression maps of EPCAM and MSMB on a black background with the tissue image
# hidden. Relies on `mpl` having been imported by an earlier cell.
with mpl.rc_context({'figure.figsize': (2, 3), 'axes.facecolor': 'black'}):
    sc.pl.spatial(adata,color=['EPCAM','MSMB'],size=1.4,alpha_img=0,cmap='magma')

In [None]:
import matplotlib as mpl

# Ten entries with the largest values in l.T[7], each plotted as its own spatial map.
ranked_loadings = l.T[7].sort_values(ascending=False)
marker_genes_0 = ranked_loadings.index[0:10].to_list()
small_dark_panel = {'figure.figsize': (2, 3), 'axes.facecolor': 'black'}
for g in marker_genes_0:
    with mpl.rc_context(small_dark_panel):
        sc.pl.spatial(adata, color=[str(g)], size=1.4, alpha_img=0, cmap='magma')

In [None]:
import matplotlib as mpl

# Thirty entries with the largest values in l.T[0], each plotted as its own spatial map.
ranked_loadings = l.T[0].sort_values(ascending=False)
marker_genes_1 = ranked_loadings.index[0:30].to_list()
small_dark_panel = {'figure.figsize': (2, 3), 'axes.facecolor': 'black'}
for g in marker_genes_1:
    with mpl.rc_context(small_dark_panel):
        sc.pl.spatial(adata, color=[str(g)], size=1.4, alpha_img=0, cmap='magma')

In [None]:
import gseapy as gp


def gene_er(topic, gene_sets=None, cutoff=0.6, organism='human'):
    """Run an Enrichr query for one list of genes.

    Args:
        topic: List of gene names to test.
        gene_sets: Enrichr library names; defaults to
            ['GO_Biological_Process_2023'] when omitted.
        cutoff: Forwarded unchanged to ``gp.enrichr`` (default 0.6).
        organism: Forwarded unchanged to ``gp.enrichr`` (default 'human').

    Returns:
        The object returned by ``gp.enrichr``; callers in this notebook read
        its ``res2d`` attribute. No output directory is written (outdir=None).
    """
    # None sentinel instead of a list literal avoids a shared mutable default.
    if gene_sets is None:
        gene_sets = ['GO_Biological_Process_2023']
    return gp.enrichr(
        gene_list=topic,
        gene_sets=gene_sets,
        cutoff=cutoff,
        organism=organism,
        outdir=None,
    )

In [None]:
# Enrichment of the 150 highest-valued entries of l.T[0] ... l.T[13], one query per
# index, run in ascending order.
l = cp.get_genes_topic(adata, nmf_model.phi_expr)
loadings_t = l.T
per_factor = [
    gene_er(loadings_t[factor].sort_values(ascending=False).index[0:150].to_list())
    for factor in range(14)
]
# Later cells refer to the results by name, so unpack them into enr0 ... enr13.
(enr0, enr1, enr2, enr3, enr4, enr5, enr6,
 enr7, enr8, enr9, enr10, enr11, enr12, enr13) = per_factor

In [None]:
# Bar plot of the enrichment results for factor 1.
gp.barplot(enr0.res2d,title='GO_Biological_Process_2023 Factor 1',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 2.
gp.barplot(enr1.res2d,title='GO_Biological_Process_2023 Factor 2',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 3.
gp.barplot(enr2.res2d,title='GO_Biological_Process_2023 Factor 3',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 4.
gp.barplot(enr3.res2d,title='GO_Biological_Process_2023 Factor 4',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 5.
gp.barplot(enr4.res2d,title='GO_Biological_Process_2023 Factor 5',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 6.
gp.barplot(enr5.res2d,title='GO_Biological_Process_2023 Factor 6',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 7.
gp.barplot(enr6.res2d,title='GO_Biological_Process_2023 Factor 7',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 8.
gp.barplot(enr7.res2d,title='GO_Biological_Process_2023 Factor 8',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 9.
gp.barplot(enr8.res2d,title='GO_Biological_Process_2023 Factor 9',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 10.
gp.barplot(enr9.res2d,title='GO_Biological_Process_2023 Factor 10',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 11.
gp.barplot(enr10.res2d,title='GO_Biological_Process_2023 Factor 11',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 12.
gp.barplot(enr11.res2d,title='GO_Biological_Process_2023 Factor 12',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 13.
gp.barplot(enr12.res2d,title='GO_Biological_Process_2023 Factor 13',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)

In [None]:
# Bar plot of the enrichment results for factor 14.
gp.barplot(enr13.res2d,title='GO_Biological_Process_2023 Factor 14',color=['darkred'])
# Switch the grid off explicitly; grid(None) only toggles the current grid state.
plt.grid(False)