Load the required libraries. int_nmf_model must be in the same directory; if it is not, it can be added to Python's path.

In [None]:
import anndata as ad
import scanpy as sc
import numpy as np
import scipy
import sys
sys.path.append("../../CellPie/") # go to parent dir
import cellpie_main as cp
from cellpie_main import intNMF
from matplotlib import pyplot as plt
import scipy.io
import pandas as pd
from sklearn.cluster import KMeans
import seaborn as sns

In [None]:
import spatialdata as sd
from spatialdata_io import visium_hd

In [None]:
# Global scanpy figure settings: on-screen DPI of 250, saved-figure DPI of 200.
sc.set_figure_params(
    scanpy=True,
    dpi=250,
    dpi_save=200,
)

In [None]:
# Seed passed as random_state= to every model fit in this notebook.
random_state = 80

# CellPie

Download Visium HD CRC P1 data - https://www.10xgenomics.com/products/visium-hd-spatial-gene-expression/dataset-human-crc

In [None]:
%%sh
# Download and unpack the Visium HD human colon cancer P1 dataset into
# visium_hd_crc_p1/.
# - Stop at the first failing command so a failed download is not silently
#   followed by tar/rm on missing files.
# - Use plain `cd` instead of pushd/popd: those are bash builtins and are not
#   available when `sh` is a strict POSIX shell. The cell runs in its own
#   subprocess, so there is no working directory to restore afterwards.
set -e
base_url=https://cf.10xgenomics.com/samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Cancer_P1
prefix=Visium_HD_Human_Colon_Cancer_P1
mkdir -p visium_hd_crc_p1
cd visium_hd_crc_p1/
wget "${base_url}/${prefix}_binned_outputs.tar.gz"
wget "${base_url}/${prefix}_spatial.tar.gz"
wget "${base_url}/${prefix}_molecule_info.h5"
wget "${base_url}/${prefix}_feature_slice.h5"
# The archive is named *_binned_outputs* (plural); the original tar/rm lines
# referenced *_binned_output* and therefore failed.
tar -xzf "${prefix}_binned_outputs.tar.gz"
tar -xzf "${prefix}_spatial.tar.gz"
rm "${prefix}_binned_outputs.tar.gz"
rm "${prefix}_spatial.tar.gz"

In [None]:
import spatialdata_io

# Read the Visium HD output directory filled by the download cell above.
sdata = spatialdata_io.visium_hd(
    'visium_hd_crc_p1/',
)

In [None]:
from spatialdata_io.experimental import from_legacy_anndata, to_legacy_anndata

# Convert the 'square_016um' table of `sdata` into a legacy AnnData object,
# keeping the images and using the 'downscaled_hires' coordinate system.
legacy_export_opts = dict(
    include_images=True,
    table_name='square_016um',
    coordinate_system='downscaled_hires',
)
adata = to_legacy_anndata(sdata, **legacy_export_opts)

In [None]:
# Load the precomputed image features and attach them to adata.obsm['features'].
image_feat = pd.read_csv(
    'visium_hd_human_colorectal_P1_features.csv',
    index_col=0,
)
# The CSV index is replaced by the AnnData observation names.
# NOTE(review): this assumes the CSV rows are in the same order as adata.obs — confirm.
image_feat.index = adata.obs_names
adata.obsm['features'] = image_feat
# Keep only feature columns that have at least one non-zero value.
has_nonzero = (adata.obsm['features'] != 0).any(axis=0)
adata.obsm['features'] = adata.obsm['features'].loc[:, has_nonzero]

In [None]:
# log(1 + x) transform of the image features, written back in place.
# Re-running this cell applies the transform a second time.
features_before_log = adata.obsm['features']
adata.obsm['features'] = np.log1p(features_before_log)

In [None]:
# Disabled: model-selection sweep over candidate factor counts k = 1..250.
# Left commented out; k is set by hand in the next cell.
# from cp_utils import model_selection
# k = np.arange(1,251)
# mod_sel = model_selection(adata,k,random_state=random_state,reps=1)

In [None]:
# Number of factors used for every CellPie / NMF fit below.
k = 80

In [None]:
%%time
# Fit CellPie with k factors and mod1_skew=1.5.
nmf_model = intNMF(
    adata,
    k,
    epochs=50,
    init='nndsvd',
    random_state=random_state,
    mod1_skew=1.5,
)
nmf_model.fit(adata)

In [None]:
# weight=1
# NOTE(review): the fit directly above used mod1_skew=1.5 — confirm which weight this figure shows.
from re import sub
import matplotlib as mpl

# Spatial maps of two factors next to the two marker genes.
# sel_clust = ['Factor_'+str(i+1) for i in range(k)]
sel_clust = ['Factor_18', 'REG1A', 'Factor_9', 'TGFBI']
spatial_plot_opts = dict(
    cmap='magma',
    color=sel_clust,
    library_id='Visium_HD_Human_Colon_Cancer_P1_hires_image',
    ncols=2,
    size=3,
    img_key='hires',
    alpha_img=0.2,
)
with mpl.rc_context({'figure.figsize': (10, 8), 'axes.facecolor': 'black'}):
    sc.pl.spatial(adata, **spatial_plot_opts)

In [None]:
# weight=0.8
# NOTE(review): no fit with mod1_skew=0.8 has run at this point in the notebook — confirm the label.
from re import sub
import matplotlib as mpl

# Spatial maps of all k factors, seven panels per row.
sel_clust = ['Factor_' + str(idx) for idx in range(1, k + 1)]
rc_overrides = {'figure.figsize': (5, 6), 'axes.facecolor': 'black'}
with mpl.rc_context(rc_overrides):
    sc.pl.spatial(
        adata,
        cmap='magma',
        color=sel_clust,
        library_id='Visium_HD_Human_Colon_Cancer_P1_hires_image',
        ncols=7,
        size=5,
        img_key='hires',
        alpha_img=0.2,
    )

In [None]:
# De-duplicate gene names, then pull the two marker genes out of adata.
adata.var_names_make_unique()
reg, tgf = (
    sc.get.obs_df(adata, keys=gene_name)
    for gene_name in ('REG1A', 'TGFBI')
)

In [None]:
# Hard-coded values, in order: cellpie, nmf, nsf with 3000 inducing points.
# NOTE(review): presumably run times for the 16 um bins; units are not recorded here — confirm.
time_16 = [
    3.58,
    39.33,
    63,
]
# time_008 = [5.4]

In [None]:
# Horizontal bar chart of the values in time_16.
sns.barplot(
    time_16,
    orient='h',
    width=0.4,
)

In [None]:
# Pearson correlation between each marker gene and every CellPie factor.
# Factor columns are selected by name ('Factor_1' .. 'Factor_k', the same names
# used for plotting) rather than by a fixed positional offset into adata.obs,
# so extra or reordered obs columns cannot shift the selection.
factor_cols = ['Factor_' + str(i + 1) for i in range(k)]
factors = adata.obs[factor_cols]


def _corr_with_factors(expr, factor_df):
    """Return a (1, n_factors) array of Pearson r between `expr` and each column of `factor_df`.

    `expr` is flattened first, so it may be a Series or a single-column DataFrame.
    """
    expr = np.ravel(expr)
    return np.array([[
        np.corrcoef(expr, factor_df.iloc[:, j])[1][0]
        for j in range(factor_df.shape[1])
    ]])


# Names keep the original 'mi_scores_*' prefix because later cells refer to them;
# the values are Pearson correlations.
mi_scores_reg_15 = _corr_with_factors(reg, factors)
mi_scores_tgf_15 = _corr_with_factors(tgf, factors)

In [None]:
# One-row heatmap: Pearson r between REG1A and each CellPie factor.
plt.figure(figsize=(30,1))
plt.rcParams["axes.grid"] = False
sns.heatmap(mi_scores_reg_15, annot=True, cmap='viridis',
            xticklabels=factors.columns,annot_kws={"fontsize":10})
# NOTE(review): sns.set runs after the heatmap is drawn, so it presumably only
# affects the text added below and later figures — confirm the order is intended.
sns.set(font_scale=2)
# Title typo fixed: the gene is REG1A (was 'REG1As').
plt.title('Pearson Correlation Between REG1A and CellPie Factors')
plt.xlabel('Factors')
plt.ylabel('REG1A')
plt.show()

In [None]:
# One-row heatmap: Pearson r between TGFBI and each CellPie factor.
plt.figure(figsize=(30, 1))
plt.rcParams["axes.grid"] = False
sns.heatmap(
    mi_scores_tgf_15,
    annot=True,
    cmap='viridis',
    xticklabels=factors.columns,
    annot_kws={"fontsize": 10},
)
sns.set(font_scale=2)
plt.title('Pearson Correlation Between TGFBI and CellPie Factors')
plt.xlabel('Factors')
plt.ylabel('TGFBI')
plt.show()

In [None]:
# Scalar Pearson r for the mod1_skew=1.5 fit: REG1A vs Factor_18, TGFBI vs Factor_9.
# This rebinds both names from the per-factor arrays computed earlier to scalars,
# so the two heatmap cells above cannot be re-run after this cell.
mi_scores_reg_15 = np.corrcoef(reg, adata.obs['Factor_18'])[1, 0]
mi_scores_tgf_15 = np.corrcoef(tgf, adata.obs['Factor_9'])[1, 0]

In [None]:
%%time
# Refit CellPie with mod1_skew=1; all other settings match the first fit.
# NOTE(review): presumably fit() refreshes the Factor_* columns in adata.obs — confirm.
nmf_model = intNMF(
    adata,
    k,
    epochs=50,
    init='nndsvd',
    random_state=random_state,
    mod1_skew=1,
)
nmf_model.fit(adata)

In [None]:
# Scalar Pearson r after the mod1_skew=1 fit: REG1A vs Factor_18, TGFBI vs Factor_9.
mi_scores_reg_1 = np.corrcoef(reg, adata.obs['Factor_18'])[1, 0]
mi_scores_tgf_1 = np.corrcoef(tgf, adata.obs['Factor_9'])[1, 0]

In [None]:
%%time
# Refit CellPie with mod1_skew=0.8; all other settings match the first fit.
nmf_model = intNMF(
    adata,
    k,
    epochs=50,
    init='nndsvd',
    random_state=random_state,
    mod1_skew=0.8,
)
nmf_model.fit(adata)

In [None]:
# Scalar Pearson r after the mod1_skew=0.8 fit: REG1A vs Factor_18, TGFBI vs Factor_9.
mi_scores_reg_08 = np.corrcoef(reg, adata.obs['Factor_18'])[1, 0]
mi_scores_tgf_08 = np.corrcoef(tgf, adata.obs['Factor_9'])[1, 0]

In [None]:
%%time
# Refit CellPie with mod1_skew=1.8; all other settings match the first fit.
nmf_model = intNMF(
    adata,
    k,
    epochs=50,
    init='nndsvd',
    random_state=random_state,
    mod1_skew=1.8,
)
nmf_model.fit(adata)

In [None]:
# Scalar Pearson r after the mod1_skew=1.8 fit: REG1A vs Factor_18, TGFBI vs Factor_9.
mi_scores_reg_18 = np.corrcoef(reg, adata.obs['Factor_18'])[1, 0]
mi_scores_tgf_18 = np.corrcoef(tgf, adata.obs['Factor_9'])[1, 0]

In [None]:
%%time
# Refit CellPie with mod1_skew=2.0; all other settings match the first fit.
nmf_model = intNMF(
    adata,
    k,
    epochs=50,
    init='nndsvd',
    random_state=random_state,
    mod1_skew=2.0,
)
nmf_model.fit(adata)

In [None]:
# Scalar Pearson r after the mod1_skew=2.0 fit: REG1A vs Factor_18, TGFBI vs Factor_9.
mi_scores_reg_2 = np.corrcoef(reg, adata.obs['Factor_18'])[1, 0]
mi_scores_tgf_2 = np.corrcoef(tgf, adata.obs['Factor_9'])[1, 0]

In [None]:
# Collect the per-fit correlations in increasing mod1_skew order:
# 0.8, 1, 1.5, 1.8, 2.0.
# The fourth entry was previously the 1.5 score repeated, so the 1.8 result
# was computed but never used.
scores_reg = [
    mi_scores_reg_08,
    mi_scores_reg_1,
    mi_scores_reg_15,
    mi_scores_reg_18,
    mi_scores_reg_2,
]
scores_tgf = [
    mi_scores_tgf_08,
    mi_scores_tgf_1,
    mi_scores_tgf_15,
    mi_scores_tgf_18,
    mi_scores_tgf_2,
]

In [None]:
# Turn each score list into a single-row 2-D array for sns.heatmap.
scores_reg = np.reshape(np.array(scores_reg), (1, -1))
scores_tgf = np.reshape(np.array(scores_tgf), (1, -1))

In [None]:
# One-row heatmap of the REG1A correlations collected in scores_reg.
# Each column is one CellPie fit with a different mod1_skew value, in the
# order in which scores_reg was assembled.
plt.figure(figsize=(30, 5))
plt.rcParams["axes.grid"] = False
sns.heatmap(scores_reg, annot=True, cmap='viridis', annot_kws={"fontsize":50})
sns.set(font_scale=4)
# Title typo fixed: the gene is REG1A (was 'REG1As').
plt.title('Pearson Correlation Between REG1A and CellPie Factors')
# Columns are fits, not factors, so the axis is labelled accordingly.
plt.xlabel('mod1_skew run')
plt.ylabel('REG1A')
plt.show()

In [None]:
# One-row heatmap of the TGFBI correlations collected in scores_tgf.
# Each column is one CellPie fit with a different mod1_skew value, in the
# order in which scores_tgf was assembled.
plt.figure(figsize=(30, 5))
plt.rcParams["axes.grid"] = False
sns.heatmap(scores_tgf, annot=True, cmap='viridis',
 annot_kws={"fontsize":50})
sns.set(font_scale=4)
plt.title('Pearson Correlation Between TGFBI and CellPie Factors')
# Columns are fits, not factors, so the axis is labelled accordingly.
plt.xlabel('mod1_skew run')
plt.ylabel('TGFBI')
plt.show()

In [None]:
import gseapy as gp

In [None]:
def gene_er(topic, gene_sets=None):
    """Run an Enrichr enrichment query for a list of genes.

    Parameters
    ----------
    topic : list of str
        Gene names passed to Enrichr as the query gene list.
    gene_sets : list of str, optional
        Enrichr library names to test against. Defaults to
        ['MSigDB_Hallmark_2020'], which is what the one-argument call used before.

    Returns
    -------
    The object returned by ``gp.enrichr``; the cells below read its ``res2d``
    attribute for plotting.
    """
    if gene_sets is None:
        gene_sets = ['MSigDB_Hallmark_2020']
    enr = gp.enrichr(gene_list=topic,
                gene_sets=gene_sets,
                organism='human',
                outdir=None, # don't write to disk
                )
    return enr

In [None]:
# Gene-by-factor table built from the expression loadings of the current nmf_model.
# NOTE(review): at this point nmf_model is the most recent fit (mod1_skew=2.0),
# not the 1.5 fit plotted earlier — confirm that is intended.
l = cp.get_genes_topic(
    adata,
    nmf_model.phi_expr,
)

In [None]:
# Enrichment for the 100 highest-valued genes of rows 8 and 17 of l.T
# (0-based, presumably Factor_9 and Factor_18 — confirm).
top_genes_9 = l.T[8].sort_values(ascending=False).index[0:100].to_list()
top_genes_18 = l.T[17].sort_values(ascending=False).index[0:100].to_list()
enr9 = gene_er(top_genes_9)
enr18 = gene_er(top_genes_18)

In [None]:
# The enrichment above was run against MSigDB_Hallmark_2020, so the plot is
# titled with that library (the previous title named a GO library).
gp.barplot(enr9.res2d,title='MSigDB_Hallmark_2020')

In [None]:
# The enrichment above was run against MSigDB_Hallmark_2020, so the plot is
# titled with that library (the previous title named a GO library).
gp.barplot(enr18.res2d,title='MSigDB_Hallmark_2020')

In [None]:
%%time
from sklearn.decomposition import NMF

# Baseline: plain scikit-learn NMF on the expression matrix with the same k,
# initialisation and seed as the CellPie fits.
# `.toarray()` replaces the `.A` shorthand, which recent SciPy releases no
# longer provide on sparse matrices; a dense adata.X is used as-is.
X_dense = adata.X.toarray() if hasattr(adata.X, 'toarray') else np.asarray(adata.X)
model = NMF(n_components=k, init='nndsvd', random_state=random_state)
W = model.fit_transform(X_dense)  # fitted per-observation factor matrix
H = model.components_             # fitted per-gene factor matrix