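Load the required libraries. The CellPie module (`cellpie_main`, which provides `intNMF`) must be in the same directory; if it is not, its location can be added to Python's path, as done below with `sys.path.append`.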

In [None]:
import anndata as ad
import scanpy as sc
import numpy as np
import scipy
import sys
sys.path.append("../../CellPie/") # go to parent dir
import cellpie_main as cp
from cellpie_main import intNMF
import anndata as ad
from matplotlib import pyplot as plt
import scipy.io
import pandas as pd
import squidpy as sq
from PIL import Image
from sklearn.cluster import KMeans
import seaborn as sns
import spatialdata as sd
import spatialdata_io
from spatialdata_io import visium_hd

In [None]:
# Scanpy figure defaults for this notebook: 250 dpi on screen, 200 dpi when saved.
sc.set_figure_params(
    scanpy=True,
    dpi=250,
    dpi_save=200,
)

In [None]:
# Seed handed to intNMF(random_state=...) when the model is built below.
random_state=123

# CellPie

In [None]:
%%sh
# Download the 10x Genomics Visium HD human colon cancer (P1) files into
# visium_hd_crc_p1/ and unpack the two tar archives.
mkdir -p visium_hd_crc_p1
# Plain `cd` instead of pushd/popd: those are bash builtins and are missing
# from POSIX sh (e.g. dash). The cell runs in its own subprocess, so there is
# no need to change back at the end.
cd visium_hd_crc_p1/ || exit 1
wget https://cf.10xgenomics.com/samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Cancer_P1/Visium_HD_Human_Colon_Cancer_P1_binned_outputs.tar.gz
wget https://cf.10xgenomics.com/samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Cancer_P1/Visium_HD_Human_Colon_Cancer_P1_spatial.tar.gz
wget https://cf.10xgenomics.com/samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Cancer_P1/Visium_HD_Human_Colon_Cancer_P1_molecule_info.h5
wget https://cf.10xgenomics.com/samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Cancer_P1/Visium_HD_Human_Colon_Cancer_P1_feature_slice.h5
# The archive names must match the downloaded files ("binned_outputs", plural).
tar -xzf Visium_HD_Human_Colon_Cancer_P1_binned_outputs.tar.gz
tar -xzf Visium_HD_Human_Colon_Cancer_P1_spatial.tar.gz
rm Visium_HD_Human_Colon_Cancer_P1_binned_outputs.tar.gz
rm Visium_HD_Human_Colon_Cancer_P1_spatial.tar.gz

In [None]:
# Read the downloaded Visium HD output directory with the spatialdata-io reader
# (the same function as spatialdata_io.visium_hd, imported directly above).
sdata = visium_hd('visium_hd_crc_p1/')

In [None]:
# Values passed as `spot_scale` to extract_features_visiumhd below.
scale_range = [
    0.01, 0.05, 0.1, 0.5,
    1, 1.5, 2, 2.5, 3,
]

In [None]:
from feature_extr import extract_features_visiumhd

# Run the feature extraction for the square_016um resolution, pointing it at
# that resolution's hires tissue image and scale-factor JSON, for every value
# in scale_range.
features = extract_features_visiumhd(
    sdata,
    img_path='visium_hd_crc_p1/binned_outputs/square_016um/spatial/tissue_hires_image.png',
    json_path='visium_hd_crc_p1/binned_outputs/square_016um/spatial/scalefactors_json.json',
    resolution='square_016um',
    spot_scale=scale_range,
    bins=100,
)

In [None]:
from spatialdata_io.experimental import from_legacy_anndata, to_legacy_anndata

# Convert the square_016um table (images included) to a legacy AnnData object,
# using the 'downscaled_hires' coordinate system.
adata = to_legacy_anndata(
    sdata,
    include_images=True,
    table_name='square_016um',
    coordinate_system='downscaled_hires',
)

In [None]:
from cp_utils import preprocess_data_visiumHD
# The return value is discarded, so this presumably preprocesses `adata`
# in place -- TODO confirm against cp_utils.
preprocess_data_visiumHD(adata)

In [None]:
# # Model selection
# from cp_utils import model_selection_alpha
# k = np.arange(1,101)
# mod_sel = model_selection_alpha(adata,k,random_state=random_state,epochs = 20, init='random',mod1_skew=1)

In [None]:
# Passed to intNMF below and used as the count of 'Factor_<i>' names when plotting.
k=80

In [None]:
%%time
nmf_model = intNMF(adata,k,epochs = 20, init = 'random',random_state=random_state,mod1_skew=0.9)
nmf_model.fit(adata)

In [None]:
# De-duplicate gene names before looking genes up by name.
adata.var_names_make_unique()

# Per-observation values of the two genes of interest.
reg = sc.get.obs_df(adata, keys='REG1A')
tgf = sc.get.obs_df(adata, keys='TGFBI')

In [None]:
from re import sub
import matplotlib as mpl

# Two factors, each followed by a gene (REG1A, TGFBI), laid out two per row.
sel_clust = ['Factor_34', 'REG1A', 'Factor_11', 'TGFBI']

with mpl.rc_context({'figure.figsize': (10, 8), 'axes.facecolor': 'black'}):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        library_id='Visium_HD_Human_Colon_Cancer_P1_hires_image',
        img_key='hires',
        alpha_img=0.2,
        size=3,
        ncols=2,
    )

In [None]:
from re import sub
import matplotlib as mpl

# Plot every factor, Factor_1 .. Factor_k, seven panels per row.
sel_clust = [f'Factor_{i}' for i in range(1, k + 1)]

with mpl.rc_context({'figure.figsize': (5, 6), 'axes.facecolor': 'black'}):
    sc.pl.spatial(
        adata,
        color=sel_clust,
        cmap='magma',
        library_id='Visium_HD_Human_Colon_Cancer_P1_hires_image',
        img_key='hires',
        alpha_img=0.2,
        size=5,
        ncols=7,
    )

In [None]:
# Write the two score tables to CSV in the working directory.
# NOTE(review): mi_scores_reg_all / mi_scores_tgf_all are not defined in any
# cell visible here -- confirm that the cell computing them exists.
pd.DataFrame(mi_scores_reg_all).to_csv(path_or_buf='REG1A.csv')
pd.DataFrame(mi_scores_tgf_all).to_csv(path_or_buf='TGF.csv')

In [None]:
# Display the row-wise maximum of the TGF score table (the same series that is plotted below).
mi_scores_tgf_all.max(axis=1)

In [None]:
# x-axis: weights starting at 0.1 in steps of 0.1 (stop value 2.01).
weights = np.arange(0.1, 2.01, 0.1)

plt.figure(figsize=(10, 6))

# NOTE(review): the variables are named mi_scores_* while the labels and title
# say "correlation" / "Pearson" -- confirm which metric these tables hold.
plt.plot(
    weights,
    mi_scores_reg_all.max(axis=1),
    color='b',
    linestyle='-',
    marker='o',
    label='Max REG1A Correlation',
)
plt.plot(
    weights,
    mi_scores_tgf_all.max(axis=1),
    color='r',
    linestyle='--',
    marker='o',
    label='Max TGF Correlation',
)

plt.xlabel('Weight (mod1_skew)')
plt.ylabel('Maximum Correlation')
plt.title('Maximum Pearson Correlation Between Factors per Weight')
plt.ylim(0.5, 1)
plt.legend()
plt.grid(True)
plt.show()

In [None]:
import gseapy as gp

In [None]:
# Turn the axes grid off by default for figures created after this point.
plt.rcParams["axes.grid"] = False

In [None]:
def gene_er(topic, gene_sets=None, organism='human'):
    """Run an Enrichr enrichment analysis on a list of genes.

    Parameters
    ----------
    topic : list of str
        Genes to test; forwarded to ``gp.enrichr`` as ``gene_list``.
    gene_sets : str or list of str, optional
        Enrichr library name(s) to test against. Defaults to
        ``['MSigDB_Hallmark_2020']``, the library that was previously
        hard-coded, so existing ``gene_er(genes)`` calls are unaffected.
    organism : str, optional
        Organism forwarded to ``gp.enrichr``. Defaults to ``'human'``.

    Returns
    -------
    The object returned by ``gp.enrichr``; the notebook reads its ``res2d``
    attribute for plotting.
    """
    if gene_sets is None:
        gene_sets = ['MSigDB_Hallmark_2020']
    return gp.enrichr(
        gene_list=topic,
        gene_sets=gene_sets,
        organism=organism,
        outdir=None,  # don't write to disk
    )

In [None]:
# Gene-by-factor table from the fitted model's phi_expr; indexed below as l.T[<factor index>].
# NOTE(review): exact layout depends on cp.get_genes_topic -- confirm.
l=cp.get_genes_topic(adata,nmf_model.phi_expr)

In [None]:
# Two hand-picked gene lists; the names suggest SPP1+ and SELENOP+ macrophage
# signatures -- presumably marker genes, confirm the source.
spp1_mac = [
    'CD52', 'FBP1', 'IL1RN', 'FN1', 'LPL',
    'CHI3L1', 'MMP7', 'SPP1', 'CHIT1', 'TGFBI',
]
selenop_mac = [
    'SPP1', 'CSF1R', 'MPEG1', 'MS4A6A', 'FGL2', 'SLC40A1',
    'IGKC', 'STAB1', 'SELENOP', 'REG1A', 'REG1B',
]

In [None]:
# Enrichment on the 150 highest-scoring genes of two factors. Index 33 / 10
# presumably corresponds to Factor_34 / Factor_11 (zero-based) -- confirm.
enr34 = gene_er(
    l.T[33].sort_values(ascending=False).index[:150].tolist()
)
enr11 = gene_er(
    l.T[10].sort_values(ascending=False).index[:150].tolist()
)

In [None]:
# Raise the saved-figure resolution to 300 dpi for the enrichment plots.
sc.set_figure_params(
    scanpy=True,
    dpi=250,
    dpi_save=300,
)

In [None]:
# Bar plot of the Factor 34 enrichment results. The title names the library
# gene_er actually queries (MSigDB_Hallmark_2020), not GO Biological Process.
gp.barplot(
    enr34.res2d,
    title='MSigDB_Hallmark_2020_Factor_34',
    color=['darkred'],
)
plt.grid(False)

In [None]:
# Bar plot of the Factor 11 enrichment results. The title names the library
# gene_er actually queries (MSigDB_Hallmark_2020), not GO Biological Process.
gp.barplot(
    enr11.res2d,
    title='MSigDB_Hallmark_2020_Factor_11',
    color=['darkred'],
)
plt.grid(False)