In [1]:
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from anndata import AnnData
import pathlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import skimage
import seaborn as sns
import tangram as tg
import gc

import cosg

import scipy.stats as st

# Record the versions of scanpy (and the dependencies it reports) and of squidpy
# in the cell output, so the run can be tied to a concrete environment.
sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

# IPython magics: load the autoreload extension in mode 2 and select the
# inline matplotlib backend.
%load_ext autoreload
%autoreload 2
%matplotlib inline

scanpy==1.9.5 anndata==0.9.2 umap==0.5.4 numpy==1.22.0 scipy==1.10.1 pandas==1.5.0 scikit-learn==1.3.1 statsmodels==0.14.0 igraph==0.10.8 pynndescent==0.5.10
squidpy==1.2.3


In [2]:
import lightning

In [3]:
# Single place to change when the inputs move; both files below live in this directory.
DATA_DIR = pathlib.Path("/gpfs/gibbs/pi/zhao/tl688/tangram/data_smfish")

# `adata_sc` is read from scrnaseq_data.h5ad and `adata_st` from spatial_data.h5ad.
adata_sc = sc.read_h5ad(DATA_DIR / "scrnaseq_data.h5ad")
adata_st = sc.read_h5ad(DATA_DIR / "spatial_data.h5ad")

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
def calcualte_pse_correlation(adata_sc, adata_st, celltype, p_value_threshold = 0.05, cor_threshold = 0.5):
    """Select genes whose per-label mean expression agrees between two datasets.

    Both inputs are restricted to the genes and to the ``celltype`` labels they
    share. For every shared label the cells of each dataset are averaged into
    one pseudo-bulk profile, and for every gene the two resulting vectors
    (one value per label) are compared with a Pearson correlation.

    Parameters
    ----------
    adata_sc, adata_st
        AnnData-like objects exposing ``var_names``, ``obs`` and ``X`` and
        supporting ``adata[:, genes]`` / ``adata[row_mask]`` subsetting.
        ``X`` may be sparse (anything with ``toarray``) or dense. The caller's
        objects are not modified; only local subsets are taken.
    celltype
        Name of the ``obs`` column holding the labels in both datasets.
    p_value_threshold
        A gene is kept only if its correlation p-value is strictly below this.
    cor_threshold
        A gene is kept only if its Pearson r is strictly above this.

    Returns
    -------
    pandas.Index
        Names of the retained genes, in the order they occur in
        ``adata_st.var_names``. Genes whose correlation is NaN (e.g. a constant
        profile) fail both comparisons and are dropped.

    Raises
    ------
    ValueError
        If the datasets share fewer than two labels, since a correlation needs
        at least two points per gene.
    """
    def _mean_profile(matrix):
        # Densify only when needed so dense inputs do not fail on `.toarray()`.
        dense = matrix.toarray() if hasattr(matrix, "toarray") else np.asarray(matrix)
        return np.mean(dense, axis = 0)

    # Walk the spatial gene list instead of iterating a set, so the gene order
    # (and therefore the returned index) is the same on every run.
    sc_genes = set(adata_sc.var_names)
    overlap_gene = [gene for gene in adata_st.var_names if gene in sc_genes]
    adata_sc = adata_sc[:,overlap_gene]
    adata_st = adata_st[:,overlap_gene]

    # Same idea for the labels: first-appearance order in the spatial data.
    sc_labels = set(adata_sc.obs[celltype].unique())
    cell_type_common = [label for label in adata_st.obs[celltype].unique() if label in sc_labels]
    if len(cell_type_common) < 2:
        raise ValueError(
            f"Need at least two '{celltype}' labels shared by both datasets to "
            f"correlate pseudo-bulk profiles, found {len(cell_type_common)}."
        )

    # One row per shared label, one column per shared gene.
    pseudo_st = np.array([_mean_profile(adata_st[adata_st.obs[celltype] == label].X)
                          for label in cell_type_common])
    pseudo_sc = np.array([_mean_profile(adata_sc[adata_sc.obs[celltype] == label].X)
                          for label in cell_type_common])

    cor_pearson = []
    cor_pvalue = []
    for gene_idx in range(pseudo_st.shape[1]):
        cor, pval = st.pearsonr(pseudo_st[:,gene_idx], pseudo_sc[:,gene_idx])
        cor_pearson.append(cor)
        cor_pvalue.append(pval)

    information_stat = pd.DataFrame(
        {'pearson': cor_pearson, 'pvalue': cor_pvalue},
        index = adata_st.var_names,
    )

    keep = (information_stat['pvalue']<p_value_threshold) & (information_stat['pearson']>cor_threshold)
    return information_stat.loc[keep].index

In [6]:
# Pick the genes whose pseudo-bulk profiles correlate across the two datasets,
# grouping cells by the 'scClassify' column; the cut-offs are the function defaults.
info_gene = calcualte_pse_correlation(
    adata_sc,
    adata_st,
    celltype='scClassify',
    p_value_threshold=0.05,
    cor_threshold=0.5,
)

In [7]:
# Restrict both datasets to the selected genes, in the same column order.
adata_sc, adata_st = adata_sc[:, info_gene], adata_st[:, info_gene]

In [8]:
import random 

random.seed(2023)

# Genes present in both datasets, sorted so the list has a stable order.
overlap_gene = sorted(set(adata_st.var_names) & set(adata_sc.var_names))

In [9]:
# Hold out 33% of the shared genes as test genes; the fixed random_state keeps the split repeatable.
train_g, test_g = train_test_split(
    overlap_gene,
    test_size=0.33,
    random_state=2023,
)

In [10]:
# Overwrite the expression of the held-out test genes in the spatial data with 0.
# NOTE(review): the assignment goes through the temporary subset `adata_st[:, test_g]`,
# so it presumably relies on AnnData writing assignments on a view back to the
# underlying data — confirm the zeros actually land in `adata_st`.
adata_st[:, test_g].X = 0

In [11]:
def tangram_batch_allsc(adata_st_gt, adata_sc, savepath = "./data_breastbatch/", celltype = "graph_cluster_anno",  
                  epoch = 500, filename = 'human_breast', batchsize = 10000, 
                        set_seed = 0, density_prior = 'uniform', train_gene = train_g, spatial_label = 'scClassify',
                        device = "cuda:0"):
    """Map ``adata_sc`` onto ``adata_st_gt`` with Tangram in row batches and save the projections.

    The spatial data is processed ``batchsize`` observations at a time. For each
    batch the function runs ``tg.pp_adatas``, ``tg.map_cells_to_space`` (cells
    mode) and ``tg.project_genes``, then writes the projected AnnData to
    ``savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_{epoch}_{item}_seed{set_seed}.h5ad"``
    where ``item`` is the zero-based batch number.

    Parameters
    ----------
    adata_st_gt
        Spatial AnnData to map onto. Its ``obs[celltype]`` column is
        (re)written from ``obs[spatial_label]``; the batches themselves are
        copies, so nothing else on this object is touched by this function.
    adata_sc
        Single-cell AnnData passed to Tangram for every batch. ``tg.pp_adatas``
        receives it directly, so any in-place changes it makes apply to the
        caller's object.
    savepath
        String prepended verbatim to the output file name (include the trailing
        separator when it is a directory). The parent directory of the
        resulting path is created if missing.
    celltype, spatial_label
        ``obs[spatial_label]`` of the spatial data is copied into ``obs[celltype]``.
    epoch
        Number of training epochs per batch; also part of the file name.
    filename
        Prefix of the output file name.
    batchsize
        Maximum number of spatial observations per batch; must be positive.
    set_seed
        Seed handed to ``lightning.seed_everything``; also part of the file name.
    density_prior
        Forwarded to ``tg.map_cells_to_space``.
    train_gene
        Genes used for training. The default is the value the notebook global
        ``train_g`` had when this ``def`` was executed.
    device
        Torch device string forwarded to ``tg.map_cells_to_space``.

    Returns
    -------
    bool
        Always ``True`` once every batch has been written.

    Raises
    ------
    ValueError
        If ``batchsize`` is not positive.
    """
    if batchsize <= 0:
        raise ValueError(f"batchsize must be positive, got {batchsize}")

    lightning.seed_everything(set_seed)

    # Operate on the object that was passed in; the previous body silently used
    # the notebook-level `adata_st` here and when slicing batches.
    adata_st_gt.obs[celltype] = list(adata_st_gt.obs[spatial_label])

    # Make sure the destination exists before spending time on training.
    out_prefix = savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_{epoch}"
    pathlib.Path(out_prefix).parent.mkdir(parents=True, exist_ok=True)

    n_obs = len(adata_st_gt)
    # Stepping by batchsize avoids the empty trailing batch that
    # `n_obs // batchsize + 1` produced when n_obs is an exact multiple.
    for item, start in enumerate(range(0, n_obs, batchsize)):
        end = min(start + batchsize, n_obs)

        # Explicit copy: pp_adatas stores training genes and density priors on
        # the objects it is given, which should not happen on a view.
        adata_st_imp = adata_st_gt[start:end,:].copy()

        tg.pp_adatas(adata_sc, adata_st_imp, genes=train_gene, gene_to_lowercase = False)

        ad_map = tg.map_cells_to_space(adata_sc, adata_st_imp,
            mode="cells",
            density_prior=density_prior,
            num_epochs=epoch,
            device=device,
            correlation = False
        )

        gc.collect()
        ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc, gene_to_lowercase = False)
        gc.collect()
        print(epoch)
        ad_ge.write_h5ad(f"{out_prefix}_{item}_seed{set_seed}.h5ad")
    return True

In [15]:
# Repeat the 500-epoch Tangram run for seeds 0-9; outputs go under ./data_smfish/.
for seed in range(10):
    tangram_batch_allsc(
        adata_st,
        adata_sc,
        savepath='./data_smfish/',
        celltype='scClassify',
        epoch=500,
        filename='smfish',
        set_seed=seed,
        spatial_label='scClassify',
    )

[rank: 0] Global seed set to 0


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..
500


[rank: 0] Global seed set to 1


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 2


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 3


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 4


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 5


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..
500


[rank: 0] Global seed set to 6


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 7


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 8


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 9


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..
500


In [12]:
# Repeat the 100-epoch Tangram run for seeds 0-9; outputs go under ./data_smfish/.
for seed in range(10):
    tangram_batch_allsc(
        adata_st,
        adata_sc,
        savepath='./data_smfish/',
        celltype='scClassify',
        epoch=100,
        filename='smfish',
        set_seed=seed,
        spatial_label='scClassify',
    )

[rank: 0] Global seed set to 0


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 1


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 2


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 3


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 4


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 5


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 6


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 7


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 8


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 9


100
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Saving results..
100


In [13]:
# Repeat the 200-epoch Tangram run for seeds 0-9; outputs go under ./data_smfish/.
for seed in range(10):
    tangram_batch_allsc(
        adata_st,
        adata_sc,
        savepath='./data_smfish/',
        celltype='scClassify',
        epoch=200,
        filename='smfish',
        set_seed=seed,
        spatial_label='scClassify',
    )

[rank: 0] Global seed set to 0


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 1


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 2


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 3


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 4


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 5


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 6


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 7


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 8


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 9


200
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Saving results..
200


In [14]:
# Repeat the 300-epoch Tangram run for seeds 0-9; outputs go under ./data_smfish/.
for seed in range(10):
    tangram_batch_allsc(
        adata_st,
        adata_sc,
        savepath='./data_smfish/',
        celltype='scClassify',
        epoch=300,
        filename='smfish',
        set_seed=seed,
        spatial_label='scClassify',
    )

[rank: 0] Global seed set to 0


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 1


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 2


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 3


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 4


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 5


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 6


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 7


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 8


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 9


300
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Saving results..
300


In [15]:
# Repeat the Tangram run once per seed value (0 through 9), 400 epochs each;
# the seed is forwarded to the helper through `set_seed`.
for seed in range(10):
    tangram_batch_allsc(
        adata_st,
        adata_sc,
        epoch=400,
        savepath='./data_smfish/',
        celltype='scClassify',
        filename='smfish',
        spatial_label='scClassify',
        set_seed=seed,
    )

[rank: 0] Global seed set to 0


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 1


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 2


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 3


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 4


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 5


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 6


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 7


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 8


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.484, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 9


400
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
13 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.485, KL reg: 0.000
Score: 0.997, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..
400


In [1]:
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from anndata import AnnData
import pathlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import skimage
import seaborn as sns
import tangram as tg
import gc

import cosg

import scipy.stats as st

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

%load_ext autoreload
%autoreload 2
%matplotlib inline

scanpy==1.9.5 anndata==0.9.2 umap==0.5.4 numpy==1.22.0 scipy==1.10.1 pandas==1.5.0 scikit-learn==1.3.1 statsmodels==0.14.0 igraph==0.10.8 pynndescent==0.5.10
squidpy==1.2.3


In [2]:
import lightning

In [3]:
def calcualte_pse_correlation(adata_sc, adata_st, celltype, p_value_threshold = 0.05, cor_threshold = 0.5):
    """Select genes whose per-cell-type mean expression agrees between two datasets.

    For every gene present in both objects, the mean expression inside each
    cell type shared by both objects is computed, and the two resulting
    per-cell-type profiles are compared with ``scipy.stats.pearsonr``.

    Parameters
    ----------
    adata_sc, adata_st
        AnnData-like objects. ``.X`` may be sparse (anything exposing
        ``toarray()``) or a dense array.
    celltype
        Name of the ``.obs`` column, present in both objects, holding the
        cell-type labels.
    p_value_threshold
        A gene is kept only if its p-value is strictly below this value.
    cor_threshold
        A gene is kept only if its Pearson r is strictly above this value.

    Returns
    -------
    pandas.Index
        Names of the retained genes, in sorted order.

    Raises
    ------
    ValueError
        If fewer than two cell types are shared, because a correlation
        cannot be computed from fewer than two points.
    """
    def _to_dense(matrix):
        # Only sparse matrices expose ``toarray``; dense input is used as is.
        return matrix.toarray() if hasattr(matrix, "toarray") else np.asarray(matrix)

    # Sorting removes the dependence on set iteration order, so the gene order
    # (and the order of the returned index) is the same in every session.
    overlap_gene = sorted(set(adata_sc.var_names).intersection(adata_st.var_names))
    adata_sc = adata_sc[:, overlap_gene]
    adata_st = adata_st[:, overlap_gene]

    # key=str keeps the sort well defined even if the labels are not all strings.
    cell_type_common = sorted(
        set(adata_sc.obs[celltype].unique()).intersection(adata_st.obs[celltype].unique()),
        key=str,
    )
    if len(cell_type_common) < 2:
        raise ValueError(
            f"Need at least two cell types shared by both datasets in obs['{celltype}'] "
            f"to compute a correlation; found {len(cell_type_common)}."
        )

    # One pseudo-bulk row (mean over cells) per shared cell type.
    pseudo_st = np.array([
        np.mean(_to_dense(adata_st[adata_st.obs[celltype] == label].X), axis=0)
        for label in cell_type_common
    ])
    pseudo_sc = np.array([
        np.mean(_to_dense(adata_sc[adata_sc.obs[celltype] == label].X), axis=0)
        for label in cell_type_common
    ])

    cor_pearson = []
    cor_pvalue = []
    for gene_idx in range(pseudo_st.shape[1]):
        cor, pval = st.pearsonr(pseudo_st[:, gene_idx], pseudo_sc[:, gene_idx])
        cor_pearson.append(cor)
        cor_pvalue.append(pval)

    information_stat = pd.DataFrame(
        {'pearson': cor_pearson, 'pvalue': cor_pvalue},
        index=adata_st.var_names,
    )

    # NaN statistics compare False on both sides, so such genes are dropped here.
    keep = (information_stat['pvalue'] < p_value_threshold) & (information_stat['pearson'] > cor_threshold)
    information_stat_update = information_stat.loc[keep]

    return information_stat_update.index

In [4]:
# Load the reference AnnData and mirror its 'graph_cluster_anno' labels under
# the 'scClassify' column name that the later cells pass around.
adata_sc = sc.read_h5ad(
    "/gpfs/gibbs/pi/zhao/tl688/deconvdatasets/spatial_dataset/xenium_breast/sce_FFPE_full.h5ad"
)
adata_sc.obs["scClassify"] = adata_sc.obs["graph_cluster_anno"].copy()
adata_sc.var_names_make_unique()

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
def tangram_batch_allsc(adata_st_gt, adata_sc, savepath = "./data_breastbatch/", celltype = "graph_cluster_anno",  
                  epoch = 500, filename = 'human_breast', batchsize = 10000, 
                        set_seed = 0, density_prior = 'uniform', train_gene = None, spatial_label = 'scClassify',
                        device = "cuda:0"):
    """Run Tangram on ``adata_st_gt`` in consecutive row batches, one output file per batch.

    Each batch goes through ``tg.pp_adatas``, ``tg.map_cells_to_space``
    (``mode="cells"``) and ``tg.project_genes``; the projected AnnData is
    written to
    ``savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_{epoch}_{item}_seed{set_seed}.h5ad"``
    where ``item`` is the 0-based batch index.

    Parameters
    ----------
    adata_st_gt
        Spatial AnnData to process. Its ``obs[celltype]`` column is
        overwritten in place with the values of ``obs[spatial_label]``.
    adata_sc
        Single-cell AnnData handed to every Tangram call.
    savepath
        Prefix of the output path; it is concatenated as is, so a directory
        needs its trailing slash.
    celltype
        Name of the ``obs`` column written on ``adata_st_gt``.
    epoch
        Passed as ``num_epochs``; it is also part of the file name and is
        printed after each batch.
    filename
        Leading part of each output file name.
    batchsize
        Maximum number of rows per batch; must be positive.
    set_seed
        Only used in the output file name. No random generator is seeded
        here, so the caller is responsible for seeding.
    density_prior
        Forwarded to ``tg.map_cells_to_space``.
    train_gene
        Forwarded to ``tg.pp_adatas`` as ``genes``.
    spatial_label
        ``obs`` column of ``adata_st_gt`` copied into ``obs[celltype]``.
    device
        Forwarded to ``tg.map_cells_to_space``; defaults to the previously
        hard-coded ``"cuda:0"``.

    Returns
    -------
    bool
        Always ``True``.

    Raises
    ------
    ValueError
        If ``batchsize`` is not positive.
    """
    if batchsize <= 0:
        raise ValueError("batchsize must be a positive integer")

    # Use the object that was passed in. The previous version read a notebook
    # global named `adata_st`, which only worked when the caller's variable
    # happened to carry that name.
    adata_st_gt.obs[celltype] = list(adata_st_gt.obs[spatial_label])

    n_obs = len(adata_st_gt)
    # Stepping by `batchsize` never yields an empty trailing batch, unlike
    # `n_obs // batchsize + 1` when n_obs is an exact multiple of batchsize.
    for item, start in enumerate(range(0, n_obs, batchsize)):
        end = min(start + batchsize, n_obs)

        # Hand Tangram a standalone object instead of a view of the input; its
        # log output reports entries being saved on the spatial AnnData.
        adata_st_imp = adata_st_gt[start:end, :].copy()

        tg.pp_adatas(adata_sc, adata_st_imp, genes=train_gene, gene_to_lowercase = False)

        # NOTE(review): `correlation` is passed exactly as the original call did;
        # confirm the installed Tangram build accepts this keyword.
        ad_map = tg.map_cells_to_space(adata_sc, adata_st_imp,
            mode="cells",
            density_prior=density_prior,
            num_epochs=epoch,
            device=device,
            correlation = False
        )

        gc.collect()
        ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc, gene_to_lowercase = False)
        gc.collect()
        print(epoch)
        ad_ge.write_h5ad(savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_{epoch}_{item}_seed{set_seed}.h5ad")
    return True

In [22]:
import random

# One Tangram run (100 epochs) per simulated spatial file, seed0 through seed9.
for seed in range(10):
    # NOTE(review): the global RNG seed is 0 on every iteration; only the input
    # file and the output file name vary with `seed`. Confirm this is intended.
    lightning.seed_everything(0)
    adata_st = sc.read_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/human_breast_simulation/spe_xenium_data_0.1_seed{seed}.h5ad")

    # The reference is re-read each time because it is subset to `info_gene` below.
    adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/deconvdatasets/spatial_dataset/xenium_breast/sce_FFPE_full.h5ad")
    adata_sc.obs["scClassify"] = adata_sc.obs["graph_cluster_anno"].copy()
    adata_sc.var_names_make_unique()

    # Keep only the genes selected by the pseudo-bulk correlation filter.
    info_gene = calcualte_pse_correlation(adata_sc, adata_st, "scClassify")
    adata_sc, adata_st = adata_sc[:, info_gene], adata_st[:, info_gene]

    random.seed(2023)
    overlap_gene = sorted(set(adata_st.var_names).intersection(adata_sc.var_names))

    # Hold out a third of the genes and zero them in the spatial data.
    train_g, test_g = train_test_split(overlap_gene, test_size=0.33, random_state=2023)
    adata_st[:, test_g].X = 0

    tangram_batch_allsc(
        adata_st,
        adata_sc,
        epoch=100,
        savepath='./data_breastseed/',
        celltype='scClassify',
        filename='humanbreast',
        spatial_label='scClassify',
        batchsize=20000,
        set_seed=seed,
        train_gene=train_g,
    )

[rank: 0] Global seed set to 0


182 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
182 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 182 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.324, KL reg: 0.000
Saving results..
100


[rank: 0] Global seed set to 0


183 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
183 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 183 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.323, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
182 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
182 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 182 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.329, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
185 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
185 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 185 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.324, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
182 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
182 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 182 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.315, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
176 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
176 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 176 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.336, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
184 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
184 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 184 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.328, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
183 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
183 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 183 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.325, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
186 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
186 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 186 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.332, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
184 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
184 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 184 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.326, KL reg: 0.000
Saving results..
100


In [7]:
# NOTE(review): scratch assignment; `a` is not referenced by any other cell
# visible in this part of the notebook — candidate for removal.
a = 1

In [8]:
import random

# One Tangram run (400 epochs) per simulated spatial file, seed0 through seed9.
for seed in range(10):
    # NOTE(review): the global RNG seed is 0 on every iteration; only the input
    # file and the output file name vary with `seed`. Confirm this is intended.
    lightning.seed_everything(0)
    adata_st = sc.read_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/human_breast_simulation/spe_xenium_data_0.1_seed{seed}.h5ad")

    # The reference is re-read each time because it is subset to `info_gene` below.
    adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/deconvdatasets/spatial_dataset/xenium_breast/sce_FFPE_full.h5ad")
    adata_sc.obs["scClassify"] = adata_sc.obs["graph_cluster_anno"].copy()
    adata_sc.var_names_make_unique()

    # Keep only the genes selected by the pseudo-bulk correlation filter.
    info_gene = calcualte_pse_correlation(adata_sc, adata_st, "scClassify")
    adata_sc, adata_st = adata_sc[:, info_gene], adata_st[:, info_gene]

    random.seed(2023)
    overlap_gene = sorted(set(adata_st.var_names).intersection(adata_sc.var_names))

    # Hold out a third of the genes and zero them in the spatial data.
    train_g, test_g = train_test_split(overlap_gene, test_size=0.33, random_state=2023)
    adata_st[:, test_g].X = 0

    tangram_batch_allsc(
        adata_st,
        adata_sc,
        epoch=400,
        savepath='./data_breastseed/',
        celltype='scClassify',
        filename='humanbreast',
        spatial_label='scClassify',
        batchsize=20000,
        set_seed=seed,
        train_gene=train_g,
    )

[rank: 0] Global seed set to 0


182 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
182 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 182 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.324, KL reg: 0.000
Score: 0.873, KL reg: 0.002
Score: 0.893, KL reg: 0.001
Score: 0.897, KL reg: 0.001
Saving results..
400


[rank: 0] Global seed set to 0


183 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
183 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 183 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.323, KL reg: 0.000
Score: 0.876, KL reg: 0.002
Score: 0.895, KL reg: 0.001
Score: 0.899, KL reg: 0.001
Saving results..
400


[rank: 0] Global seed set to 0


182 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
182 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 182 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.329, KL reg: 0.000
Score: 0.875, KL reg: 0.002
Score: 0.894, KL reg: 0.001
Score: 0.898, KL reg: 0.001
Saving results..


[rank: 0] Global seed set to 0


400
185 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
185 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 185 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.324, KL reg: 0.000
Score: 0.869, KL reg: 0.002
Score: 0.889, KL reg: 0.001
Score: 0.893, KL reg: 0.001
Saving results..


[rank: 0] Global seed set to 0


400
182 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
182 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 182 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.315, KL reg: 0.000
Score: 0.875, KL reg: 0.002
Score: 0.894, KL reg: 0.001
Score: 0.898, KL reg: 0.001
Saving results..


[rank: 0] Global seed set to 0


400
176 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
176 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 176 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.336, KL reg: 0.000
Score: 0.877, KL reg: 0.002
Score: 0.896, KL reg: 0.001
Score: 0.900, KL reg: 0.001
Saving results..


[rank: 0] Global seed set to 0


400
184 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
184 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 184 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.328, KL reg: 0.000
Score: 0.874, KL reg: 0.002
Score: 0.894, KL reg: 0.001
Score: 0.898, KL reg: 0.001
Saving results..


[rank: 0] Global seed set to 0


400
183 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
183 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 183 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.325, KL reg: 0.000
Score: 0.872, KL reg: 0.002
Score: 0.891, KL reg: 0.001
Score: 0.895, KL reg: 0.001
Saving results..


[rank: 0] Global seed set to 0


400
186 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
186 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 186 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.332, KL reg: 0.000
Score: 0.867, KL reg: 0.002
Score: 0.887, KL reg: 0.001
Score: 0.891, KL reg: 0.001
Saving results..


[rank: 0] Global seed set to 0


400
184 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
184 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 184 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.326, KL reg: 0.000
Score: 0.879, KL reg: 0.002
Score: 0.898, KL reg: 0.001
Score: 0.902, KL reg: 0.001
Saving results..
400


In [None]:
# For each seed, read the two per-batch result files (100-epoch runs), join the
# ones that contain genes, and write the combined object back to disk.
for seed in range(10):
    adata_list = []
    for i in range(2):
        path = f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/mousebrain_data_batch_maskgene_raw_filter0.5_100_{i}_seed{seed}.h5ad"
        adata_i = sc.read_h5ad(path)

        if len(adata_i.var_names) != 0:
            adata_list.append(adata_i)
        else:
            # A batch without any genes is reported and left out of the join.
            print(adata_i.obs['scClassify'].unique())
    adata = sc.concat(adata_list, join='outer')
    adata.write_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/mousebrain_data_filter0.5_100_seed{seed}.h5ad")

In [11]:
# For each seed, read the two per-batch result files (400-epoch runs), join the
# ones that contain genes, and write the combined object back to disk.
for seed in range(10):
    adata_list = []
    for i in range(2):
        path = f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/mousebrain_data_batch_maskgene_raw_filter0.5_400_{i}_seed{seed}.h5ad"
        adata_i = sc.read_h5ad(path)

        if len(adata_i.var_names) != 0:
            adata_list.append(adata_i)
        else:
            # A batch without any genes is reported and left out of the join.
            print(adata_i.obs['scClassify'].unique())
    adata = sc.concat(adata_list, join='outer')
    adata.write_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/mousebrain_data_filter0.5_400_seed{seed}.h5ad")

In [10]:
# For each seed, read the two per-batch result files (100-epoch runs), join the
# ones that contain genes, and write the combined object back to disk.
for seed in range(10):
    adata_list = []
    for i in range(2):
        path = f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/mousebrain_data_batch_maskgene_raw_filter0.5_100_{i}_seed{seed}.h5ad"
        adata_i = sc.read_h5ad(path)

        if len(adata_i.var_names) != 0:
            adata_list.append(adata_i)
        else:
            # A batch without any genes is reported and left out of the join.
            print(adata_i.obs['scClassify'].unique())
    adata = sc.concat(adata_list, join='outer')
    adata.write_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/mousebrain_data_filter0.5_100_seed{seed}.h5ad")

# Brain

In [3]:
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from anndata import AnnData
import pathlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import skimage
import seaborn as sns
import tangram as tg
import gc

import cosg

import scipy.stats as st

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

%load_ext autoreload
%autoreload 2
%matplotlib inline

scanpy==1.9.5 anndata==0.9.2 umap==0.5.4 numpy==1.22.0 scipy==1.10.1 pandas==1.5.0 scikit-learn==1.3.1 statsmodels==0.14.0 igraph==0.10.8 pynndescent==0.5.10
squidpy==1.2.3
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
import lightning

In [5]:
def calcualte_pse_correlation(adata_sc, adata_st, celltype, p_value_threshold = 0.05, cor_threshold = 0.5):
    """Select genes whose per-cell-type mean expression agrees between two datasets.

    For every gene present in both objects, the mean expression inside each
    cell type shared by both objects is computed, and the two resulting
    per-cell-type profiles are compared with ``scipy.stats.pearsonr``.

    Parameters
    ----------
    adata_sc, adata_st
        AnnData-like objects. ``.X`` may be sparse (anything exposing
        ``toarray()``) or a dense array.
    celltype
        Name of the ``.obs`` column, present in both objects, holding the
        cell-type labels.
    p_value_threshold
        A gene is kept only if its p-value is strictly below this value.
    cor_threshold
        A gene is kept only if its Pearson r is strictly above this value.

    Returns
    -------
    pandas.Index
        Names of the retained genes, in sorted order.

    Raises
    ------
    ValueError
        If fewer than two cell types are shared, because a correlation
        cannot be computed from fewer than two points.
    """
    def _to_dense(matrix):
        # Only sparse matrices expose ``toarray``; dense input is used as is.
        return matrix.toarray() if hasattr(matrix, "toarray") else np.asarray(matrix)

    # Sorting removes the dependence on set iteration order, so the gene order
    # (and the order of the returned index) is the same in every session.
    overlap_gene = sorted(set(adata_sc.var_names).intersection(adata_st.var_names))
    adata_sc = adata_sc[:, overlap_gene]
    adata_st = adata_st[:, overlap_gene]

    # key=str keeps the sort well defined even if the labels are not all strings.
    cell_type_common = sorted(
        set(adata_sc.obs[celltype].unique()).intersection(adata_st.obs[celltype].unique()),
        key=str,
    )
    if len(cell_type_common) < 2:
        raise ValueError(
            f"Need at least two cell types shared by both datasets in obs['{celltype}'] "
            f"to compute a correlation; found {len(cell_type_common)}."
        )

    # One pseudo-bulk row (mean over cells) per shared cell type.
    pseudo_st = np.array([
        np.mean(_to_dense(adata_st[adata_st.obs[celltype] == label].X), axis=0)
        for label in cell_type_common
    ])
    pseudo_sc = np.array([
        np.mean(_to_dense(adata_sc[adata_sc.obs[celltype] == label].X), axis=0)
        for label in cell_type_common
    ])

    cor_pearson = []
    cor_pvalue = []
    for gene_idx in range(pseudo_st.shape[1]):
        cor, pval = st.pearsonr(pseudo_st[:, gene_idx], pseudo_sc[:, gene_idx])
        cor_pearson.append(cor)
        cor_pvalue.append(pval)

    information_stat = pd.DataFrame(
        {'pearson': cor_pearson, 'pvalue': cor_pvalue},
        index=adata_st.var_names,
    )

    # NaN statistics compare False on both sides, so such genes are dropped here.
    keep = (information_stat['pvalue'] < p_value_threshold) & (information_stat['pearson'] > cor_threshold)
    information_stat_update = information_stat.loc[keep]

    return information_stat_update.index

In [6]:
# Load the reference AnnData and mirror its 'graph_cluster_anno' labels under
# the 'scClassify' column name that the later cells pass around.
adata_sc = sc.read_h5ad(
    "/gpfs/gibbs/pi/zhao/tl688/deconvdatasets/spatial_dataset/xenium_breast/sce_FFPE_full.h5ad"
)
adata_sc.obs["scClassify"] = adata_sc.obs["graph_cluster_anno"].copy()
adata_sc.var_names_make_unique()

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
def tangram_batch_allsc(adata_st_gt, adata_sc, savepath = "./data_breastbatch/", celltype = "graph_cluster_anno",  
                  epoch = 500, filename = 'human_breast', batchsize = 10000, 
                        set_seed = 0, density_prior = 'uniform', train_gene = None, spatial_label = 'scClassify'):
    """Run Tangram on a spatial dataset in row batches and save the projected genes.

    The spatial object is cut into consecutive slices of at most ``batchsize``
    observations. For each slice the single-cell reference is mapped with
    ``tg.map_cells_to_space`` (mode ``"cells"``, device ``"cuda:0"``), the
    reference expression is projected with ``tg.project_genes``, and the result
    is written to one ``.h5ad`` file.

    Parameters
    ----------
    adata_st_gt
        Spatial AnnData to impute. Its ``obs[celltype]`` column is overwritten
        with the values of ``obs[spatial_label]``.
    adata_sc
        Single-cell reference AnnData passed to Tangram.
    savepath
        Output directory prefix; concatenated as-is with the file name, so it
        must end with a path separator.
    celltype, spatial_label
        ``obs`` column names; ``spatial_label`` is copied into ``celltype``.
    epoch
        Number of training epochs per batch; also part of the output file name.
    filename
        Prefix of the output file name.
    batchsize
        Maximum number of spatial observations per Tangram run.
    set_seed
        Only used to tag the output file name; no RNG is seeded here.
    density_prior
        Forwarded to ``tg.map_cells_to_space``.
    train_gene
        Genes forwarded to ``tg.pp_adatas`` as ``genes``.

    Returns
    -------
    True once every batch has been written.
    """
    markers = train_gene
    # Operate on the object that was passed in. The body used to read the
    # notebook-global ``adata_st``, which only worked because callers happened
    # to pass that same object.
    adata_st_gt.obs[celltype] = list(adata_st_gt.obs[spatial_label])

    n_obs = len(adata_st_gt)
    # Ceiling division: no empty trailing batch when n_obs is an exact
    # multiple of batchsize.
    n_batches = (n_obs + batchsize - 1) // batchsize
    for item in range(n_batches):
        start = item*batchsize
        end = min(start + batchsize, n_obs)

        adata_st_imp = adata_st_gt[start:end,:]

        tg.pp_adatas(adata_sc, adata_st_imp, genes=markers, gene_to_lowercase = False)

        ad_map = tg.map_cells_to_space(adata_sc, adata_st_imp,
            mode="cells",
            density_prior=density_prior,
            num_epochs=epoch,
            device="cuda:0",       # switch to device='cpu' when no GPU is available
            correlation = False
        )

        # Release memory between the mapping and projection steps.
        gc.collect()
        ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc, gene_to_lowercase = False)
        gc.collect()
        print(epoch)
        ad_ge.write_h5ad(savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_{epoch}_{item}_seed{set_seed}.h5ad")
    return True

In [9]:
# Mouse-brain benchmark: for each seed 0-9, load that seed's spatial file, keep the
# genes selected by calcualte_pse_correlation, zero a third of them in the spatial
# data, and run Tangram for 100 epochs.
for seed in range(0,10):

    # NOTE(review): the global RNG seed is fixed at 0 on every iteration; `seed` only
    # selects the input file and tags the output name. Confirm this is intended.
    lightning.seed_everything(0)
    adata_st = sc.read_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/spe_xenium_data_0.1_seed{seed}.h5ad")

    # The reference is reloaded each iteration because it is subset below.
    adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/deconvdatasets/spatial_dataset/xenium_brain/aibs_mouse_ctx-hpf_smartseq_sce.h5ad")
    adata_sc.obs['scClassify'] = adata_sc.obs['cell_type_alias_label2'].copy()
    adata_sc.var_names_make_unique()

    # Genes whose per-cell-type means correlate between the two datasets (defaults: p < 0.05, r > 0.5).
    info_gene = calcualte_pse_correlation(adata_sc, adata_st, 'scClassify')
    
    adata_sc = adata_sc[:,info_gene]
    adata_st = adata_st[:,info_gene]

    # NOTE(review): `random` is seeded but not used in this cell; the split below is
    # controlled by random_state.
    import random 
    random.seed(2023)
    overlap_gene = list(set(adata_st.var_names).intersection(adata_sc.var_names))
    # Sort so the train/test split does not depend on set iteration order.
    overlap_gene = sorted(overlap_gene)

    train_g, test_g = train_test_split(overlap_gene, test_size=0.33, random_state=2023)
    # Zero the held-out genes in the spatial data.
    # NOTE(review): this assigns through a subset of adata_st; presumably AnnData
    # writes the value back to adata_st - confirm.
    adata_st[:, test_g].X = 0
    
    # tangram_batch_allsc also reads the global `adata_st` bound above.
    tangram_batch_allsc(adata_st, adata_sc, epoch=100, savepath='./data_brain/', 
                        celltype='scClassify', filename = 'mousebrain', spatial_label = 'scClassify',
                        batchsize = 10000,
                       set_seed = seed,
                       train_gene = train_g)

[rank: 0] Global seed set to 0


104 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
104 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 104 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.360, KL reg: 0.000
Saving results..
100
104 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
104 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.


[rank: 0] Global seed set to 0


100
109 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
109 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 109 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.366, KL reg: 0.000
Saving results..
100
109 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
109 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
106 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
106 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 106 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.363, KL reg: 0.000
Saving results..
100
106 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
106 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
107 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
107 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 107 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.372, KL reg: 0.000
Saving results..
100
107 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
107 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
101 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
101 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 101 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.350, KL reg: 0.000
Saving results..
100
101 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
101 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
108 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
108 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 108 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.384, KL reg: 0.000
Saving results..
100
108 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
108 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
113 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
113 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 113 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.373, KL reg: 0.000
Saving results..
100
113 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
113 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
109 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
109 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 109 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.367, KL reg: 0.000
Saving results..
100
109 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
109 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
111 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
111 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 111 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.372, KL reg: 0.000
Saving results..
100
111 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
111 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

[rank: 0] Global seed set to 0


100
109 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
109 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 109 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.367, KL reg: 0.000
Saving results..
100
109 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
109 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Annda

In [None]:
# Same mouse-brain pipeline as the 100-epoch run, but training Tangram for 400 epochs.
for seed in range(0,10):

    # NOTE(review): the global RNG seed is fixed at 0 on every iteration; `seed` only
    # selects the input file and tags the output name. Confirm this is intended.
    lightning.seed_everything(0)
    adata_st = sc.read_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_brain/spe_xenium_data_0.1_seed{seed}.h5ad")

    # The reference is reloaded each iteration because it is subset below.
    adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/deconvdatasets/spatial_dataset/xenium_brain/aibs_mouse_ctx-hpf_smartseq_sce.h5ad")
    adata_sc.obs['scClassify'] = adata_sc.obs['cell_type_alias_label2'].copy()
    adata_sc.var_names_make_unique()

    # Genes whose per-cell-type means correlate between the two datasets (defaults: p < 0.05, r > 0.5).
    info_gene = calcualte_pse_correlation(adata_sc, adata_st, 'scClassify')
    
    adata_sc = adata_sc[:,info_gene]
    adata_st = adata_st[:,info_gene]

    # NOTE(review): `random` is seeded but not used in this cell; the split below is
    # controlled by random_state.
    import random 
    random.seed(2023)
    overlap_gene = list(set(adata_st.var_names).intersection(adata_sc.var_names))
    # Sort so the train/test split does not depend on set iteration order.
    overlap_gene = sorted(overlap_gene)

    train_g, test_g = train_test_split(overlap_gene, test_size=0.33, random_state=2023)
    # Zero the held-out genes in the spatial data.
    # NOTE(review): this assigns through a subset of adata_st; presumably AnnData
    # writes the value back to adata_st - confirm.
    adata_st[:, test_g].X = 0
    
    # tangram_batch_allsc also reads the global `adata_st` bound above.
    tangram_batch_allsc(adata_st, adata_sc, epoch=400, savepath='./data_brain/', 
                        celltype='scClassify', filename = 'mousebrain', spatial_label = 'scClassify',
                        batchsize = 10000,
                       set_seed = seed,
                       train_gene = train_g)

[rank: 0] Global seed set to 0


104 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
104 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 104 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.360, KL reg: 0.000
Score: 0.902, KL reg: 0.008
Score: 0.924, KL reg: 0.004
Score: 0.927, KL reg: 0.003
Saving results..
400
104 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
104 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prio

[rank: 0] Global seed set to 0


109 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
109 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 109 genes and uniform density_prior in cells mode...


# tangram normalized

In [16]:
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from anndata import AnnData
import pathlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import skimage
import seaborn as sns
import tangram as tg
import gc

import cosg

import scipy.stats as st

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

%load_ext autoreload
%autoreload 2
%matplotlib inline

scanpy==1.9.5 anndata==0.9.2 umap==0.5.4 numpy==1.22.0 scipy==1.10.1 pandas==2.0.3 scikit-learn==1.3.1 statsmodels==0.14.0 igraph==0.10.8 pynndescent==0.5.10
squidpy==1.2.3
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import lightning

In [26]:
# Load the smFISH benchmark pair: the scRNA-seq reference and the spatial measurements.
adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/tangram/data_smfish/scrnaseq_data.h5ad")
adata_st = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/tangram/data_smfish/spatial_data.h5ad")

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
def calcualte_pse_correlation(adata_sc, adata_st, celltype, p_value_threshold = 0.05, cor_threshold = 0.5):
    """Select genes whose per-cell-type mean expression agrees between two datasets.

    Both inputs are restricted to the genes they share. For every cell type
    present in ``obs[celltype]`` of both inputs, the mean expression of each
    gene is computed (a pseudo-bulk profile). Each gene's two profiles are then
    compared across cell types with ``scipy.stats.pearsonr``.

    Parameters
    ----------
    adata_sc, adata_st
        AnnData-like objects exposing ``var_names``, ``obs`` and ``X`` and
        supporting ``adata[:, genes]`` / ``adata[mask]`` subsetting. ``X`` may be
        a scipy sparse matrix or a dense array.
    celltype
        Name of the ``obs`` column holding the cell-type label in both inputs.
    p_value_threshold
        A gene is kept only if its p-value is strictly below this value.
    cor_threshold
        A gene is kept only if its Pearson r is strictly above this value.

    Returns
    -------
    The index of retained gene names, ordered as in ``adata_st.var_names``.

    Raises
    ------
    ValueError
        Propagated from ``pearsonr`` when fewer than two cell types are shared.
    """
    # Walk adata_st.var_names (de-duplicated, order kept) instead of turning a
    # set intersection into a list: set iteration order is not stable between
    # interpreter runs, which made the returned gene order irreproducible.
    sc_genes = set(adata_sc.var_names)
    overlap_gene = [gene for gene in dict.fromkeys(adata_st.var_names) if gene in sc_genes]
    adata_sc = adata_sc[:,overlap_gene]
    adata_st = adata_st[:,overlap_gene]

    cell_type_common = list(set(adata_sc.obs[celltype].unique()).intersection(adata_st.obs[celltype].unique()))

    pseudo_st = []
    pseudo_sc = []
    for label in cell_type_common:
        for adata, collected in ((adata_st, pseudo_st), (adata_sc, pseudo_sc)):
            X = adata[adata.obs[celltype] == label].X
            # Only sparse matrices have .toarray(); dense X is used as it is.
            dense = X.toarray() if hasattr(X, "toarray") else np.asarray(X)
            collected.append(np.mean(dense, axis = 0))

    # Rows are cell types, columns are genes.
    pseudo_st = np.array(pseudo_st)
    pseudo_sc = np.array(pseudo_sc)

    cor_pearson = []
    cor_pvalue = []
    for gene_idx in range(pseudo_st.shape[1]):
        # A gene that is constant across cell types yields NaN here; NaN fails
        # both comparisons below, so such genes are dropped.
        cor, pval = st.pearsonr(pseudo_st[:,gene_idx], pseudo_sc[:,gene_idx])
        cor_pearson.append(cor)
        cor_pvalue.append(pval)

    information_stat = pd.DataFrame({'pearson': cor_pearson, 'pvalue': cor_pvalue},
                                    index = adata_st.var_names)

    keep = (information_stat['pvalue'] < p_value_threshold) & (information_stat['pearson'] > cor_threshold)
    information_stat_update = information_stat.loc[keep]

    return information_stat_update.index

In [29]:
# Genes whose per-cell-type means correlate between reference and spatial data (defaults: p < 0.05, r > 0.5).
info_gene = calcualte_pse_correlation(adata_sc, adata_st, 'scClassify')

In [30]:
# Restrict both datasets to the selected genes. This rebinds the global names, so the
# unfiltered objects are only recoverable by re-running the load cell.
adata_sc = adata_sc[:,info_gene]
adata_st = adata_st[:,info_gene]

In [31]:
# NOTE(review): `random` is seeded but not used in this cell; the later split is
# controlled by train_test_split's random_state.
import random 
random.seed(2023)
overlap_gene = list(set(adata_st.var_names).intersection(adata_sc.var_names))
# Sort so downstream results do not depend on set iteration order.
overlap_gene = sorted(overlap_gene)

In [32]:
# Hold out 33% of the shared genes as test genes; the split is fixed by random_state.
train_g, test_g = train_test_split(overlap_gene, test_size=0.33, random_state=2023)

In [33]:
# adata_st[:, test_g].X = 0

In [34]:
# Normalise total counts per observation and log1p-transform both datasets with scanpy
# defaults. These calls modify the objects, so re-running this cell transforms the data
# a second time.
sc.pp.normalize_total(adata_sc)
sc.pp.log1p(adata_sc)

sc.pp.normalize_total(adata_st)
sc.pp.log1p(adata_st)

In [35]:
def tangram_batch_allsc(adata_st_gt, adata_sc, savepath = "./data_breastbatch/", celltype = "graph_cluster_anno",  
                  epoch = 500, filename = 'human_breast', batchsize = 10000, 
                        set_seed = 0, density_prior = 'uniform', train_gene = train_g, spatial_label = 'scClassify'):
    """Run Tangram on a normalized spatial dataset in row batches and save the projected genes.

    The global RNG state is seeded with ``lightning.seed_everything(set_seed)``.
    The spatial object is then cut into consecutive slices of at most
    ``batchsize`` observations. For each slice the single-cell reference is
    mapped with ``tg.map_cells_to_space`` (mode ``"cells"``, device
    ``"cuda:0"``), the reference expression is projected with
    ``tg.project_genes``, and the result is written to one ``.h5ad`` file.

    Parameters
    ----------
    adata_st_gt
        Spatial AnnData to impute. Its ``obs[celltype]`` column is overwritten
        with the values of ``obs[spatial_label]``.
    adata_sc
        Single-cell reference AnnData passed to Tangram.
    savepath
        Output directory prefix; concatenated as-is with the file name, so it
        must end with a path separator.
    celltype, spatial_label
        ``obs`` column names; ``spatial_label`` is copied into ``celltype``.
    epoch
        Number of training epochs per batch; also part of the output file name.
    filename
        Prefix of the output file name.
    batchsize
        Maximum number of spatial observations per Tangram run.
    set_seed
        Seed passed to ``lightning.seed_everything``; also tags the file name.
    density_prior
        Forwarded to ``tg.map_cells_to_space``.
    train_gene
        Genes forwarded to ``tg.pp_adatas`` as ``genes``. The default is the
        value the global ``train_g`` had when this ``def`` was executed; later
        changes to ``train_g`` are not picked up unless the cell is re-run.

    Returns
    -------
    True once every batch has been written.
    """
    lightning.seed_everything(set_seed)
    markers = train_gene
    # Operate on the object that was passed in. The body used to read the
    # notebook-global ``adata_st``, which only worked because callers happened
    # to pass that same object.
    adata_st_gt.obs[celltype] = list(adata_st_gt.obs[spatial_label])

    n_obs = len(adata_st_gt)
    # Ceiling division: no empty trailing batch when n_obs is an exact
    # multiple of batchsize.
    n_batches = (n_obs + batchsize - 1) // batchsize
    for item in range(n_batches):
        start = item*batchsize
        end = min(start + batchsize, n_obs)

        adata_st_imp = adata_st_gt[start:end,:]

        tg.pp_adatas(adata_sc, adata_st_imp, genes=markers, gene_to_lowercase = False)

        ad_map = tg.map_cells_to_space(adata_sc, adata_st_imp,
            mode="cells",
            density_prior=density_prior,
            num_epochs=epoch,
            device="cuda:0",       # switch to device='cpu' when no GPU is available
            correlation = False
        )

        # Release memory between the mapping and projection steps.
        gc.collect()
        ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc, gene_to_lowercase = False)
        gc.collect()
        print(epoch)
        ad_ge.write_h5ad(savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_normalized_{epoch}_{item}_seed{set_seed}.h5ad")
    return True

In [36]:
# Repeat the smFISH imputation for seeds 0-9 on the same normalized data.
# train_gene is not passed, so the default bound when tangram_batch_allsc was defined
# (train_g) is used; batchsize keeps its default of 10000.
for seed in range(0,10):

    tangram_batch_allsc(adata_st, adata_sc, epoch=500, savepath='./data_smfish/', 
                        celltype='scClassify', filename = 'smfish', spatial_label = 'scClassify',
                       set_seed = seed)

[rank: 0] Global seed set to 0


13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 1


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 2


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 3


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 4


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 5


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 6


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 7


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 8


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 9


500
13 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
20 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 13 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.766, KL reg: 0.000
Score: 0.998, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Score: 0.999, KL reg: 0.000
Saving results..
500


# seqFISH

In [1]:
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from anndata import AnnData
import pathlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import skimage
import seaborn as sns
import tangram as tg
import gc

import cosg

import scipy.stats as st

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

%load_ext autoreload
%autoreload 2
%matplotlib inline

scanpy==1.9.5 anndata==0.9.2 umap==0.5.4 numpy==1.22.0 scipy==1.10.1 pandas==1.5.0 scikit-learn==1.3.1 statsmodels==0.14.0 igraph==0.10.8 pynndescent==0.5.10
squidpy==1.2.3


In [2]:
import lightning

In [3]:
def calcualte_pse_correlation(adata_sc, adata_st, celltype, p_value_threshold = 0.05, cor_threshold = 0.5):
    """Select genes whose per-cell-type mean expression agrees between two datasets.

    Both inputs are restricted to the genes they share. For every cell type
    present in ``obs[celltype]`` of both inputs, the mean expression of each
    gene is computed (a pseudo-bulk profile). Each gene's two profiles are then
    compared across cell types with ``scipy.stats.pearsonr``.

    Parameters
    ----------
    adata_sc, adata_st
        AnnData-like objects exposing ``var_names``, ``obs`` and ``X`` and
        supporting ``adata[:, genes]`` / ``adata[mask]`` subsetting. ``X`` may be
        a scipy sparse matrix or a dense array.
    celltype
        Name of the ``obs`` column holding the cell-type label in both inputs.
    p_value_threshold
        A gene is kept only if its p-value is strictly below this value.
    cor_threshold
        A gene is kept only if its Pearson r is strictly above this value.

    Returns
    -------
    The index of retained gene names, ordered as in ``adata_st.var_names``.

    Raises
    ------
    ValueError
        Propagated from ``pearsonr`` when fewer than two cell types are shared.
    """
    # Walk adata_st.var_names (de-duplicated, order kept) instead of turning a
    # set intersection into a list: set iteration order is not stable between
    # interpreter runs, which made the returned gene order irreproducible.
    sc_genes = set(adata_sc.var_names)
    overlap_gene = [gene for gene in dict.fromkeys(adata_st.var_names) if gene in sc_genes]
    adata_sc = adata_sc[:,overlap_gene]
    adata_st = adata_st[:,overlap_gene]

    cell_type_common = list(set(adata_sc.obs[celltype].unique()).intersection(adata_st.obs[celltype].unique()))

    pseudo_st = []
    pseudo_sc = []
    for label in cell_type_common:
        for adata, collected in ((adata_st, pseudo_st), (adata_sc, pseudo_sc)):
            X = adata[adata.obs[celltype] == label].X
            # Only sparse matrices have .toarray(); dense X is used as it is.
            dense = X.toarray() if hasattr(X, "toarray") else np.asarray(X)
            collected.append(np.mean(dense, axis = 0))

    # Rows are cell types, columns are genes.
    pseudo_st = np.array(pseudo_st)
    pseudo_sc = np.array(pseudo_sc)

    cor_pearson = []
    cor_pvalue = []
    for gene_idx in range(pseudo_st.shape[1]):
        # A gene that is constant across cell types yields NaN here; NaN fails
        # both comparisons below, so such genes are dropped.
        cor, pval = st.pearsonr(pseudo_st[:,gene_idx], pseudo_sc[:,gene_idx])
        cor_pearson.append(cor)
        cor_pvalue.append(pval)

    information_stat = pd.DataFrame({'pearson': cor_pearson, 'pvalue': cor_pvalue},
                                    index = adata_st.var_names)

    keep = (information_stat['pvalue'] < p_value_threshold) & (information_stat['pearson'] > cor_threshold)
    information_stat_update = information_stat.loc[keep]

    return information_stat_update.index

In [4]:
# Load the scRNA-seq reference for the seqFISH benchmark.
adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/seqfishdata/data/scRNAseq/seqfish/scRNAseq_seqfish.h5ad")
# Make gene names unique so they can be used for name-based subsetting.
adata_sc.var_names_make_unique()
# Copy the annotation into 'scClassify', the obs column the later cells pass as the cell-type label.
adata_sc.obs['scClassify'] = adata_sc.obs['celltype'].copy() 

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
def tangram_batch_allsc(adata_st_gt, adata_sc, savepath = "./data_breastbatch/", celltype = "graph_cluster_anno",  
                  epoch = 500, filename = 'human_breast', batchsize = 10000, 
                        set_seed = 0, density_prior = 'uniform', train_gene = None, spatial_label = 'scClassify',
                        device = "cuda:0"):
    """Map single cells onto spatial data with Tangram, one batch of spots at a time.

    The spatial observations are processed in consecutive slices of at most
    ``batchsize`` rows.  For each slice the function runs ``tg.pp_adatas``,
    ``tg.map_cells_to_space`` (``mode="cells"``) and ``tg.project_genes``, and
    writes the projected AnnData to
    ``savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_{epoch}_{item}_seed{set_seed}.h5ad"``,
    where ``item`` is the 0-based batch index.

    Parameters
    ----------
    adata_st_gt
        Spatial AnnData to map onto.  Its ``obs[celltype]`` column is
        (over)written in place with the values of ``obs[spatial_label]``.
    adata_sc
        Single-cell reference AnnData.  It is passed to ``tg.pp_adatas`` on
        every batch, which annotates it in place.
    savepath : str
        Prefix for output files; it is concatenated as-is with the file name,
        so it should end with a path separator.
    celltype : str
        Name of the ``obs`` column that receives the spatial labels.
    epoch : int
        Number of training epochs (``num_epochs``) for each batch.
    filename : str
        Stem used in the output file names.
    batchsize : int
        Maximum number of spatial observations per batch.
    set_seed
        Only embedded in the output file name; this function does not seed
        any random number generator itself.
    density_prior
        Forwarded to ``tg.map_cells_to_space``.
    train_gene
        Training genes forwarded to ``tg.pp_adatas`` as ``genes``.
    spatial_label : str
        Name of the existing ``obs`` column in ``adata_st_gt`` with the labels.
    device : str
        Torch device string forwarded to ``tg.map_cells_to_space``.

    Returns
    -------
    bool
        Always ``True`` once every batch has been written.
    """
    # Use the argument rather than a module-level `adata_st`: the function
    # previously only worked when the caller's global happened to be the same object.
    adata_st_gt.obs[celltype] = list(adata_st_gt.obs[spatial_label])

    n_obs = len(adata_st_gt)
    # Stepping by `batchsize` avoids an empty trailing batch when `n_obs` is an
    # exact multiple of `batchsize`.
    for item, start in enumerate(range(0, n_obs, batchsize)):
        end = min(start + batchsize, n_obs)

        # `tg.pp_adatas` stores training genes and density priors on the spatial
        # object, so give it an independent copy instead of a view of the input.
        adata_st_imp = adata_st_gt[start:end,:].copy()

        tg.pp_adatas(adata_sc, adata_st_imp, genes=train_gene, gene_to_lowercase = False)

        # NOTE(review): `correlation` is kept exactly as the original call passed
        # it; presumably it is supported by the Tangram build in use - confirm.
        ad_map = tg.map_cells_to_space(adata_sc, adata_st_imp,
            mode="cells",
            density_prior=density_prior,
            num_epochs=epoch,
            device=device,
            correlation = False
        )

        gc.collect()
        ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc, gene_to_lowercase = False)
        gc.collect()
        print(epoch)
        ad_ge.write_h5ad(savepath + f"{filename}_data_batch_maskgene_raw_filter0.5_{epoch}_{item}_seed{set_seed}.h5ad")
    return True

In [7]:
# Benchmark run: for each of the ten seqFISH spatial files (seed 0-9), select
# informative genes, hold out a third of them, and map with Tangram for 100 epochs.
for seed in range(0,10):
    # NOTE(review): the global seed is fixed at 0 on every iteration; `seed` only
    # selects the spatial input file and tags the output file name - confirm intended.
    lightning.seed_everything(0)
    # Both datasets are re-read from disk on each iteration.
    adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/seqfishdata/data/scRNAseq/seqfish/scRNAseq_seqfish.h5ad")
    adata_st = sc.read_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_seqfish/seqfish_data_seed{seed}.h5ad")
    adata_sc.var_names_make_unique()
    
    # Expose each dataset's cell-type annotation under the shared 'scClassify' key.
    adata_sc.obs['scClassify'] = adata_sc.obs['celltype'].copy() 
    adata_st.obs['scClassify'] = adata_st.obs['celltype_mapped_refined'].copy() 

    # Keep only genes whose cell-type-level means correlate between the two datasets.
    info_gene = calcualte_pse_correlation(adata_sc, adata_st, 'scClassify')
    
    adata_sc = adata_sc[:,info_gene]
    adata_st = adata_st[:,info_gene]

    import random 
    random.seed(2023)
    # Sorted so the gene split below does not depend on set iteration order.
    overlap_gene = list(set(adata_st.var_names).intersection(adata_sc.var_names))
    overlap_gene = sorted(overlap_gene)

    # Split the genes (not the cells) into train / held-out sets with a fixed random_state.
    train_g, test_g = train_test_split(overlap_gene, test_size=0.33, random_state=2023)
    # Zero the held-out genes in the spatial data.
    # NOTE(review): this assigns through a temporary view of `adata_st`, which is
    # itself a view; presumably AnnData writes the zeros through to the underlying
    # matrix - confirm.
    adata_st[:, test_g].X = 0
    
    # Only the training genes are handed to Tangram; `seed` is passed as `set_seed`.
    tangram_batch_allsc(adata_st, adata_sc, epoch=100, savepath='./data_seqfish/', 
                        celltype='scClassify', filename = 'seqfish', spatial_label = 'scClassify',
                        batchsize = 20000,
                       set_seed = seed,
                       train_gene = train_g)

[rank: 0] Global seed set to 0


115 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
115 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 115 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.391, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
113 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
113 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 113 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.393, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
111 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
111 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 111 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.383, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
108 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
108 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 108 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.391, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
110 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
110 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 110 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.384, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
116 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
116 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 116 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.399, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
110 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
110 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 110 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.382, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
105 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
105 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 105 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.392, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
106 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
106 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 106 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.383, KL reg: 0.000
Saving results..


[rank: 0] Global seed set to 0


100
114 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
114 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 114 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.397, KL reg: 0.000
Saving results..
100


In [8]:
# Benchmark run: same procedure as the 100-epoch cell, but training for 400 epochs.
# NOTE(review): this cell differs from the 100-epoch one only in `epoch`;
# consider a single function parameterised by the epoch count.
for seed in range(0,10):
    # NOTE(review): the global seed is fixed at 0 on every iteration; `seed` only
    # selects the spatial input file and tags the output file name - confirm intended.
    lightning.seed_everything(0)
    # Both datasets are re-read from disk on each iteration.
    adata_sc = sc.read_h5ad("/gpfs/gibbs/pi/zhao/tl688/seqfishdata/data/scRNAseq/seqfish/scRNAseq_seqfish.h5ad")
    adata_st = sc.read_h5ad(f"/gpfs/gibbs/pi/zhao/tl688/tangram/data_seqfish/seqfish_data_seed{seed}.h5ad")
    adata_sc.var_names_make_unique()
    
    # Expose each dataset's cell-type annotation under the shared 'scClassify' key.
    adata_sc.obs['scClassify'] = adata_sc.obs['celltype'].copy() 
    adata_st.obs['scClassify'] = adata_st.obs['celltype_mapped_refined'].copy() 

    # Keep only genes whose cell-type-level means correlate between the two datasets.
    info_gene = calcualte_pse_correlation(adata_sc, adata_st, 'scClassify')
    
    adata_sc = adata_sc[:,info_gene]
    adata_st = adata_st[:,info_gene]

    import random 
    random.seed(2023)
    # Sorted so the gene split below does not depend on set iteration order.
    overlap_gene = list(set(adata_st.var_names).intersection(adata_sc.var_names))
    overlap_gene = sorted(overlap_gene)

    # Split the genes (not the cells) into train / held-out sets with a fixed random_state.
    train_g, test_g = train_test_split(overlap_gene, test_size=0.33, random_state=2023)
    # Zero the held-out genes in the spatial data.
    # NOTE(review): this assigns through a temporary view of `adata_st`, which is
    # itself a view; presumably AnnData writes the zeros through to the underlying
    # matrix - confirm.
    adata_st[:, test_g].X = 0
    
    # Only the training genes are handed to Tangram; `seed` is passed as `set_seed`.
    tangram_batch_allsc(adata_st, adata_sc, epoch=400, savepath='./data_seqfish/', 
                        celltype='scClassify', filename = 'seqfish', spatial_label = 'scClassify',
                        batchsize = 20000,
                       set_seed = seed,
                       train_gene = train_g)

[rank: 0] Global seed set to 0


115 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
115 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 115 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.391, KL reg: 0.000
Score: 0.916, KL reg: 0.005
Score: 0.927, KL reg: 0.004
Score: 0.930, KL reg: 0.004
Saving results..


[rank: 0] Global seed set to 0


400
113 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
113 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 113 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.393, KL reg: 0.000
Score: 0.914, KL reg: 0.006
Score: 0.926, KL reg: 0.005
Score: 0.928, KL reg: 0.005
Saving results..


[rank: 0] Global seed set to 0


400
111 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
111 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 111 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.383, KL reg: 0.000
Score: 0.917, KL reg: 0.005
Score: 0.928, KL reg: 0.004
Score: 0.931, KL reg: 0.004
Saving results..


[rank: 0] Global seed set to 0


400
108 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
108 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 108 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.391, KL reg: 0.000
Score: 0.921, KL reg: 0.006
Score: 0.932, KL reg: 0.005
Score: 0.935, KL reg: 0.004
Saving results..


[rank: 0] Global seed set to 0


400
110 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
110 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 110 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.384, KL reg: 0.000
Score: 0.920, KL reg: 0.006
Score: 0.932, KL reg: 0.005
Score: 0.934, KL reg: 0.004
Saving results..


[rank: 0] Global seed set to 0


400
116 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
116 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 116 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.399, KL reg: 0.000
Score: 0.913, KL reg: 0.006
Score: 0.925, KL reg: 0.005
Score: 0.927, KL reg: 0.004
Saving results..


[rank: 0] Global seed set to 0


400
110 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
110 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 110 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.382, KL reg: 0.000
Score: 0.916, KL reg: 0.005
Score: 0.928, KL reg: 0.004
Score: 0.930, KL reg: 0.004
Saving results..


[rank: 0] Global seed set to 0


400
105 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
105 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 105 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.392, KL reg: 0.000
Score: 0.922, KL reg: 0.005
Score: 0.933, KL reg: 0.004
Score: 0.935, KL reg: 0.004
Saving results..


[rank: 0] Global seed set to 0


400
106 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
106 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 106 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.383, KL reg: 0.000
Score: 0.931, KL reg: 0.004
Score: 0.942, KL reg: 0.004
Score: 0.944, KL reg: 0.003
Saving results..


[rank: 0] Global seed set to 0


400
114 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
114 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
Allocate tensors for mapping.
Begin training with 114 genes and uniform density_prior in cells mode...
Printing scores every 100 epochs.
Score: 0.397, KL reg: 0.000
Score: 0.915, KL reg: 0.005
Score: 0.927, KL reg: 0.004
Score: 0.929, KL reg: 0.004
Saving results..
400
