In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import torch

import tangram as tg

%load_ext autoreload
%autoreload 2
%matplotlib inline

tg.__version__

  from pandas.core import (


'1.0.4'

In [2]:
# Deconvolve every Visium time point with Tangram (`clusters` mode) and write
# the per-spot, row-normalised cell-type scores to one CSV per time point.
#
# Inputs  (read from the working directory, one pair per entry of `groups`):
#   Visium_decov_sc_<group>.h5ad   single-cell reference
#   Visium_decov_sp_<group>.h5ad   Visium spatial data
# Outputs (written to the working directory):
#   Tangram_deconv_<ident>.csv     spots x cell types, each row summing to 1

groups = ['4hours', '12hours', '2days', '14days', '6weeks']
ident_list = ['Hour4R', 'Hour12R', 'Day2R', 'Day14R', 'Week6R']
# ident_list[idx] names the output file for groups[idx]; the lists are paired by position.
assert len(groups) == len(ident_list), "groups and ident_list must have the same length"

celltype_key = 'celltype_new'  # obs column used for grouping and as Tangram cluster label
N_MARKERS = 200                # top-ranked genes kept per cell type as training candidates
# Seed forwarded to Tangram so that re-running the cell reproduces the CSVs.
# NOTE(review): keep it non-zero — a falsy seed may be treated as "no seed" by Tangram; confirm.
SEED = 42

for idx, ident in enumerate(groups):
    print(ident)
    rna_ad = sc.read_h5ad(f"Visium_decov_sc_{ident}.h5ad")
    vis_ad = sc.read_h5ad(f"Visium_decov_sp_{ident}.h5ad")

    # Merge the 'DTL-ATL' label into 'DTL' before counting / ranking.
    rna_ad.obs[celltype_key] = rna_ad.obs[celltype_key].replace('DTL-ATL','DTL')

    # NOTE(review): presumably `.raw` holds the un-normalised counts with the same
    # genes as `vis_ad` — confirm; a different gene set would fail on assignment.
    vis_ad.X = vis_ad.raw.X
    # Densify the reference matrix. `.toarray()` replaces the `.A` shorthand, which
    # newer SciPy releases no longer provide; an already-dense X is left untouched.
    if hasattr(rna_ad.X, "toarray"):
        rna_ad.X = rna_ad.X.toarray()

    sc.pp.normalize_total(rna_ad)

    # Cell types with fewer than 2 cells are removed before marker ranking.
    celltype_counts = rna_ad.obs[celltype_key].value_counts()
    celltype_drop = celltype_counts.index[celltype_counts < 2]
    print(f'Drop celltype {list(celltype_drop)} contain less 2 sample')
    rna_ad = rna_ad[~rna_ad.obs[celltype_key].isin(celltype_drop),].copy()

    # Rank genes per cell type and keep the top N_MARKERS names of every group.
    sc.tl.rank_genes_groups(rna_ad, groupby=celltype_key, use_raw=False)
    markers_df = pd.DataFrame(rna_ad.uns["rank_genes_groups"]["names"]).iloc[:N_MARKERS, :]

    sc.pp.normalize_total(vis_ad)

    # Training genes = marker genes that are also present in the spatial data.
    # Sorted so the list handed to Tangram does not depend on set/hash ordering.
    genes_sc = np.unique(markers_df.melt().value.values)
    genes_st = vis_ad.var_names.values
    genes = sorted(set(genes_sc) & set(genes_st))

    tg.pp_adatas(rna_ad, vis_ad, genes=genes)

    ad_map = tg.map_cells_to_space(
        rna_ad,
        vis_ad,
        mode='clusters',
        cluster_label=celltype_key,
        random_state=SEED,
    )

    # Stores the projected annotation in vis_ad.obsm['tangram_ct_pred'].
    tg.project_cell_annotations(ad_map, vis_ad, annotation=celltype_key)
    celltype_density = vis_ad.obsm['tangram_ct_pred']
    # Scale each spot (row) by its total so the cell-type scores sum to 1.
    # A spot whose total is 0 yields NaN for every cell type.
    celltype_density = celltype_density.div(celltype_density.sum(axis=1), axis=0)

    celltype_density.to_csv(f"Tangram_deconv_{ident_list[idx]}.csv")


4hours
Drop celltype [] contain less 2 sample


INFO:root:2120 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:16462 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 2120 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.453, KL reg: 0.083
Score: 0.574, KL reg: 0.001
Score: 0.575, KL reg: 0.001
Score: 0.575, KL reg: 0.001
Score: 0.575, KL reg: 0.001
Score: 0.575, KL reg: 0.001
Score: 0.575, KL reg: 0.001
Score: 0.575, KL reg: 0.001
Score: 0.575, KL reg: 0.001
Score: 0.575, KL reg: 0.001


INFO:root:Saving results..
INFO:root:spatial prediction dataframe is saved in `obsm` `tangram_ct_pred` of the spatial AnnData.


12hours
Drop celltype [] contain less 2 sample


INFO:root:2001 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:16848 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 2001 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.470, KL reg: 0.066
Score: 0.570, KL reg: 0.001
Score: 0.571, KL reg: 0.001
Score: 0.571, KL reg: 0.001
Score: 0.571, KL reg: 0.001
Score: 0.571, KL reg: 0.001
Score: 0.571, KL reg: 0.001
Score: 0.571, KL reg: 0.001
Score: 0.571, KL reg: 0.001
Score: 0.571, KL reg: 0.001


INFO:root:Saving results..
INFO:root:spatial prediction dataframe is saved in `obsm` `tangram_ct_pred` of the spatial AnnData.


2days
Drop celltype [] contain less 2 sample


INFO:root:2040 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:16971 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 2040 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.452, KL reg: 0.076
Score: 0.583, KL reg: 0.000
Score: 0.584, KL reg: 0.000
Score: 0.584, KL reg: 0.000
Score: 0.584, KL reg: 0.000
Score: 0.584, KL reg: 0.000
Score: 0.584, KL reg: 0.000
Score: 0.584, KL reg: 0.000
Score: 0.584, KL reg: 0.000
Score: 0.584, KL reg: 0.000


INFO:root:Saving results..
INFO:root:spatial prediction dataframe is saved in `obsm` `tangram_ct_pred` of the spatial AnnData.


14days
Drop celltype [] contain less 2 sample


INFO:root:2050 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:16813 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 2050 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.560, KL reg: 0.062
Score: 0.684, KL reg: 0.000
Score: 0.684, KL reg: 0.000
Score: 0.685, KL reg: 0.000
Score: 0.685, KL reg: 0.000
Score: 0.685, KL reg: 0.000
Score: 0.685, KL reg: 0.000
Score: 0.685, KL reg: 0.000
Score: 0.685, KL reg: 0.000
Score: 0.685, KL reg: 0.000


INFO:root:Saving results..
INFO:root:spatial prediction dataframe is saved in `obsm` `tangram_ct_pred` of the spatial AnnData.


6weeks
Drop celltype [] contain less 2 sample


INFO:root:2084 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:16926 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.
INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 2084 genes and rna_count_based density_prior in clusters mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.428, KL reg: 0.065
Score: 0.580, KL reg: 0.003
Score: 0.581, KL reg: 0.003
Score: 0.581, KL reg: 0.003
Score: 0.581, KL reg: 0.003
Score: 0.581, KL reg: 0.003
Score: 0.581, KL reg: 0.003
Score: 0.581, KL reg: 0.003
Score: 0.581, KL reg: 0.003
Score: 0.581, KL reg: 0.003


INFO:root:Saving results..
INFO:root:spatial prediction dataframe is saved in `obsm` `tangram_ct_pred` of the spatial AnnData.
