In [2]:
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tqdm import tqdm


In [3]:
    import tangram as tg


In [4]:
path = "../data/raw/visium/"
visium = sc.read_visium(path, genome=None, count_file='CytAssist_FFPE_Human_Breast_Cancer_filtered_feature_bc_matrix.h5',
                        library_id=None, load_images=True, source_image_path=None)
visium.var_names_make_unique()

In [5]:
adata_seq = sc.read_h5ad("../data/interim/scrna.h5ad")

In [6]:
adata_seq.obs.drop(columns = ["celltype_major","celltype_minor"], inplace = True)

In [7]:
cell_types = pd.read_csv("../data/interim/cell_types.csv", index_col = 2, sep = ";")

In [8]:
adata_seq.obs = adata_seq.obs.join(cell_types[["celltype_major","celltype_minor"]])

In [9]:
adata_seq = adata_seq.raw.to_adata()

In [10]:
adata_seq.X.data = np.exp(adata_seq.X.data) - 1


In [11]:
adata_seq.X = adata_seq.X.multiply(adata_seq.obs.nCount_RNA.to_numpy()[:, np.newaxis]).tocsr()
adata_seq.X = np.round(adata_seq.X / 1e4)

In [12]:
adata_seq.obs.index = adata_seq.obs.index.str.replace(".", "-")

In [13]:
adata_seq.var_names_make_unique()


In [14]:
adata_seq

AnnData object with n_obs × n_vars = 29891 × 16969
    obs: 'barcode', 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'percent.MT', 'percent.Ribosomal', 'RNA_snn_res.0.5', 'seurat_clusters', 'celltype_major', 'celltype_minor'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'
    uns: 'neighbors'
    obsm: 'X_pca', 'X_umap'
    obsp: 'distances'

In [15]:
    tg.pp_adatas(adata_seq, visium, genes=None)


INFO:root:16960 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:16960 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.


In [16]:
    ad_map = tg.map_cells_to_space(adata_seq, visium)


INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 16960 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.441, KL reg: 0.449
Score: 0.725, KL reg: 0.001
Score: 0.744, KL reg: 0.001
Score: 0.751, KL reg: 0.000
Score: 0.755, KL reg: 0.000
Score: 0.757, KL reg: 0.000
Score: 0.758, KL reg: 0.000
Score: 0.759, KL reg: 0.000
Score: 0.760, KL reg: 0.000
Score: 0.761, KL reg: 0.000


INFO:root:Saving results..


In [17]:
    ad_ge = tg.project_genes(ad_map, adata_seq)


In [26]:
ad_ge

AnnData object with n_obs × n_vars = 4992 × 16969
    obs: 'in_tissue', 'array_row', 'array_col', 'uniform_density', 'rna_count_based_density'
    var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable', 'n_cells', 'sparsity', 'is_training'
    uns: 'neighbors', 'training_genes', 'overlap_genes'

In [34]:
cell2spot = ad_map.X

In [40]:
spot_ids = np.argmax(cell2spot, axis = 1)
spots = visium.obs_names[spot_ids]

In [41]:
spots

Index(['AGTTAGAAGGACGAAT-1', 'AAGCAGTGGCATCACT-1', 'TGGAAGCTCCATCGAC-1',
       'GTACAGTATCGGAATT-1', 'CGCACGCATAATGGTG-1', 'GCTGTTATGGTTGCCG-1',
       'TCGGCGCTATCACGTG-1', 'GGTCTTGATTCAACCT-1', 'TCGCCTGCACGGAAGG-1',
       'CGATGCCGACCGTGAC-1',
       ...
       'TGATCTTGCCTATTGT-1', 'GCGGTGGACATCAATC-1', 'TGGTGCGAATCCTGCT-1',
       'TCTGTCACCGTTAATA-1', 'GTACGAAGGCGTCTAT-1', 'GAGTCGAGCGTGCTTC-1',
       'AGCTTGGATGCAAGCC-1', 'ACAGTCACAATATTAG-1', 'TAATTGATTCTGTCGC-1',
       'GGACCATCACCGCCAA-1'],
      dtype='object', length=29891)

In [52]:
cts = list(adata_seq.obs.celltype_major.unique())

In [66]:
unique_spots = list(set(spots))

In [68]:
spot_composition = np.zeros((len(unique_spots), len(cts)))
for i, spot in enumerate(spots):
    ct = adata_seq.obs.iloc[i].celltype_major
    spot_composition[unique_spots.index(spot),cts.index(ct)] +=1


In [70]:
pd.DataFrame(spot_composition, index = unique_spots, columns = cts).to_csv("../data/interim/tangram.csv")

In [72]:
spot_composition = pd.DataFrame(spot_composition, index = unique_spots, columns = cts)

In [73]:
spot_composition

Unnamed: 0,Myeloid,Cancer Epithelial,PVL,Normal Epithelial,T.cells,CAFs,Endothelial,NaN,B.cells,Plasmablasts
CTCGTGTCTTGGCCGC-1,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
ATCTGCCGTCTACAGC-1,1.0,4.0,1.0,2.0,2.0,0.0,1.0,1.0,2.0,0.0
CGACTTCACTCGGCAT-1,6.0,0.0,0.0,0.0,3.0,2.0,1.0,0.0,0.0,0.0
AATGGCATAGTATCAT-1,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0
TCCTGATAAGTGCCGT-1,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,2.0
...,...,...,...,...,...,...,...,...,...,...
ATAGTCCGCCGTTGGC-1,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
ACCATTAGATCGTGGC-1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
AAGGTTCATGCGGTGC-1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
ATCCGTTATCGAAGGC-1,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
