In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import torch
import tangram as tg
from sklearn.model_selection import train_test_split
from anndata import AnnData, read_h5ad
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the MERFISH cell-by-gene count table and the per-cell metadata table.
# Both files are read with their first column as the row index.
data_merfish_raw = pd.read_csv("data/datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_by_gene_S1R1.csv", index_col=0)
meta_data = pd.read_csv("data/datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_metadata_S1R1.csv", index_col=0)

data_merfish = AnnData(data_merfish_raw)

# Attach cell centroids by row label rather than by row position, so that a
# metadata file stored in a different order cannot silently assign the wrong
# coordinates. `.loc` raises KeyError if a cell has no metadata row.
# NOTE(review): assumes both CSV indices hold the same cell identifiers — confirm.
cell_centers = meta_data.loc[data_merfish_raw.index, ['center_x', 'center_y']]
data_merfish.obs['x_cord'] = cell_centers['center_x'].to_numpy()
data_merfish.obs['y_cord'] = cell_centers['center_y'].to_numpy()

# Snapshot taken before any filtering. Note this rebinds `data_merfish_raw`
# from the pandas DataFrame to an AnnData object.
data_merfish_raw = data_merfish.copy()

# Quality control: drop cells with fewer than 500 counts, then cells with
# fewer than 100 detected genes.
sc.pp.filter_cells(data_merfish, min_counts=500)
sc.pp.filter_cells(data_merfish, min_genes=100)

# Neighbour graph built directly on the expression matrix (use_rep='X'),
# followed by Louvain clustering; both steps use a fixed random_state.
sc.pp.neighbors(data_merfish, n_neighbors=15, use_rep='X', random_state=1)
sc.tl.louvain(data_merfish, resolution=0.4, random_state=1)

In [6]:
## right brain as training, left brain as testing
# Split the section at the median x coordinate. Cells with x <= median become
# the query set (Qdata); cells with x > median become the reference set (Rdata).
x_median = np.quantile(data_merfish.obs['x_cord'], 0.5)
# Take explicit copies instead of AnnData views: the halves are handed to
# tg.pp_adatas afterwards, which writes into their `uns` and `obs`.
Qdata = data_merfish[data_merfish.obs['x_cord'] <= x_median].copy()
Rdata = data_merfish[data_merfish.obs['x_cord'] > x_median].copy()

In [7]:
# Prepare both halves for Tangram, restricting training to the genes present
# in the reference half.
tg.pp_adatas(adata_sc=Qdata, adata_sp=Rdata, genes=Rdata.var_names)

# Sanity check: both objects must end up with the same training gene list.
query_training_genes = Qdata.uns['training_genes']
reference_training_genes = Rdata.uns['training_genes']
assert query_training_genes == reference_training_genes

INFO:root:649 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:649 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.


In [11]:
# Map the query cells (passed as `adata_sc`) onto the reference cells
# (passed as `adata_sp`) with Tangram.
ad_map = tg.map_cells_to_space(
    adata_sc=Qdata,
    adata_sp=Rdata,
    # Runs on CPU; switch to 'cuda:0' to train on a GPU instead.
    device='cpu',
    # Fixed seed so the mapping is repeatable between runs, matching the
    # random_state=1 used for the neighbours/Louvain steps in this notebook.
    random_state=1,
)

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 649 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.260, KL reg: 0.043
Score: 0.822, KL reg: 0.002
Score: 0.835, KL reg: 0.002
Score: 0.838, KL reg: 0.002
Score: 0.840, KL reg: 0.002
Score: 0.841, KL reg: 0.002
Score: 0.841, KL reg: 0.002
Score: 0.842, KL reg: 0.002
Score: 0.842, KL reg: 0.002
Score: 0.842, KL reg: 0.002


INFO:root:Saving results..


In [12]:
# Predicted position of every query cell: the mapping matrix times the
# reference coordinates.
# NOTE(review): presumably each row of ad_map.X is a probability distribution
# over reference cells, making this a weighted average — confirm.
reference_coords = Rdata.obs[['x_cord', 'y_cord']].to_numpy()
Rdata_location_pred = ad_map.X.dot(reference_coords)

# The prediction lies in the reference half; reflect x across the median split
# line (x -> 2 * median - x) to place it in the query half.
x_median = np.quantile(data_merfish.obs['x_cord'], 0.5)
Rdata_location_pred[:, 0] = -Rdata_location_pred[:, 0] + 2 * x_median

# One row per query cell, labelled with the query cell ids.
Rdata_location_pred = pd.DataFrame(
    Rdata_location_pred,
    index=Qdata.obs.index,
    columns=['x', 'y'],
)

# Create the output directory if needed; to_csv fails when it does not exist.
os.makedirs("output/brain", exist_ok=True)
Rdata_location_pred.to_csv("output/brain/tangram_brain_left.csv")

In [20]:
## left brain as training, right brain as testing
# Same median split as the first fold with the roles swapped: cells with
# x <= median are now the reference set (Rdata), cells with x > median the
# query set (Qdata).
x_median = np.quantile(data_merfish.obs['x_cord'], 0.5)
# Take explicit copies instead of AnnData views: tg.pp_adatas below writes
# into the `uns` and `obs` of both objects.
Rdata = data_merfish[data_merfish.obs['x_cord'] <= x_median].copy()
Qdata = data_merfish[data_merfish.obs['x_cord'] > x_median].copy()

# Prepare both halves for Tangram, restricting training to the genes present
# in the reference half, then check that both ended up with the same list.
tg.pp_adatas(Qdata, Rdata, genes=Rdata.var.index)
assert Qdata.uns['training_genes'] == Rdata.uns['training_genes']

INFO:root:649 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:649 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.


In [21]:
# Map the query cells (passed as `adata_sc`) onto the reference cells
# (passed as `adata_sp`) with Tangram.
ad_map = tg.map_cells_to_space(
    adata_sc=Qdata,
    adata_sp=Rdata,
    # Runs on CPU; switch to 'cuda:0' to train on a GPU instead.
    device='cpu',
    # Fixed seed so the mapping is repeatable between runs, matching the
    # random_state=1 used for the neighbours/Louvain steps in this notebook.
    random_state=1,
)

INFO:root:Allocate tensors for mapping.
INFO:root:Begin training with 649 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.260, KL reg: 0.046
Score: 0.820, KL reg: 0.002
Score: 0.833, KL reg: 0.002
Score: 0.837, KL reg: 0.002
Score: 0.838, KL reg: 0.002
Score: 0.839, KL reg: 0.002
Score: 0.840, KL reg: 0.002
Score: 0.840, KL reg: 0.002
Score: 0.841, KL reg: 0.002
Score: 0.841, KL reg: 0.002


INFO:root:Saving results..


In [22]:
# Predicted position of every query cell: the mapping matrix times the
# reference coordinates.
# NOTE(review): presumably each row of ad_map.X is a probability distribution
# over reference cells, making this a weighted average — confirm.
reference_coords = Rdata.obs[['x_cord', 'y_cord']].to_numpy()
Rdata_location_pred = ad_map.X.dot(reference_coords)

# The prediction lies in the reference half; reflect x across the median split
# line (x -> 2 * median - x) to place it in the query half.
x_median = np.quantile(data_merfish.obs['x_cord'], 0.5)
Rdata_location_pred[:, 0] = -Rdata_location_pred[:, 0] + 2 * x_median

# One row per query cell, labelled with the query cell ids.
Rdata_location_pred = pd.DataFrame(
    Rdata_location_pred,
    index=Qdata.obs.index,
    columns=['x', 'y'],
)

# Create the output directory if needed; to_csv fails when it does not exist.
os.makedirs("output/brain", exist_ok=True)
Rdata_location_pred.to_csv("output/brain/tangram_brain_right.csv")