In [1]:
import novosparc as ns

import os
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import altair as alt
from scipy.spatial.distance import cdist, squareform, pdist
from scipy.stats import ks_2samp
from scipy.stats import pearsonr

import random
random.seed(0)

from skimage import io, color
import torch
from torch.nn import functional as F
import json

from sklearn.model_selection import train_test_split
from anndata import AnnData, read_h5ad
import matplotlib.pyplot as plt
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the MERFISH cell-by-gene table and the per-cell metadata table.
# Both CSVs use their first column as the row index.
counts_csv = "data/datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_by_gene_S1R1.csv"
metadata_csv = "data/datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_metadata_S1R1.csv"
data_merfish_raw = pd.read_csv(counts_csv, index_col=0)
meta_data = pd.read_csv(metadata_csv, index_col=0)

data_merfish = AnnData(data_merfish_raw)

# Attach the center_x / center_y metadata columns as per-cell coordinates.
# NOTE(review): values are copied positionally (as plain lists), so this assumes
# meta_data rows are in the same order as the count table -- TODO confirm.
for obs_key, meta_key in (('x_cord', 'center_x'), ('y_cord', 'center_y')):
    data_merfish.obs[obs_key] = meta_data[meta_key].tolist()

# Keep an unfiltered snapshot. This rebinds data_merfish_raw from the pandas
# DataFrame read above to an AnnData copy.
data_merfish_raw = data_merfish.copy()

# Filter cells in place: first by total counts, then by number of detected genes.
sc.pp.filter_cells(data_merfish, min_counts=500)
sc.pp.filter_cells(data_merfish, min_genes=100)

# Build the neighbour graph directly on X and run Louvain clustering at resolution 0.4.
sc.pp.neighbors(data_merfish, n_neighbors=15, use_rep='X', random_state=1)
sc.tl.louvain(data_merfish, resolution=0.4, random_state=1)

  data_merfish = AnnData(data_merfish_raw)


In [4]:
# Split the filtered cells at the median x coordinate: Qdata takes cells at or
# below the median, Rdata takes cells strictly above it.
x_cord = data_merfish.obs['x_cord']
x_median = np.quantile(x_cord, 0.5)
Qdata = data_merfish[x_cord <= x_median]
Rdata = data_merfish[x_cord > x_median]

In [5]:
## right brain as training, left brain as testing
datatrain, datatest = Rdata.copy(), Qdata.copy()

# Seed the Python, torch and NumPy random number generators with the same value.
seed = 2021
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

## Running novosparc
# Candidate locations are the (x, y) coordinates of the training cells; the
# cells to be placed are the test cells.
locations_apriori = datatrain.obs[['x_cord', 'y_cord']].to_numpy()
tissue = ns.cm.Tissue(dataset=datatest, locations=locations_apriori)
num_neighbors_s = 5
num_neighbors_t = 5

# params for linear cost
# Every gene of the training data is used as a marker: the atlas is the training
# expression matrix, and markers_to_use holds each marker's positional index.
atlas_genes = datatrain.var
markers = atlas_genes.index.tolist()
num_genes = len(markers)
atlas_matrix = datatrain.to_df().to_numpy()
markers_idx = pd.DataFrame({'markers_idx': np.arange(num_genes)}, index=markers)
markers_to_use = markers_idx.loc[markers, 'markers_idx'].to_numpy()

In [6]:
# Configure the reconstruction with the atlas, marker indices and neighbour
# counts prepared in the previous cell.
reconstruction_setup = dict(
    atlas_matrix=atlas_matrix,
    markers_to_use=markers_to_use,
    num_neighbors_s=num_neighbors_s,
    num_neighbors_t=num_neighbors_t,
)
tissue.setup_reconstruction(**reconstruction_setup)

tissue.reconstruct(alpha_linear=0.8, epsilon=5e-3)

# Keep the reconstruction result stored on the tissue object.
# NOTE(review): presumably a cells x locations transport matrix -- confirm
# against the novoSpaRc documentation.
location_pred = tissue.gw

Setting up for reconstruction ... done ( 384.1 seconds )
Reconstructing spatial information with 649 markers: 21578 cells and 21578 locations ... 
Trying with epsilon: 5.00e-03




In [7]:
# Normalise each row of the transport matrix so it sums to 1, giving each test
# cell a weight distribution over the training locations.
location_sum = np.sum(location_pred, axis=1)
location_pred_copy = location_pred / location_sum[:, np.newaxis]

# Predicted position = weighted average of the training-cell coordinates.
# Coordinates are read from datatrain (the object the Tissue locations came from),
# so rows and columns stay consistent with the transport matrix.
train_xy = datatrain.obs[['x_cord', 'y_cord']].to_numpy()
pred_xy = location_pred_copy.dot(train_xy)

# Predictions lie in the training hemisphere; reflect x about the median x of
# all cells to move them into the test hemisphere.
midline_x = np.quantile(data_merfish.obs['x_cord'], 0.5)
pred_xy[:, 0] = -pred_xy[:, 0] + 2 * midline_x

# One row per test cell, labelled with that cell's index.
pred_cord_transform = pd.DataFrame(pred_xy, index=datatest.obs.index, columns=['x', 'y'])

# Create the output directory first so a fresh checkout does not fail here,
# after the long reconstruction has already run.
output_csv = "output/brain/novosparc_brain_left.csv"
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
pred_cord_transform.to_csv(output_csv)

In [None]:
## left brain as training, right brain as testing
datatrain = Rdata.copy()
datatest = Qdata.copy()

random.seed(2021)
torch.manual_seed(2021)
np.random.seed(2021)
## Running novosparc
locations_apriori = datatrain.obs[['x_cord', 'y_cord']].values
tissue = ns.cm.Tissue(dataset=datatest, locations=locations_apriori)
num_neighbors_s = num_neighbors_t = 5

# params for linear cost
atlas_genes = datatrain.var
markers = list(atlas_genes.index)
num_genes = len(markers)
atlas_matrix = datatrain.to_df().values
markers_idx = pd.DataFrame({'markers_idx': np.arange(num_genes)}, index=markers)
markers_to_use = np.concatenate(markers_idx.loc[markers].values)

In [None]:
tissue.setup_reconstruction(atlas_matrix=atlas_matrix, 
                        markers_to_use=markers_to_use, 
                        num_neighbors_s=num_neighbors_s, 
                        num_neighbors_t=num_neighbors_t)
        
tissue.reconstruct(alpha_linear=0.8, epsilon=5e-3)

location_pred = tissue.gw

In [None]:
location_sum = np.sum(location_pred, axis=1)
location_pred_copy = location_pred / location_sum.reshape(len(location_sum), 1)

pred_cord_transform = location_pred_copy.dot(np.array(Rdata.obs[['x_cord', 'y_cord']]))
pred_cord_transform[:, 0] = -pred_cord_transform[:, 0] + 2*np.quantile(data_merfish.obs['x_cord'], 0.5)

pred_cord_transform = pd.DataFrame(pred_cord_transform)
pred_cord_transform.index = Qdata.obs.index
pred_cord_transform.columns = ['x', 'y']

pred_cord_transform.to_csv("output/brain/novosparc_brain_right.csv")