In [1]:
import os
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist, squareform, pdist
from scipy.stats import ks_2samp
from scipy.stats import pearsonr

import os,csv,re
import math
from skimage import io, color

from scipy.sparse import issparse
import random, torch
import warnings
warnings.filterwarnings("ignore")
import pickle
from sklearn.model_selection import train_test_split
from anndata import AnnData, read_h5ad
import seaborn as sns

import json

from spaotsc import SpaOTsc

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the MERFISH cell-by-gene table and the per-cell metadata table.
data_merfish_raw = pd.read_csv(
    "data/datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_by_gene_S1R1.csv",
    index_col=0,
)
meta_data = pd.read_csv(
    "data/datasets_mouse_brain_map_BrainReceptorShowcase_Slice1_Replicate1_cell_metadata_S1R1.csv",
    index_col=0,
)

# Wrap the table in an AnnData object (rows become observations).
data_merfish = AnnData(data_merfish_raw)

# Attach the cell centre coordinates as obs columns. The values are assigned
# by position, not by index label.
# NOTE(review): this assumes both CSV files list the cells in the same order -- confirm.
for obs_key, meta_key in (("x_cord", "center_x"), ("y_cord", "center_y")):
    data_merfish.obs[obs_key] = meta_data[meta_key].tolist()

# Keep an unfiltered snapshot. From here on `data_merfish_raw` is an AnnData
# copy, no longer the DataFrame read above.
data_merfish_raw = data_merfish.copy()

# Quality filtering: first by minimum total counts, then by minimum number of
# detected genes.
sc.pp.filter_cells(data_merfish, min_counts=500)
sc.pp.filter_cells(data_merfish, min_genes=100)

# Neighbourhood graph built directly on X, followed by Louvain clustering.
sc.pp.neighbors(data_merfish, n_neighbors=15, use_rep="X", random_state=1)
sc.tl.louvain(data_merfish, resolution=0.4, random_state=1)

In [6]:
# Split the slice at the median x coordinate. Qdata takes the cells on or below
# the median (the "left" half in the cells that follow); Rdata takes the cells
# strictly above it (the "right" half).
x_cord = data_merfish.obs['x_cord']
x_midline = np.quantile(x_cord, 0.5)
Qdata = data_merfish[x_cord <= x_midline]
Rdata = data_merfish[x_cord > x_midline]

In [7]:
## right brain as training, left brain as testing
datatest = Qdata.copy()
datatrain = Rdata.copy()

# Seed every random number generator in use so the run is repeatable.
random.seed(2021)
torch.manual_seed(2021)
np.random.seed(2021)

## Running spaOTsc
df_sc = pd.DataFrame(datatest.X)

# Pairwise spatial distances between training cells. The coordinate columns are
# selected by name rather than by position so the result does not depend on
# the order of the columns in `obs`.
train_coords = np.array(datatrain.obs[['x_cord', 'y_cord']])
is_dmat = cdist(train_coords, train_coords, 'euclidean')

# Pairwise expression distances between test cells.
sc_dmat = cdist(datatest.X, datatest.X, 'euclidean')

spsc = SpaOTsc.spatial_sc(sc_data=df_sc, is_dmat=is_dmat, sc_dmat=sc_dmat)

# Expression distance from every test cell (rows) to every training cell (columns).
cost_matrix = cdist(datatest.X, datatrain.X, 'euclidean')
location_pred = spsc.transport_plan(cost_matrix)

In [None]:
# Normalise each row of the transport plan so its weights sum to 1.
# NOTE(review): assumes `location_pred` is (test cells x training cells), i.e.
# the same orientation as the cost matrix passed to transport_plan -- confirm.
location_sum = np.sum(location_pred, axis=1)
location_pred_copy = location_pred / location_sum.reshape(len(location_sum), 1)

# Predicted position of each test (left) cell = weighted average of the
# training (right) cell coordinates.
pred_cord_transform = location_pred_copy.dot(np.array(Rdata.obs[['x_cord', 'y_cord']]))

# Reflect the predicted x across the median-x line so the predictions land in
# the test half instead of the training half.
x_midline = np.quantile(data_merfish.obs['x_cord'], 0.5)
pred_cord_transform[:, 0] = -pred_cord_transform[:, 0] + 2*x_midline

pred_cord_transform = pd.DataFrame(pred_cord_transform)
pred_cord_transform.index = Qdata.obs.index
pred_cord_transform.columns = ['x', 'y']

# Create the output directory if needed so the write does not fail on a fresh
# checkout after the expensive transport computation.
os.makedirs("output/brain", exist_ok=True)
pred_cord_transform.to_csv("output/brain/spaotsc_brain_left.csv")

In [None]:
## left brain as training, right brain as testing
datatest = Rdata.copy()
datatrain = Qdata.copy()

# Seed every random number generator in use so the run is repeatable.
random.seed(2021)
torch.manual_seed(2021)
np.random.seed(2021)

## Running spaOTsc
df_sc = pd.DataFrame(datatest.X)

# Pairwise spatial distances between training cells. The coordinate columns are
# selected by name rather than by position so the result does not depend on
# the order of the columns in `obs`.
train_coords = np.array(datatrain.obs[['x_cord', 'y_cord']])
is_dmat = cdist(train_coords, train_coords, 'euclidean')

# Pairwise expression distances between test cells.
sc_dmat = cdist(datatest.X, datatest.X, 'euclidean')

spsc = SpaOTsc.spatial_sc(sc_data=df_sc, is_dmat=is_dmat, sc_dmat=sc_dmat)

# Expression distance from every test cell (rows) to every training cell (columns).
cost_matrix = cdist(datatest.X, datatrain.X, 'euclidean')
location_pred = spsc.transport_plan(cost_matrix)

In [None]:
# Normalise each row of the transport plan so its weights sum to 1.
# NOTE(review): assumes `location_pred` is (test cells x training cells), i.e.
# the same orientation as the cost matrix passed to transport_plan -- confirm.
location_sum = np.sum(location_pred, axis=1)
location_pred_copy = location_pred / location_sum.reshape(len(location_sum), 1)

# In this direction the left half (Qdata) is the training set and the right
# half (Rdata) is the test set. The plan's columns therefore correspond to
# Qdata cells, so the weighted average must use Qdata's coordinates (the
# original used Rdata's, copied over from the opposite direction).
pred_cord_transform = location_pred_copy.dot(np.array(Qdata.obs[['x_cord', 'y_cord']]))

# Reflect the predicted x across the median-x line so the predictions land in
# the test (right) half instead of the training (left) half.
x_midline = np.quantile(data_merfish.obs['x_cord'], 0.5)
pred_cord_transform[:, 0] = -pred_cord_transform[:, 0] + 2*x_midline

# One row per test cell, so the rows are labelled with the Rdata cell IDs.
pred_cord_transform = pd.DataFrame(pred_cord_transform)
pred_cord_transform.index = Rdata.obs.index
pred_cord_transform.columns = ['x', 'y']

# Create the output directory if needed so the write does not fail on a fresh
# checkout after the expensive transport computation.
os.makedirs("output/brain", exist_ok=True)
pred_cord_transform.to_csv("output/brain/spaotsc_brain_right.csv")