In [None]:
import novosparc as ns

import os
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import altair as alt
from scipy.spatial.distance import cdist, squareform, pdist
from scipy.stats import ks_2samp
from scipy.stats import pearsonr

import random
random.seed(0)

from skimage import io, color
import torch
from torch.nn import functional as F
import json

from sklearn.model_selection import train_test_split
from anndata import AnnData, read_h5ad

In [None]:
# Path of the input AnnData file (a liver MERFISH dataset, judging by its name).
MERFISH_H5AD = "data/liver_merfish.h5ad"

# Later cells read obs['louvain'], obs['x_cord'] and obs['y_cord'] from this object.
data_merfish = read_h5ad(MERFISH_H5AD)

In [None]:
def _stratified_index_split(adata, test_size):
    """Split the row positions of ``adata`` into two groups, stratified by
    ``adata.obs['louvain']``.

    Returns ``(first_idx, second_idx)`` as produced by ``train_test_split``
    with ``random_state=1``; ``second_idx`` is the ``test_size`` partition.
    """
    labels = adata.obs['louvain']
    first_idx, second_idx, _, _ = train_test_split(
        range(adata.shape[0]),
        labels,
        test_size=test_size,
        random_state=1,
        stratify=labels,
    )
    return first_idx, second_idx


# Step 1: halve the dataset into a reference set (Rdata) and a query set (Qdata).
Rdata_ind, Qdata_ind = _stratified_index_split(data_merfish, 0.5)
Rdata = data_merfish[np.sort(Rdata_ind), :]
Qdata = data_merfish[np.sort(Qdata_ind), :]

# Step 2: the full halves are too large to run on, so each one is downsampled.
# NOTE: in both splits below it is the second (test_size=0.3) partition that is
# kept, i.e. each half is reduced to roughly 30% of its cells.
Rdata_ind, Qdata_ind = _stratified_index_split(Rdata, 0.3)
Rdata = Rdata[np.sort(Qdata_ind), :]

Rdata_ind, Qdata_ind = _stratified_index_split(Qdata, 0.3)
Qdata = Qdata[np.sort(Qdata_ind), :]

print((Rdata.shape, Qdata.shape))

In [None]:
# Seed every RNG in use (stdlib, torch, numpy) before any novoSpaRc object is built.
random.seed(2021)
torch.manual_seed(2021)
np.random.seed(2021)

# Work on independent copies so the Rdata / Qdata objects are left untouched.
datatrain = Rdata.copy()
datatest = Qdata.copy()

## Running novosparc
# The candidate target locations are the (x, y) coordinates of the reference
# cells; the cells to be placed are the query cells.
locations_apriori = datatrain.obs[['x_cord', 'y_cord']].values
tissue = ns.cm.Tissue(dataset=datatest, locations=locations_apriori)

# Neighbour counts passed to setup_reconstruction in the next cell
# (source side and target side).
num_neighbors_s = 5
num_neighbors_t = 5

# params for linear cost
# Every gene of the reference set is used as a marker, and the reference
# expression matrix serves as the atlas.
atlas_genes = datatrain.var
markers = list(atlas_genes.index)
num_genes = len(markers)
atlas_matrix = datatrain.to_df().values

# Map each marker name to its column position, then look the positions up
# again to obtain a flat integer array of marker indices.
markers_idx = pd.DataFrame(
    {'markers_idx': np.arange(num_genes)},
    index=markers,
)
markers_to_use = np.concatenate(markers_idx.loc[markers].values)

In [None]:
# Arguments for the reconstruction setup: the reference atlas, the marker
# columns to compare against it, and the neighbour counts chosen above.
reconstruction_setup = dict(
    atlas_matrix=atlas_matrix,
    markers_to_use=markers_to_use,
    num_neighbors_s=num_neighbors_s,
    num_neighbors_t=num_neighbors_t,
)
tissue.setup_reconstruction(**reconstruction_setup)

# NOTE(review): alpha_linear and epsilon are hard-coded here; presumably they
# are the weight of the marker (linear) term and the entropic regularisation
# strength -- confirm against the novoSpaRc documentation.
tissue.reconstruct(alpha_linear=0.8, epsilon=5e-3)

# Result of the reconstruction; the next cell treats it as a
# (query cell x reference location) weight matrix.
location_pred = tissue.gw

In [None]:
# Normalise each row of the reconstruction result so its weights sum to 1.
location_sum = np.sum(location_pred, axis=1)
location_pred_copy = location_pred / location_sum[:, np.newaxis]

# Predicted coordinate of every query cell = weighted average of the
# reference cells' (x, y) coordinates, using the normalised weights.
reference_coords = np.array(Rdata.obs[['x_cord', 'y_cord']])
pred_cord_transform = location_pred_copy.dot(reference_coords)

# np.save does not create missing directories; make sure the target folder
# exists so the result of the (expensive) reconstruction is not lost to a
# FileNotFoundError.
output_path = "output/liver/novosparc_liver.npy"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
np.save(output_path, pred_cord_transform)