In [1]:
# imports
%matplotlib inline

import novosparc

import os
import cv2
import numpy as np
import pandas as pd
import pandas.plotting
import scanpy as sc
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.animation import FuncAnimation
import altair as alt
from scipy.spatial.distance import cdist, squareform, pdist
from scipy.stats import ks_2samp
from scipy.stats import pearsonr
from ipywidgets import interact, HBox, VBox, Output
import kaleido

import copy

from ot.bregman import sinkhorn
import scipy.stats as stats
from sklearn.neighbors import kneighbors_graph

import random

# Seed both global random number generators so that a fresh
# "Restart Kernel -> Run All" starts from the same state every time.
# Seeding only the stdlib `random` module leaves NumPy's global generator
# (the one NumPy-based libraries draw from by default) unseeded.
random.seed(0)
np.random.seed(0)

In [4]:
# Load the single-cell expression table into a scanpy AnnData object.
data_dir = '../../novosparc/novosparc/datasets/drosophila_scRNAseq/'
data_path = os.path.join(data_dir, 'dge_normalized.txt')

# The table is transposed right after reading so that rows are cells and
# columns are genes.
dataset = sc.read(data_path).T

# Gene identifiers, in the same order as the columns of `dataset`.
gene_names = dataset.var_names.tolist()

# Dimensions of the transposed matrix: (cells, genes).
num_cells = dataset.n_obs
num_genes = dataset.n_vars

print('number of cells: %d' % num_cells)
print('number of genes: %d' % num_genes)

number of cells: 1297
number of genes: 8924


In [5]:
atlas_dir = '../../novosparc/novosparc/datasets/bdtnp/'

# Geometry table with one x/y/z coordinate triple per row
# (original author's note: the file lists 6078 points).
target_space_path = os.path.join(atlas_dir, 'geometry.txt')
locations = pd.read_csv(target_space_path, sep=' ')

# Only the first `num_locations` rows are kept as target locations,
# restricted to the three coordinate columns.
num_locations = 3039
locations_apriori = locations.iloc[:num_locations][['xcoord', 'ycoord', 'zcoord']].to_numpy()

print(locations_apriori)
print(len(locations_apriori))

[[-194.04    0.22   30.2 ]
 [-203.32    1.93   20.08]
 [-200.25    3.59   23.93]
 ...
 [-118.4    33.4   -48.86]
 [ -20.05    3.22  -78.11]
 [  -0.25   21.24   63.68]]
3039


In [6]:
# Reference atlas expression table, read from the same directory as the
# geometry file.
atlas_path = os.path.join(atlas_dir, 'dge.txt')
# NOTE(review): the original note said the atlas has "3039 genes"; 3039 is
# also `num_locations`, so this presumably counts atlas rows (locations),
# not genes. Confirm against atlas.shape.
atlas = sc.read(atlas_path) # Load the atlas
# Attach the target coordinates to the atlas under the 'spatial' key.
atlas.obsm['spatial'] = locations_apriori

In [7]:
# Genes present in both the scRNA-seq dataset and the atlas.
# The atlas gene names are walked in their stored order (dict.fromkeys drops
# any repeated name while keeping first-seen order) and filtered by set
# membership. Building the list straight from a set intersection would give
# an order that depends on Python's per-process string hash seed, i.e. a
# different gene order after every kernel restart.
dataset_gene_set = set(dataset.var_names)
inter = [gene for gene in dict.fromkeys(atlas.var_names) if gene in dataset_gene_set]

# Atlas restricted to the shared genes, and the resulting gene list.
subatlas = atlas[:, inter]
atlas_genes = subatlas.var.index.tolist()
len(atlas_genes)

80

In [8]:
# Per-cell total-count normalisation followed by log(1 + x).
# Both calls modify `dataset` in place, so running this cell a second time
# would transform already-transformed values. Recent scanpy versions record
# a 'log1p' entry in `dataset.uns` once sc.pp.log1p has run; use it as a
# guard so the cell is safe to re-run. (On versions that do not write the
# entry the guard is always true and the cell behaves as before.)
if 'log1p' not in dataset.uns:
    sc.pp.normalize_total(dataset)
    sc.pp.log1p(dataset)

In [9]:
# Bundle the expression data and the target locations into a novosparc
# Tissue object; the later setup/reconstruct calls operate on it.
tissue = novosparc.cm.Tissue(
    dataset=dataset,
    locations=locations_apriori,
)

In [10]:
# params for smooth cost: neighbour counts forwarded to setup_reconstruction
num_neighbors_s = 3
num_neighbors_t = 5

# params for linear cost
# Marker genes: atlas genes that are also dataset genes. The atlas gene list
# is filtered in its own order (dict.fromkeys drops repeats, keeps order)
# rather than going through a set intersection, whose iteration order for
# strings changes between Python processes.
gene_name_set = set(gene_names)
markers = [gene for gene in dict.fromkeys(atlas_genes) if gene in gene_name_set]

# Atlas expression values for the markers, columns ordered like `markers`.
atlas_matrix = subatlas.to_df()[markers].values

# Lookup table: gene name -> column position in `dataset`; used to translate
# the marker names into column indices, in the same order as `markers`.
markers_idx = pd.DataFrame({'markers_idx': np.arange(num_genes)}, index=gene_names)
markers_to_use = np.concatenate(markers_idx.loc[markers].values)

# alternative 1: setup both assumptions
# An atlas is available here, so setup_reconstruction can be used directly.
# The two cost matrices can also be computed separately (alternative 2).
tissue.setup_reconstruction(
    atlas_matrix=atlas_matrix,
    markers_to_use=markers_to_use,
    num_neighbors_s=num_neighbors_s,
    num_neighbors_t=num_neighbors_t,
)

# alternative 2: handling each assumption separately
# NOTE(review): `dge_rep` is not defined in the visible cells; define it
# before enabling these lines.
#tissue.setup_smooth_costs(dge_rep=dge_rep)
#tissue.setup_linear_cost(markers_to_use, atlas_matrix)

Setting up for reconstruction ... done ( 3.25 seconds )


In [11]:
# Reconstruction hyper-parameters, forwarded by keyword to tissue.reconstruct.
alpha_linear = 0.35
epsilon = 5e-3

tissue.reconstruct(
    alpha_linear=alpha_linear,
    epsilon=epsilon,
)

Reconstructing spatial information with 80 markers: 1297 cells and 3039 locations ... 
Trying with epsilon: 5.00e-03


In [12]:
# Expression matrix stored on the tissue object.
# NOTE(review): the original comment expanded "sdge" as "simulated
# differential gene expression"; confirm the acronym against novosparc.
sdge = tissue.sdge

# Wrap the transposed matrix in an AnnData whose columns are labelled with
# the dataset's gene names, then attach the target coordinates so the
# result can be plotted spatially.
dataset_reconst = sc.AnnData(
    pd.DataFrame(sdge.T, columns=gene_names)
)
dataset_reconst.obsm['spatial'] = locations_apriori



In [17]:
# Plot gene ImpE2 from the reconstructed dataset.
# NOTE(review): embedding_3D_plotly is not defined in the visible cells;
# its defining cell must have been run before this one.
embedding_3D_plotly(
    dataset_reconst,
    ["ImpE2"],
    title_fig="Zinzen_ImpE2.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)

In [18]:
# Plot gene sna from the reconstructed dataset (same settings as the other
# single-gene figures; only the gene and the figure title differ).
embedding_3D_plotly(
    dataset_reconst,
    ["sna"],
    title_fig="Zinzen_sna.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)

In [19]:
# Plot gene eve from the reconstructed dataset (same settings as the other
# single-gene figures; only the gene and the figure title differ).
embedding_3D_plotly(
    dataset_reconst,
    ["eve"],
    title_fig="Zinzen_eve.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)

In [16]:
# Plot gene sca from the reconstructed dataset (same settings as the other
# single-gene figures; only the gene and the figure title differ).
embedding_3D_plotly(
    dataset_reconst,
    ["sca"],
    title_fig="Zinzen_sca.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)

In [20]:
# Plot gene vnd from the reconstructed dataset (same settings as the other
# single-gene figures; only the gene and the figure title differ).
embedding_3D_plotly(
    dataset_reconst,
    ["vnd"],
    title_fig="Zinzen_vnd.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)

In [21]:
# Plot gene Lim1 from the reconstructed dataset (same settings as the other
# single-gene figures; only the gene and the figure title differ).
embedding_3D_plotly(
    dataset_reconst,
    ["Lim1"],
    title_fig="Zinzen_Lim1.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)

In [23]:
# Plot gene Mef2 from the reconstructed dataset (same settings as the other
# single-gene figures; only the gene and the figure title differ).
embedding_3D_plotly(
    dataset_reconst,
    ["Mef2"],
    title_fig="Zinzen_Mef2.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)

In [24]:
# Plot gene opa from the reconstructed dataset (same settings as the other
# single-gene figures; only the gene and the figure title differ).
embedding_3D_plotly(
    dataset_reconst,
    ["opa"],
    title_fig="Zinzen_opa.png",
    screenshot=True,
    PCR_mode=False,
    threshold=0,
    pal="ice",
)