In [1]:
from typing import Dict, Iterable, Optional
import scanpy as sc
import numpy as np
import torch
from torch.distributions import Normal, Poisson
from torch.distributions import kl_divergence as kld
import scvi
from scvi import REGISTRY_KEYS
from scvi._compat import Literal
from scvi.distributions import NegativeBinomial, ZeroInflatedNegativeBinomial
from scvi.module._peakvae import Decoder as DecoderPeakVI
from scvi.module.base import BaseModuleClass, LossRecorder, auto_move_data
from scvi.nn import DecoderSCVI, Encoder, FCLayers
from scvi.module import MULTIVAE

from anndata import AnnData
from typing import Dict, Iterable, List, Optional, Sequence, Union
from scvi.model import MULTIVI 
import pandas as pd


Global seed set to 0


In [2]:
adata_multi = sc.read_h5ad("halo/E18_mouse_Brain/multiomic.h5ad")
adata_multi.obs["batch_id"] = 1
adata_multi.var["modality"] =adata_multi.var["feature_types"]
adata_mvi = scvi.data.organize_multiome_anndatas(adata_multi)

In [6]:
adata_mvi

AnnData object with n_obs × n_vars = 3365 × 176722
    obs: 'celltype', 'batch_id', 'modality'
    var: 'gene_ids', 'feature_types', 'modality'

In [7]:
adata_mvi.obs.celltype

AAACAGCCAACCGCCA-1_paired        Upper Layer
AAACAGCCAAGGTCGA-1_paired     RG, Astro, OPC
AAACAGCCAGGAACAT-1_paired       Deeper Layer
AAACAGCCATATTGAC-1_paired       Deeper Layer
AAACAGCCATGGTTAT-1_paired           Subplate
                                  ...       
TTTGTGGCATAATCGT-1_paired    Ependymal cells
TTTGTGGCATTTGCTC-1_paired        Upper Layer
TTTGTGTTCAATGACC-1_paired                IPC
TTTGTTGGTGGAGCAA-1_paired       Deeper Layer
TTTGTTGGTTAGAGCC-1_paired        Upper Layer
Name: celltype, Length: 3365, dtype: category
Categories (7, object): ['Upper Layer', 'Deeper Layer', 'V-SVZ', 'RG, Astro, OPC', 'Ependymal cells', 'IPC', 'Subplate']

In [18]:
import pandas as pd
df_meta= pd.read_csv("halo/E18_mouse_Brain/RNA/metadata.tsv",sep = "\t",index_col=0)

In [19]:
df_meta

Unnamed: 0,n_counts,celltype,velo_s_norm_self_transition,root_cells,end_points,velo_s_norm_pseudotime,latent_time,binned
AAACAGCCAACCGCCA-1,681.0,Upper Layer,0.057197,0.037962,0.086987,0.857504,0.604954,"(0.6, 0.65]"
AAACAGCCAAGGTCGA-1,296.0,"RG, Astro, OPC",0.114116,0.049274,0.396248,0.213945,0.281341,"(0.25, 0.3]"
AAACAGCCAGGAACAT-1,504.0,Deeper Layer,0.082940,0.020133,0.338813,0.958743,0.851495,"(0.85, 0.9]"
AAACAGCCATATTGAC-1,321.0,Deeper Layer,0.106146,0.003566,0.735495,0.981135,0.982605,"(0.95, 1.0]"
AAACAGCCATGGTTAT-1,432.0,Subplate,0.103904,0.036739,0.134693,0.848509,0.549869,"(0.5, 0.55]"
...,...,...,...,...,...,...,...,...
TTTGTGGCATAATCGT-1,1248.0,Ependymal cells,0.089063,0.556196,0.005403,0.889134,0.454965,"(0.45, 0.5]"
TTTGTGGCATTTGCTC-1,451.0,Upper Layer,0.049716,0.013520,0.407998,0.861768,0.598991,"(0.55, 0.6]"
TTTGTGTTCAATGACC-1,369.0,IPC,0.130270,0.499946,0.019731,0.604746,0.291581,"(0.25, 0.3]"
TTTGTTGGTGGAGCAA-1,513.0,Deeper Layer,0.038342,0.062265,0.113687,0.948574,0.776289,"(0.75, 0.8]"


In [20]:
binned_time = df_meta.binned.unique()

In [21]:
## add time labels
name_dict = {}
i = 0
for time in sorted(binned_time):
    name_dict[time] = i
    i += 1
name_dict    

{'(-0.001, 0.05]': 0,
 '(0.05, 0.1]': 1,
 '(0.1, 0.15]': 2,
 '(0.15, 0.2]': 3,
 '(0.2, 0.25]': 4,
 '(0.25, 0.3]': 5,
 '(0.3, 0.35]': 6,
 '(0.35, 0.4]': 7,
 '(0.4, 0.45]': 8,
 '(0.45, 0.5]': 9,
 '(0.5, 0.55]': 10,
 '(0.55, 0.6]': 11,
 '(0.6, 0.65]': 12,
 '(0.65, 0.7]': 13,
 '(0.7, 0.75]': 14,
 '(0.75, 0.8]': 15,
 '(0.8, 0.85]': 16,
 '(0.85, 0.9]': 17,
 '(0.9, 0.95]': 18,
 '(0.95, 1.0]': 19}

In [23]:
df_meta.apply(lambda row: name_dict[row.binned],axis=1)

AAACAGCCAACCGCCA-1    12
AAACAGCCAAGGTCGA-1     5
AAACAGCCAGGAACAT-1    17
AAACAGCCATATTGAC-1    19
AAACAGCCATGGTTAT-1    10
                      ..
TTTGTGGCATAATCGT-1     9
TTTGTGGCATTTGCTC-1    11
TTTGTGTTCAATGACC-1     5
TTTGTTGGTGGAGCAA-1    15
TTTGTTGGTTAGAGCC-1    14
Length: 3365, dtype: int64

In [24]:
df_meta['time_label'] = df_meta.apply(lambda row: name_dict[row.binned], axis=1)
df_meta.head()

Unnamed: 0,n_counts,celltype,velo_s_norm_self_transition,root_cells,end_points,velo_s_norm_pseudotime,latent_time,binned,time_label
AAACAGCCAACCGCCA-1,681.0,Upper Layer,0.057197,0.037962,0.086987,0.857504,0.604954,"(0.6, 0.65]",12
AAACAGCCAAGGTCGA-1,296.0,"RG, Astro, OPC",0.114116,0.049274,0.396248,0.213945,0.281341,"(0.25, 0.3]",5
AAACAGCCAGGAACAT-1,504.0,Deeper Layer,0.08294,0.020133,0.338813,0.958743,0.851495,"(0.85, 0.9]",17
AAACAGCCATATTGAC-1,321.0,Deeper Layer,0.106146,0.003566,0.735495,0.981135,0.982605,"(0.95, 1.0]",19
AAACAGCCATGGTTAT-1,432.0,Subplate,0.103904,0.036739,0.134693,0.848509,0.549869,"(0.5, 0.55]",10


In [39]:
name = np.array(df_meta.index)
newname = [n+"_paired" for n in name]
newname

['AAACAGCCAACCGCCA-1_paired',
 'AAACAGCCAAGGTCGA-1_paired',
 'AAACAGCCAGGAACAT-1_paired',
 'AAACAGCCATATTGAC-1_paired',
 'AAACAGCCATGGTTAT-1_paired',
 'AAACATGCACTTACAG-1_paired',
 'AAACCAACAACTGGGA-1_paired',
 'AAACCGCGTCACCAAA-1_paired',
 'AAACCGCGTGCAATGC-1_paired',
 'AAACCGGCAACTAACT-1_paired',
 'AAACCGGCAATTAGGA-1_paired',
 'AAACGCGCAGCTAATT-1_paired',
 'AAACGCGCAGTTATCG-1_paired',
 'AAACGCGCATCACTTC-1_paired',
 'AAACGTACAAGGCCAA-1_paired',
 'AAACGTACATCCTGAA-1_paired',
 'AAACGTACATGTGGGA-1_paired',
 'AAAGCAAGTGAGGTAG-1_paired',
 'AAAGCAAGTTGCTGGG-1_paired',
 'AAAGCACCAACTGGCT-1_paired',
 'AAAGCCCGTATGGTGC-1_paired',
 'AAAGCCCGTTCACCAT-1_paired',
 'AAAGCCGCACAACCTA-1_paired',
 'AAAGCCGCAGGCAAGC-1_paired',
 'AAAGCCGCATCCGTAA-1_paired',
 'AAAGCTTGTCATGCCC-1_paired',
 'AAAGGAGCAATTTAGC-1_paired',
 'AAAGGCTCAAATATCC-1_paired',
 'AAAGGCTCAAGGTACG-1_paired',
 'AAAGGCTCAATATACC-1_paired',
 'AAAGGCTCAATCCCTT-1_paired',
 'AAAGGCTCACCTGGTG-1_paired',
 'AAAGGCTCATAGCGGA-1_paired',
 'AAAGGCTC

In [37]:
df_meta.apply(lambda row: row.index+'_paired', axis=1)

AAACAGCCAACCGCCA-1    Index(['n_counts_paired', 'celltype_paired',
 ...
AAACAGCCAAGGTCGA-1    Index(['n_counts_paired', 'celltype_paired',
 ...
AAACAGCCAGGAACAT-1    Index(['n_counts_paired', 'celltype_paired',
 ...
AAACAGCCATATTGAC-1    Index(['n_counts_paired', 'celltype_paired',
 ...
AAACAGCCATGGTTAT-1    Index(['n_counts_paired', 'celltype_paired',
 ...
                                            ...                        
TTTGTGGCATAATCGT-1    Index(['n_counts_paired', 'celltype_paired',
 ...
TTTGTGGCATTTGCTC-1    Index(['n_counts_paired', 'celltype_paired',
 ...
TTTGTGTTCAATGACC-1    Index(['n_counts_paired', 'celltype_paired',
 ...
TTTGTTGGTGGAGCAA-1    Index(['n_counts_paired', 'celltype_paired',
 ...
TTTGTTGGTTAGAGCC-1    Index(['n_counts_paired', 'celltype_paired',
 ...
Length: 3365, dtype: object

In [42]:
df_meta['paired_label'] = newname

In [43]:
df_meta

Unnamed: 0,n_counts,celltype,velo_s_norm_self_transition,root_cells,end_points,velo_s_norm_pseudotime,latent_time,binned,time_label,paired_label
AAACAGCCAACCGCCA-1,681.0,Upper Layer,0.057197,0.037962,0.086987,0.857504,0.604954,"(0.6, 0.65]",12,AAACAGCCAACCGCCA-1_paired
AAACAGCCAAGGTCGA-1,296.0,"RG, Astro, OPC",0.114116,0.049274,0.396248,0.213945,0.281341,"(0.25, 0.3]",5,AAACAGCCAAGGTCGA-1_paired
AAACAGCCAGGAACAT-1,504.0,Deeper Layer,0.082940,0.020133,0.338813,0.958743,0.851495,"(0.85, 0.9]",17,AAACAGCCAGGAACAT-1_paired
AAACAGCCATATTGAC-1,321.0,Deeper Layer,0.106146,0.003566,0.735495,0.981135,0.982605,"(0.95, 1.0]",19,AAACAGCCATATTGAC-1_paired
AAACAGCCATGGTTAT-1,432.0,Subplate,0.103904,0.036739,0.134693,0.848509,0.549869,"(0.5, 0.55]",10,AAACAGCCATGGTTAT-1_paired
...,...,...,...,...,...,...,...,...,...,...
TTTGTGGCATAATCGT-1,1248.0,Ependymal cells,0.089063,0.556196,0.005403,0.889134,0.454965,"(0.45, 0.5]",9,TTTGTGGCATAATCGT-1_paired
TTTGTGGCATTTGCTC-1,451.0,Upper Layer,0.049716,0.013520,0.407998,0.861768,0.598991,"(0.55, 0.6]",11,TTTGTGGCATTTGCTC-1_paired
TTTGTGTTCAATGACC-1,369.0,IPC,0.130270,0.499946,0.019731,0.604746,0.291581,"(0.25, 0.3]",5,TTTGTGTTCAATGACC-1_paired
TTTGTTGGTGGAGCAA-1,513.0,Deeper Layer,0.038342,0.062265,0.113687,0.948574,0.776289,"(0.75, 0.8]",15,TTTGTTGGTGGAGCAA-1_paired


In [45]:
sub_df = df_meta[['paired_label', "time_label"]]
sub_df = sub_df.set_index("paired_label")
sub_df.head()

Unnamed: 0_level_0,time_label
paired_label,Unnamed: 1_level_1
AAACAGCCAACCGCCA-1_paired,12
AAACAGCCAAGGTCGA-1_paired,5
AAACAGCCAGGAACAT-1_paired,17
AAACAGCCATATTGAC-1_paired,19
AAACAGCCATGGTTAT-1_paired,10


In [48]:
sub_df.loc['AAACAGCCAACCGCCA-1_paired',:]

time_label    12
Name: AAACAGCCAACCGCCA-1_paired, dtype: int64

In [50]:
adata_mvi.var["time_label"] = sub_df
adata_mvi

AnnData object with n_obs × n_vars = 3365 × 176722
    obs: 'celltype', 'batch_id', 'modality', 'time_label'
    var: 'gene_ids', 'feature_types', 'modality', 'time_label'

In [56]:
adata_multi = sc.read_h5ad("halo/E18_mouse_Brain/multiomic.h5ad")
adata_multi.obs["batch_id"] = 1
adata_multi.var["modality"] =adata_multi.var["feature_types"]
adata_mvi = scvi.data.organize_multiome_anndatas(adata_multi)
df_meta= pd.read_csv("halo/E18_mouse_Brain/RNA/metadata.tsv",sep = "\t",index_col=0)

## add time label
binned_time = df_meta.binned.unique()
name_dict = {}
i = 0
for time in sorted(binned_time):
    name_dict[time] = i
    i += 1
name_dict 

## add time label to dfs
df_meta['time_label'] = df_meta.apply(lambda row: name_dict[row.binned], axis=1)
## change name:
name = np.array(df_meta.index)
newname = [n+"_paired" for n in name]
df_meta['paired_label'] = newname

## get sub dataframe
sub_df = df_meta[['paired_label', "time_label"]]
sub_df = sub_df.set_index("paired_label")
adata_mvi.obs["time_label"] = sub_df


In [57]:
adata_mvi.obs

Unnamed: 0,celltype,batch_id,modality,time_label
AAACAGCCAACCGCCA-1_paired,Upper Layer,1,paired,12
AAACAGCCAAGGTCGA-1_paired,"RG, Astro, OPC",1,paired,5
AAACAGCCAGGAACAT-1_paired,Deeper Layer,1,paired,17
AAACAGCCATATTGAC-1_paired,Deeper Layer,1,paired,19
AAACAGCCATGGTTAT-1_paired,Subplate,1,paired,10
...,...,...,...,...
TTTGTGGCATAATCGT-1_paired,Ependymal cells,1,paired,9
TTTGTGGCATTTGCTC-1_paired,Upper Layer,1,paired,11
TTTGTGTTCAATGACC-1_paired,IPC,1,paired,5
TTTGTTGGTGGAGCAA-1_paired,Deeper Layer,1,paired,15
