In [1]:
%load_ext autoreload
%autoreload 2

import tqdm, sys, os, time, logging, warnings
from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
warnings.simplefilter('ignore', category=NumbaDeprecationWarning)
warnings.simplefilter('ignore', category=NumbaPendingDeprecationWarning)

import pandas as pd
import numpy as np
import scipy as sp
import scipy.sparse as sps

import scanpy as sc
import anndata as ad
import muon as mu
import scvi

import plotly.express as px
import plotly.graph_objects as go
# sc.set_figure_params(dpi=100, fontsize=10, figsize="8,8", color_map='viridis', transparent=True)
from sklearn.metrics import adjusted_rand_score as ari

Global seed set to 0


# Load

In [2]:
mdata = mu.read_h5mu("data/processed/scRNAseq_snRNAseq_filteredQC_processedPCA.h5mu")
scrna = mdata['scrna']
snrna = mdata['snrna']

mdata.X = sps.vstack([mdata['scrna'].X, mdata['snrna'].X])
mdata

In [3]:
mdata.X[:5, :5].todense()

matrix([[3.7340188 , 0.        , 0.        , 0.        , 0.        ],
        [2.933109  , 0.        , 0.        , 0.        , 0.        ],
        [3.1178575 , 0.414911  , 0.        , 0.        , 0.        ],
        [3.3489263 , 0.        , 0.        , 0.38774753, 0.        ],
        [3.7994354 , 1.0549483 , 0.        , 0.        , 0.        ]],
       dtype=float32)

In [4]:
scrna.X[:5, :5].todense()

matrix([[3.7340188 , 0.        , 0.        , 0.        , 0.        ],
        [2.933109  , 0.        , 0.        , 0.        , 0.        ],
        [3.1178575 , 0.414911  , 0.        , 0.        , 0.        ],
        [3.3489263 , 0.        , 0.        , 0.38774753, 0.        ],
        [3.7994354 , 1.0549483 , 0.        , 0.        , 0.        ]],
       dtype=float32)

In [5]:
scrna.obsm['counts'][:5, :5].todense()

matrix([[49.,  0.,  0.,  0.,  0.],
        [14.,  0.,  0.,  0.,  0.],
        [42.,  1.,  0.,  0.,  0.],
        [58.,  0.,  0.,  1.,  0.],
        [70.,  3.,  0.,  0.,  0.]], dtype=float32)

In [6]:
scrna

AnnData object with n_obs × n_vars = 74132 × 31053
    obs: 'Amp_Date', 'Amp_Name', 'Amp_PCR_cyles', 'Cell_Capture', 'Donor', 'Gender', 'Lib_Cells', 'Lib_Date', 'Lib_Name', 'Lib_PCR_cycles', 'Lib_PassFail', 'Lib_type', 'Live_Cells', 'Live_percent', 'Mean_Reads_perCell', 'Median_Genes_perCell', 'Median_UMI_perCell', 'Region', 'Replicate_Lib', 'Saturation', 'Seq_batch', 'Total_Cells', 'aggr_num', 'class_label', 'cluster_color', 'cluster_id', 'cluster_label', 'doublet.score', 'exp_component_name', 'gene.counts', 'library_id', 'mapped_reads', 'method', 'nonconf_mapped_reads', 'size', 'subclass_label', 'total.reads', 'tube_barcode', 'umi.counts', 'unmapped_reads', 'dataset'
    var: 'gene_id', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'hvg', 'log1p', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap', 'counts'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'

# SCVI

In [7]:
mdata.obs

Unnamed: 0_level_0,scrna:Amp_Date,scrna:Amp_Name,scrna:Amp_PCR_cyles,scrna:Cell_Capture,scrna:Donor,scrna:Gender,scrna:Lib_Cells,scrna:Lib_Date,scrna:Lib_Name,scrna:Lib_PCR_cycles,...,snrna:nUMI,class_label,cluster_color,cluster_id,cluster_label,gene.counts,size,subclass_label,umi.counts,dataset
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACCCAAGCTTCATG-1L8TX_181211_01_G12,11/29/2018,A8TX_181129_03_A03,12.0,5898.0,426003,Male,9733.0,12/11/2018,L8TX_181211_01_G12,9.0,...,,Glutamatergic,#52CA74,42,L5 IT Tcap_2,7122.000000,17334.0,L5 IT,55812.000000,scRNA_10x_v3 A
AAACCCAAGTGAGGTC-1L8TX_181211_01_G12,11/29/2018,A8TX_181129_03_A03,12.0,5898.0,426003,Male,9733.0,12/11/2018,L8TX_181211_01_G12,9.0,...,,Glutamatergic,#5DDB65,41,L5 IT Tcap_1,6125.000000,7462.0,L5 IT,36622.000000,scRNA_10x_v3 A
AAACCCACACCAGCCA-1L8TX_181211_01_G12,11/29/2018,A8TX_181129_03_A03,12.0,5898.0,426003,Male,9733.0,12/11/2018,L8TX_181211_01_G12,9.0,...,,Glutamatergic,#52CA74,42,L5 IT Tcap_2,8322.000000,17334.0,L5 IT,90475.000000,scRNA_10x_v3 A
AAACCCAGTGAACGGT-1L8TX_181211_01_G12,11/29/2018,A8TX_181129_03_A03,12.0,5898.0,426003,Male,9733.0,12/11/2018,L8TX_181211_01_G12,9.0,...,,Glutamatergic,#5DDB65,41,L5 IT Tcap_1,8681.000000,7462.0,L5 IT,98226.000000,scRNA_10x_v3 A
AAACCCAGTGGCATCC-1L8TX_181211_01_G12,11/29/2018,A8TX_181129_03_A03,12.0,5898.0,426003,Male,9733.0,12/11/2018,L8TX_181211_01_G12,9.0,...,,GABAergic,#B09FFF,20,Vip Gpc3,8081.000000,314.0,Vip,74567.000000,scRNA_10x_v3 A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
pBICCNsMMrMOpRMiM006d190320_TTTGGTTCATGAGTAA,,,,,,,,,,,...,20193.0,Glutamatergic,#3CBC92,53,L6 IT Sulf1_1,5218.066616,3978.0,L6 IT,18064.753140,snRNA_10x_v3 B
pBICCNsMMrMOpRMiM006d190320_TTTGGTTTCGCAAGAG,,,,,,,,,,,...,2858.0,Non-Neuronal,#474662,90,Oligo Opalin_4,1740.348364,16566.0,Oligo,3419.937704,snRNA_10x_v3 B
pBICCNsMMrMOpRMiM006d190320_TTTGTTGAGACTCTTG,,,,,,,,,,,...,35854.0,Glutamatergic,#00CF1E,51,L5 IT S100b,5464.036043,8684.0,L5 IT,20579.022920,snRNA_10x_v3 B
pBICCNsMMrMOpRMiM006d190320_TTTGTTGTCACCTTGC,,,,,,,,,,,...,23493.0,Glutamatergic,#3CBC92,52,L5 IT Pld5,5680.692074,3621.0,L5 IT,22718.304610,snRNA_10x_v3 B


In [8]:
scvi.model.SCVI.setup_anndata(mdata, 
                              layer='counts', 
                              batch_key="sample_name", 
                              labels_key='subclass_label')


ValueError: Detected unpaired observations in modality scrna. Please make sure that data is fully paired in all MuData inputs. Either pad the unpaired modalities or take the intersection with muon.pp.intersect_obs().