# scRNA-seq Analysis Pipeline

This pipeline enables you to analyze and visualize your single-cell RNA sequencing (scRNA-seq) datasets with an array of algorithms and data visualization methods. The pipeline includes quality control, library size analysis, normalization, plots of the most highly expressed genes, sample plots, MAGIC imputation, clustering, enrichment analysis, trajectory inference, and cell type prediction.

In [None]:
#%%appyter init
# Initialise appyter for this notebook. The lambda's default argument captures
# the `globals` builtin and calls it, so appyter receives this notebook's
# global namespace.
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
# Basic libraries
import pandas as pd
import random
import time
import numpy as np
import warnings


# Visualization
import scipy.stats as ss
import IPython
from IPython.display import HTML, display, Markdown, IFrame, FileLink, Image
from itertools import combinations

# Data analysis
from sklearn.decomposition import PCA
from sklearn.preprocessing import quantile_transform
from sklearn import cluster
from sklearn.metrics import silhouette_score
from sklearn.manifold import TSNE
import umap
from rpy2 import robjects
from rpy2.robjects import r, pandas2ri
from magic import MAGIC as MG
import scanpy as sc
import anndata
from maayanlab_bioinformatics.enrichment.crisp import enrich_crisp, fisher_overlap

# Bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, CustomJS, ColumnDataSource, Span, Select, Legend, PreText, Paragraph, LinearColorMapper, ColorBar, CategoricalColorMapper
from bokeh.layouts import layout, row, column, gridplot
from bokeh.palettes import all_palettes
import colorcet as cc
from bokeh.palettes import Category20

# External Code
from utils import *
output_notebook()

In [None]:
%%appyter hide_code_exec
{# Form layout: each SectionField declares one titled panel. Input fields in later cells refer to a panel by passing its `name` as their `section` argument. #}
{% do SectionField(
    name='Data_Section1',
    title='Load your Dataset 1',
    subtitle='Load your expression data. In comma/tab separated formats, genes should be in rows and samples should be in columns. You can also upload Cell Ranger files (matrix, genes, and barcodes files).',
    img='analysis.png'
) %}

{% do SectionField(
    name='Data_Section2',
    title='Load your Dataset 2',
    subtitle='Load your expression data. In comma/tab separated formats, genes should be in rows and samples should be in columns. You can also upload Cell Ranger files (matrix, genes, and barcodes files).',
    img='analysis.png'
) %}

{% do SectionField(
    name='Data_Section3',
    title='Load your Dataset 3',
    subtitle='Load your expression data. In comma/tab separated formats, genes should be in rows and samples should be in columns. You can also upload Cell Ranger files (matrix, genes, and barcodes files).',
    img='analysis.png'
) %}

{% do SectionField(
    name='Data_Section4',
    title='Load your Dataset 4',
    subtitle='Load your expression data. In comma/tab separated formats, genes should be in rows and samples should be in columns. You can also upload Cell Ranger files (matrix, genes, and barcodes files).',
    img='analysis.png'
) %}

{% do SectionField(
    name='Normalization_Section',
    title='Select Normalization Methods',
    subtitle='',
    img='analysis.png'
) %}

{% do SectionField(
    name='Visualization_Section',
    title='Select Visualization Parameters',
    subtitle='',
    img='analysis.png'
) %}

{% do SectionField(
    name='DEG_Section',
    title='Select Differentially Expressed Gene Analysis Parameters',
    subtitle='',
    img='analysis.png'
) %}

{% do SectionField(
    name='TI_Section',
    title='Select Trajectory Inference Parameters',
    subtitle='',
    img='analysis.png'
) %}

{% do SectionField(
    name='Time_TI_Section',
    title='Select Time-series Trajectory Inference Parameters',
    subtitle='',
    img='analysis.png'
) %}

In [None]:
%%appyter code_exec

{# Dataset 1 inputs. The tab keys ('Plain text', '.mtx from 10x Genomics') are compared against file_kind1.raw_value in a later cell, and the order of the fields inside each tab is relied on there (value[0], value[1], ...), so neither may be changed. #}
{% set file_kind1 = TabField(
    name='file_kind1',
    label='Data file',
    default='.mtx from 10x Genomics',
    description='Upload your expression files',
    choices={
        'Plain text': [
            FileField(
                name='rnaseq_data_filename1',
                label='RNA-seq data file (.csv, .txt or .tsv)',
                default='GSE110499_example_expression.txt',
                examples={'GSE110499_example_expression.txt': 'https://appyters.maayanlab.cloud/storage/SC_RNA_seq/GSE110499-expression.txt'},
                description='Upload RNA-seq expression data in comma-separated or tab-separated format. The rows (index) of the dataset are genes, the columns are samples.',
                section='Data_Section1'
            ),
            FileField(
                name='meta_data_filename1',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='GSE110499_example_metadata.txt',
                examples={'GSE110499_example_metadata.txt': 'https://appyters.maayanlab.cloud/storage/SC_RNA_seq/GSE110499-metadata.txt'},
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section1'
            ),
            StringField(
                name='meta_class_column_name1',
                label='(Optional) Class column name in metadata',
                default='prep-site',
                description='Class column name of metadata',
                section='Data_Section1'
            )
        ],

        '.mtx from 10x Genomics': [
            FileField(
                name='mtx_data_filename1',
                label='RNA-seq data file (.mtx)',
                default='example_matrix_GDC_C3N-00662.mtx',
                examples={'example_matrix_GDC_C3N-00662.mtx': 'https://appyters.maayanlab.cloud/storage/SC_RNA_seq/example_matrix_GDC_C3N-00662.mtx'},
                description='The expression data file from 10x Genomics needs to be in .mtx format, which stores the expression data as a sparse matrix.',
                section='Data_Section1'
            ),
            FileField(
                name='gene_data_filename1',
                label='Gene information file (.tsv)',
                default='example_features_GDC_C3N-00662.tsv',
                examples={'example_features_GDC_C3N-00662.tsv': 'https://appyters.maayanlab.cloud/storage/SC_RNA_seq/example_features_GDC_C3N-00662.tsv'},
                description='A tab delimited file of the corresponding genes in the .mtx expression matrix file. The first column should be the gene IDs whereas the second column corresponds to gene symbols.',
                section='Data_Section1'
            ),
            FileField(
                name='barcode_data_filename1',
                label='Barcode information file (.tsv)',
                default='example_barcodes_GDC_C3N-00662.tsv',
                examples={'example_barcodes_GDC_C3N-00662.tsv': 'https://appyters.maayanlab.cloud/storage/SC_RNA_seq/example_barcodes_GDC_C3N-00662.tsv'},
                description='A tab delimited file of the corresponding barcodes in the .mtx expression matrix file. The first column should be the unique barcodes for cells.',
                section='Data_Section1'
            ),
            FileField(
                name='meta_data_filename_mtx1',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='',
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section1'
            ),
            StringField(
                name='meta_class_column_name_mtx1',
                label='(Optional) Class column name in metadata',
                default='',
                description='Class column name of metadata',
                section='Data_Section1'
            )
        ]
    },
    section='Data_Section1',
) %}
{# Display name for dataset 1. #}
{% set dataset_name1 = StringField(
    name='dataset_name1',
    label='Dataset Name',
    default='Dataset1',
    description='',
    section='Data_Section1'
) %}

In [None]:
%%appyter code_exec

{# Dataset 2 inputs (optional; all file defaults are empty). The tab keys are compared against file_kind2.raw_value in a later cell, and the order of the fields inside each tab is relied on there (value[0], value[1], ...), so neither may be changed. #}
{% set file_kind2 = TabField(
    name='file_kind2',
    label='Data file',
    default='Plain text',
    description='Upload your expression files',
    choices={
        'Plain text': [
            FileField(
                name='rnaseq_data_filename2',
                label='RNA-seq data file (.csv, .txt or .tsv)',
                default='',
                description='Upload RNA-seq expression data in comma-separated or tab-separated format. The rows (index) of the dataset are genes, the columns are samples.',
                section='Data_Section2'
            ),
            FileField(
                name='meta_data_filename2',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='',
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section2'
            ),
            StringField(
                name='meta_class_column_name2',
                label='(Optional) Class column name in metadata',
                default='',
                description='Class column name of metadata',
                section='Data_Section2'
            )
        ],

        '.mtx from 10x Genomics': [
            FileField(
                name='mtx_data_filename2',
                label='RNA-seq data file (.mtx)',
                default='',
                description='The expression data file from 10x Genomics needs to be in .mtx format, which stores the expression data as a sparse matrix.',
                section='Data_Section2'
            ),
            FileField(
                name='gene_data_filename2',
                label='Gene information file (.tsv)',
                default='',
                description='A tab delimited file of the corresponding genes in the .mtx expression matrix file. The first column should be the gene IDs whereas the second column corresponds to gene symbols.',
                section='Data_Section2'
            ),
            FileField(
                name='barcode_data_filename2',
                label='Barcode information file (.tsv)',
                default='',
                description='A tab delimited file of the corresponding barcodes in the .mtx expression matrix file. The first column should be the unique barcodes for cells.',
                section='Data_Section2'
            ),
            FileField(
                name='meta_data_filename_mtx2',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='',
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section2'
            ),
            StringField(
                name='meta_class_column_name_mtx2',
                label='(Optional) Class column name in metadata',
                default='',
                description='Class column name of metadata',
                section='Data_Section2'
            )
        ]
    },
    section='Data_Section2',
) %}
{# Display name for dataset 2. #}
{% set dataset_name2 = StringField(
    name='dataset_name2',
    label='Dataset Name',
    default='Dataset2',
    description='',
    section='Data_Section2'
) %}

In [None]:
%%appyter code_exec

{# Dataset 3 inputs (optional; all file defaults are empty). The tab keys are compared against file_kind3.raw_value in a later cell, and the order of the fields inside each tab is relied on there (value[0], value[1], ...), so neither may be changed. #}
{% set file_kind3 = TabField(
    name='file_kind3',
    label='Data file',
    default='Plain text',
    description='Upload your expression files',
    choices={
        'Plain text': [
            FileField(
                name='rnaseq_data_filename3',
                label='RNA-seq data file (.csv, .txt or .tsv)',
                default='',
                description='Upload RNA-seq expression data in comma-separated or tab-separated format. The rows (index) of the dataset are genes, the columns are samples.',
                section='Data_Section3'
            ),
            FileField(
                name='meta_data_filename3',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='',
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section3'
            ),
            StringField(
                name='meta_class_column_name3',
                label='(Optional) Class column name in metadata',
                default='',
                description='Class column name of metadata',
                section='Data_Section3'
            )
        ],

        '.mtx from 10x Genomics': [
            FileField(
                name='mtx_data_filename3',
                label='RNA-seq data file (.mtx)',
                default='',
                description='The expression data file from 10x Genomics needs to be in .mtx format, which stores the expression data as a sparse matrix.',
                section='Data_Section3'
            ),
            FileField(
                name='gene_data_filename3',
                label='Gene information file (.tsv)',
                default='',
                description='A tab delimited file of the corresponding genes in the .mtx expression matrix file. The first column should be the gene IDs whereas the second column corresponds to gene symbols.',
                section='Data_Section3'
            ),
            FileField(
                name='barcode_data_filename3',
                label='Barcode information file (.tsv)',
                default='',
                description='A tab delimited file of the corresponding barcodes in the .mtx expression matrix file. The first column should be the unique barcodes for cells.',
                section='Data_Section3'
            ),
            FileField(
                name='meta_data_filename_mtx3',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='',
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section3'
            ),
            StringField(
                name='meta_class_column_name_mtx3',
                label='(Optional) Class column name in metadata',
                default='',
                description='Class column name of metadata',
                section='Data_Section3'
            )
        ]
    },
    section='Data_Section3',
) %}
{# Display name for dataset 3. #}
{% set dataset_name3 = StringField(
    name='dataset_name3',
    label='Dataset Name',
    default='Dataset3',
    description='',
    section='Data_Section3'
) %}

In [None]:
%%appyter code_exec

{# Dataset 4 inputs (optional; all file defaults are empty). The tab keys are compared against file_kind4.raw_value in a later cell, and the order of the fields inside each tab is relied on there (value[0], value[1], ...), so neither may be changed. #}
{% set file_kind4 = TabField(
    name='file_kind4',
    label='Data file',
    default='Plain text',
    description='Upload your expression files',
    choices={
        'Plain text': [
            FileField(
                name='rnaseq_data_filename4',
                label='RNA-seq data file (.csv, .txt or .tsv)',
                default='',
                description='Upload RNA-seq expression data in comma-separated or tab-separated format. The rows (index) of the dataset are genes, the columns are samples.',
                section='Data_Section4'
            ),
            FileField(
                name='meta_data_filename4',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='',
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section4'
            ),
            StringField(
                name='meta_class_column_name4',
                label='(Optional) Class column name in metadata',
                default='',
                description='Class column name of metadata',
                section='Data_Section4'
            )
        ],

        '.mtx from 10x Genomics': [
            FileField(
                name='mtx_data_filename4',
                label='RNA-seq data file (.mtx)',
                default='',
                description='The expression data file from 10x Genomics needs to be in .mtx format, which stores the expression data as a sparse matrix.',
                section='Data_Section4'
            ),
            FileField(
                name='gene_data_filename4',
                label='Gene information file (.tsv)',
                default='',
                description='A tab delimited file of the corresponding genes in the .mtx expression matrix file. The first column should be the gene IDs whereas the second column corresponds to gene symbols.',
                section='Data_Section4'
            ),
            FileField(
                name='barcode_data_filename4',
                label='Barcode information file (.tsv)',
                default='',
                description='A tab delimited file of the corresponding barcodes in the .mtx expression matrix file. The first column should be the unique barcodes for cells.',
                section='Data_Section4'
            ),
            FileField(
                name='meta_data_filename_mtx4',
                label='(Optional) Meta data file (.csv, .txt or .tsv)',
                default='',
                description='Upload metadata in two-column comma-separated or tab-separated format. The first column contains sample IDs and the other column contains sample labels.',
                section='Data_Section4'
            ),
            StringField(
                name='meta_class_column_name_mtx4',
                label='(Optional) Class column name in metadata',
                default='',
                description='Class column name of metadata',
                section='Data_Section4'
            )
        ]
    },
    section='Data_Section4',
) %}
{# Display name for dataset 4. #}
{% set dataset_name4 = StringField(
    name='dataset_name4',
    label='Dataset Name',
    default='Dataset4',
    description='',
    section='Data_Section4'
) %}

In [None]:
%%appyter code_exec

{# Quality-control and normalization options. Each field's value is copied into a Python variable of the same name in a later cell. #}
{% set qc_filter_genes = BoolField(
    name='qc_filter_genes',
    label='Filter cells by quality control?',
    default='true',
    description='Check if you want cells to be filtered by mitochondrial gene expressions',
    section='Normalization_Section')
%}
{% set qc_threshold = FloatField(
    name='qc_threshold',
    label='Mitochondria Quality Control threshold',
    default=0.05,
    min=0.0,
    max=1.0,
    step=0.001,
    description='Remove cells that have too many mitochondrial genes expressed.',
    section='Normalization_Section')
%}
{% set qc_filter_doublets = BoolField(
    name='qc_filter_doublets',
    label='Filter cells by doublet prediction?',
    default='false',
    description='Check if you want cells to be filtered by doublet prediction. If you analyze large data, it may take a long time.',
    section='Normalization_Section')
%}
{% set log_normalization = BoolField(
    name='log_normalization',
    label='Log normalization?',
    default='true',
    description='Check if you want the dataset to be log-transformed',
    section='Normalization_Section')
%}
{# NOTE(review): the value 'Weignreb17' looks like a misspelling of Weinreb; it is kept unchanged because code outside this notebook may compare against this exact string. #}
{% set normalization_method = ChoiceField(
    name='normalization_method',
    label='Normalization',
    choices={'None': 'None', 'Satija et al. (2015, Nature Biotechnology)': 'Seurat', 'Zheng et al. (2017, Nature Communications)': 'Zheng17','Weinreb et al. (2018, PNAS)': 'Weignreb17'},
    default='Satija et al. (2015, Nature Biotechnology)',
    description='Standard normalization recipe for scRNA-seq datasets',
    section='Normalization_Section')
%}
{% set magic = BoolField(
    name='magic',
    label='MAGIC?',
    default='false',
    description='Check if you want to impute missing data with Markov Affinity-based Graph Imputation of Cells (MAGIC)',
    section='Normalization_Section')
%}

In [None]:
%%appyter code_exec
# Visualization options: interactive/static plotting, the number of top
# expressed genes to plot, and the Clustergrammer gene selection.
{% set interactive_plot = BoolField(
    name='interactive_plot', 
    label='Interactive plot?', 
    default='false', 
    description='Check if you want interactive plots. It may require high computational resources and it may take time to render.', 
    section='Visualization_Section')
%}
{% set highest_expr_n_genes = IntField(
    name='highest_expr_n_genes', 
    label='Genes for highest expression', 
    min=0, 
    max=100, 
    default=10, 
    description='The number of genes with highest expression values', 
    section='Visualization_Section')
%}

# Unlike the fields declared with "set", this text field is rendered straight
# into a Python assignment, so the pasted gene list becomes a Python value here.
gene_list_for_clustergrammer = {{TextField(
    name='gene_list_for_clustergrammer', 
    label='Gene List for Clustergrammer (Optional)', 
    default='', 
    description='Paste your gene list (One gene per row) for Clustergrammer heatmap plots.', 
    section = 'Visualization_Section')}}
{% set clustering_topk = IntField(
    name='clustering_topk', 
    label='Genes for clustergrammer', 
    min=0, 
    max=1000, 
    default=800, 
    description='The number of genes with largest variance for Clustergrammer', 
    section='Visualization_Section')
%}


In [None]:
%%appyter code_exec

{# Differential expression and enrichment options. Each field's value is copied into a Python variable in a later cell. #}
{% set deg = BoolField(
    name='deg',
    label='Differentially expressed gene analysis?',
    default='true',
    description='Check if you want differentially expressed gene analysis',
    section='DEG_Section')
%}

{% set integration_option = MultiChoiceField(
    name='integration_option',
    label='Analysis option for multiple datasets',
    choices=['intra-dataset','integrated-dataset', 'inter-dataset'],
    default=['intra-dataset'],
    description='If you upload multiple datasets, select an analysis option.',
    section='DEG_Section')
%}

{# The value 'user_defined_class' is compared against in the dataset-loading cell to require metadata; do not rename it. #}
{% set enrichment_groupby = ChoiceField(
    name='enrichment_groupby',
    label='Group for differentially expressed gene analysis',
    choices={'Cluster': 'Cluster', 'User-defined class': 'user_defined_class'},
    default='Cluster',
    description='Specify groups for enrichment analysis. Clusters will be automatically generated by a clustering method',
    section='DEG_Section')
%}

{% set diff_gex_method = ChoiceField(
    name='diff_gex_method',
    label='Differential expression analysis method',
    choices={'limma': 'limma', 'wilcoxon': 'wilcoxon', 'characteristic direction': 'characteristic_direction', 'edgeR': 'edgeR', 'DESeq2': 'DESeq2'},
    default='wilcoxon',
    description='Set a method to get differentially expressed genes',
    section='DEG_Section')
%}

{% set gene_topk = IntField(
    name='gene_topk',
    label='Maximum genes for Enrichr',
    min=0,
    max=1000,
    default=200,
    description='The maximum number of genes discovered by the DEG method',
    section='DEG_Section')
%}

{# Library source: either built-in Enrichr library categories ('Yes') or an uploaded .gmt file ('No'). A later cell branches on raw_value == 'Yes' and reads value[0] of the chosen tab, so the tab keys and the single-field layout must stay as they are. #}
{# NOTE(review): the default file name of library_filename is not one of the keys of its `examples` mapping; confirm that this default resolves to an available file. #}
{% set libraries_tab = TabField(
    name='libraries_tab',
    label='Enrichr libraries?',
    default='Yes',
    description='',
    choices={
        'Yes': [
            MultiChoiceField(
                name='enrichr_libraries',
                label='Select Enrichr Libraries (up to 2)',
                description='Enrichr libraries to be visualized. Select one or two libraries',
                choices=['Gene Ontology',
                         'Pathway',
                         'Kinase',
                         'Transcription Factor',
                         'miRNA',
                         'Cell Type',
                         'Disease'],
                default=['Cell Type',
                         'Transcription Factor',
                        ],
                section='DEG_Section'
            ),
        ],

        'No': [
            FileField(
                name='library_filename',
                label='Upload your library file (.gmt)',
                default='HubMap_ASCT_plus_B_augmented_w_RNAseq_Coexpression.gmt',
                examples={'PBMC_cell_type_biomarkers.gmt': 'https://appyters.maayanlab.cloud/storage/SC_RNA_seq/PBMC_cell_type_biomarkers.gmt'},
                description='',
                section='DEG_Section'
            ),
        ]
    },
    section='DEG_Section',
) %}

{% set nr_genesets = IntField(
    name='nr_genesets',
    label='Top ranked gene sets',
    min=0,
    max=100,
    default=15,
    description='the number of result gene sets',
    section='DEG_Section')
%}


In [None]:
%%appyter code_exec
{# Trajectory inference options: an on/off switch and the algorithm choice ('dpt' or 'monocle'). Both values are copied into Python variables in a later cell. #}
{% set trajectory = BoolField(
    name='trajectory',
    label='Trajectory Inference?',
    default='true',
    description='Check if you want trajectory inference analysis', 
    section='TI_Section')
%}

{% set trajectory_method = ChoiceField(
    name='trajectory_method',
    label='Trajectory inference method',
    choices={'DPT(diffusion pseudotime)': 'dpt', 'monocle': 'monocle'},
    default='DPT(diffusion pseudotime)', 
    description='Trajectory inference algorithm', 
    section='TI_Section')
%}


In [None]:
%%appyter code_exec
# Time-series trajectory options: an on/off switch, the metadata column that
# holds the timepoint of each sample, and the ordered list of timepoint labels.
{% set time_series_trajectory = BoolField(
    name='time_series_trajectory', 
    label='Time series trajectory analysis?', 
    default='false', 
    description='Check if you want time-series trajectory analysis', 
    section='Time_TI_Section')
%}
{% set timepoint_labels_column_name = StringField(
    name='timepoint_labels_column_name', 
    label='Timepoint column name in metadata', 
    default='Timepoints', 
    description='Timepoint column name of metadata', 
    section='Time_TI_Section')
%}
# This text field is rendered straight into a Python assignment; the form asks
# for one timepoint label per row, in chronological order.
timepoint_labels = {{ TextField(
    name='timepoint_labels', 
    label='Ordered timepoint labels in the timepoint label column', 
    default='', 
    description='Paste your timepoint labels in order (One timepoint per row). e.g., 0H, 12H, 24H', 
    section = 'Time_TI_Section') }}


In [None]:
%%appyter code_exec
rnaseq_data_filename1 = ""
mtx_data_filename1 = ""
gene_data_filename1 = ""
barcode_data_filename1 = ""

{%- if file_kind1.raw_value == 'Plain text' %}
rnaseq_data_filename1 = {{ file_kind1.value[0] }}
meta_data_filename1 = {{ file_kind1.value[1] }}
meta_class_column_name1 = {{ file_kind1.value[2] }}

{%- elif file_kind1.raw_value == '.mtx from 10x Genomics' %}
mtx_data_filename1 = {{ file_kind1.value[0] }}
gene_data_filename1 = {{ file_kind1.value[1] }}
barcode_data_filename1 = {{ file_kind1.value[2] }}

meta_data_filename1 = {{ file_kind1.value[3] }}
meta_class_column_name1 = {{ file_kind1.value[4] }}

{%- endif %}
dataset_name1 = "{{dataset_name1.value}}"

In [None]:
%%appyter code_exec
rnaseq_data_filename2 = ""
mtx_data_filename2 = ""
gene_data_filename2 = ""
barcode_data_filename2 = ""

{%- if file_kind2.raw_value == 'Plain text' %}
rnaseq_data_filename2 = {{ file_kind2.value[0] }}
meta_data_filename2 = {{ file_kind2.value[1] }}
meta_class_column_name2 = {{ file_kind2.value[2] }}

{%- elif file_kind2.raw_value == '.mtx from 10x Genomics' %}
mtx_data_filename2 = {{ file_kind2.value[0] }}
gene_data_filename2 = {{ file_kind2.value[1] }}
barcode_data_filename2 = {{ file_kind2.value[2] }}

meta_data_filename2 = {{ file_kind2.value[3] }}
meta_class_column_name2 = {{ file_kind2.value[4] }}

{%- endif %}
dataset_name2 = "{{dataset_name2.value}}"

In [None]:
%%appyter code_exec
rnaseq_data_filename3 = ""
mtx_data_filename3 = ""
gene_data_filename3 = ""
barcode_data_filename3 = ""

{%- if file_kind3.raw_value == 'Plain text' %}
rnaseq_data_filename3 = {{ file_kind3.value[0] }}
meta_data_filename3 = {{ file_kind3.value[1] }}
meta_class_column_name3 = {{ file_kind3.value[2] }}

{%- elif file_kind3.raw_value == '.mtx from 10x Genomics' %}
mtx_data_filename3 = {{ file_kind3.value[0] }}
gene_data_filename3 = {{ file_kind3.value[1] }}
barcode_data_filename3 = {{ file_kind3.value[2] }}

meta_data_filename3 = {{ file_kind3.value[3] }}
meta_class_column_name3 = {{ file_kind3.value[4] }}

{%- endif %}
dataset_name3 = "{{dataset_name3.value}}"

In [None]:
%%appyter code_exec
rnaseq_data_filename4 = ""
mtx_data_filename4 = ""
gene_data_filename4 = ""
barcode_data_filename4 = ""

{%- if file_kind4.raw_value == 'Plain text' %}
rnaseq_data_filename4 = {{ file_kind4.value[0] }}
meta_data_filename4 = {{ file_kind4.value[1] }}
meta_class_column_name4 = {{ file_kind4.value[2] }}

{%- elif file_kind4.raw_value == '.mtx from 10x Genomics' %}
mtx_data_filename4 = {{ file_kind4.value[0] }}
gene_data_filename4 = {{ file_kind4.value[1] }}
barcode_data_filename4 = {{ file_kind4.value[2] }}

meta_data_filename4 = {{ file_kind4.value[3] }}
meta_class_column_name4 = {{ file_kind4.value[4] }}

{%- endif %}
dataset_name4 = "{{dataset_name4.value}}"

In [None]:
%%appyter code_exec
# Enrichment library source. Exactly one of the two variables carries the
# user's selection; the other is set to an empty list.
{%- if libraries_tab.raw_value == 'Yes' %}
# Built-in Enrichr libraries were selected: no uploaded library file.
enrichr_libraries_filename = []
enrichr_libraries = {{ libraries_tab.value[0] }}

{%- else %}
# A custom library file was uploaded: no built-in libraries.
enrichr_libraries_filename = {{ libraries_tab.value[0] }}
enrichr_libraries = []
{%- endif %}
# Keep the chosen tab key as a plain string for later cells.
libraries_tab = "{{libraries_tab.raw_value}}"

In [None]:
%%appyter code_exec
interactive_plot = {{interactive_plot.value}}
qc_filter_genes = {{qc_filter_genes.value}}
qc_threshold = {{qc_threshold.value}}
qc_filter_doublets = {{qc_filter_doublets.value}}
log_normalization = {{log_normalization.value}}
normalization_method = "{{normalization_method.value}}"
magic = {{magic.value}}

highest_expr_n_genes = {{highest_expr_n_genes.value}}
clustering_topk = {{clustering_topk.value}}

deg = {{deg.value}}
integration_option = {{integration_option.value}}
enrichment_groupby = "{{enrichment_groupby.value}}"
diff_gex_method = "{{diff_gex_method.value}}"
gene_topk = {{gene_topk.value}}
nr_genesets = {{nr_genesets.value}}

trajectory = {{trajectory.value}}
trajectory_method = "{{trajectory_method.value}}"

time_series_trajectory = {{time_series_trajectory.value}}
timepoint_labels_column_name = "{{timepoint_labels_column_name.value}}"


In [None]:
# Notebook-wide runtime setup: silence warnings, seed Python's RNG, and enable
# the rpy2 pandas conversion.
warnings.filterwarnings('ignore')
random.seed(0)
pandas2ri.activate()

# Plot mode consumed by the plotting helpers.
plot_type = 'interactive' if interactive_plot == True else 'static'

# Shared state: collected results plus running numbers for table and figure
# captions.
results = {}
table_counter = figure_counter = 1

# scanpy: verbosity levels are errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=80)
sc.settings.figdir = "./"

# Route pandas' .plot() calls through plotly.
pd.options.plotting.backend = "plotly"

# Load datasets

In [None]:
# Load up to four datasets, validate the metadata requirement, and restrict
# every loaded dataset to the genes shared by all of them.
adata_list = []
adata_adt_list = []

# load_data comes from utils; the checks below treat a None result as
# "no dataset in this slot". Only dataset 1 supplies meta_class_column_name.
adata1, table_counter, meta_class_column_name = load_data(dataset_name1, rnaseq_data_filename1, mtx_data_filename1, gene_data_filename1, barcode_data_filename1, meta_data_filename1, meta_class_column_name1, table_counter)
adata2, table_counter, _ = load_data(dataset_name2, rnaseq_data_filename2, mtx_data_filename2, gene_data_filename2, barcode_data_filename2, meta_data_filename2, meta_class_column_name2, table_counter)
adata3, table_counter, _ = load_data(dataset_name3, rnaseq_data_filename3, mtx_data_filename3, gene_data_filename3, barcode_data_filename3, meta_data_filename3, meta_class_column_name3, table_counter)
adata4, table_counter, _ = load_data(dataset_name4, rnaseq_data_filename4, mtx_data_filename4, gene_data_filename4, barcode_data_filename4, meta_data_filename4, meta_class_column_name4, table_counter)

loaded_slots = [adata1, adata2, adata3, adata4]
meta_data_filenames = [meta_data_filename1, meta_data_filename2, meta_data_filename3, meta_data_filename4]

# check options are valid: class-based DEG needs metadata for every loaded dataset
if deg == True and enrichment_groupby == 'user_defined_class':
    for meta_data_filename, adata in zip(meta_data_filenames, loaded_slots):
        if adata is not None and meta_data_filename == "":
            raise Exception("Error: If you want to perform enrichment analysis between classes, please upload metadata with class annotations for samples. Please refer to the example file. If not, please set the 'Group for differentially expressed gene analysis' option as Cluster.")

# common genes
# None means "no dataset seen yet". An empty set cannot serve as that marker:
# if two datasets share no genes, the next dataset would otherwise restart the
# intersection with its own full gene list.
gene_set = None
for adata in loaded_slots:
    if adata is not None:
        dataset_genes = set(adata.var.index.values)
        gene_set = dataset_genes if gene_set is None else gene_set & dataset_genes
if gene_set is None:
    gene_set = set()

# Sort once so that every dataset gets the same gene order and the order is
# reproducible between runs (iterating a set of strings is not).
common_genes = sorted(gene_set)
for adata in loaded_slots:
    if adata is not None:
        adata = adata[:, common_genes]
        adata_list.append(adata)


In [None]:
%%appyter code_exec
{%if qc_filter_genes.value == True %}
# Basic QC, rendered only when the gene-filter option is enabled.
# Thresholds are hard-coded: min_cells=3 and min_genes=200.
for adata in adata_list:
    # Keep genes that pass scanpy's min_cells=3 filter.
    sc.pp.filter_genes(adata, min_cells=3)
    # Keep cells that pass scanpy's min_genes=200 filter.
    sc.pp.filter_cells(adata, min_genes=200)
    display_statistics(adata, "### Statistics of data ###") 
{% endif %}

In [None]:
# Explanatory text for the doublet-detection step; shown only when doublet
# filtering was requested.
if qc_filter_doublets == True:
    doublet_intro_blocks = (
        "# Predict Doublet Scores",
        "Single cell RNA-seq often generates technical artifacts known as doublets where multiple cells receive the same barcode. The appyter identifies problemtic doublets by using Scrublet (Wolock, Samuel L., et al. 2019) and filters out predicted doublets. Scrublet predicts doublets using a nearest-neighbor classifier trained on observed transcriptome and simulated doublets. ",
    )
    for intro_text in doublet_intro_blocks:
        display(Markdown(intro_text))

In [None]:
# Run Scrublet on every dataset and keep only the cells that were not
# predicted to be doublets; `adata_list` is replaced by the filtered objects.
if qc_filter_doublets == True:
    singlet_adatas = []
    for scored in adata_list:
        sc.external.pp.scrublet(scored)
        # Select by observation name, using the "predicted_doublet" column
        # read back from `obs` after the Scrublet call.
        not_doublet = scored.obs["predicted_doublet"] == False
        singlets = scored[scored.obs.loc[not_doublet].index]
        # The first "batch" value of the remaining cells labels the statistics.
        batch_label = singlets.obs["batch"].unique()[0]
        display_statistics(singlets, f"### Statistics of RNA data in {batch_label}###")
        singlet_adatas.append(singlets)
    adata_list = singlet_adatas

In [None]:
# Show preview tables (expression, metadata, class sizes) for each dataset,
# skipping datasets with 20000 or more observations.
for adata in adata_list:
    if adata.n_obs >= 20000:
        continue

    expression_preview = adata.to_df().iloc[:10, :5].T.head()
    table_counter = display_object(table_counter, "Raw data. The table displays the first 5 rows of the quantified RNA-seq expression dataset. Rows represent genes, columns represent samples, and values show the number of mapped reads.", expression_preview, istable=True)

    metadata_preview = adata.obs.head()
    table_counter = display_object(table_counter, "Metadata. The table displays the metadata associated with the samples in the RNA-seq dataset. Rows represent RNA-seq samples, columns represent metadata categories.", metadata_preview, istable=True)

    class_sizes = adata.obs.reset_index().groupby(meta_class_column_name).count()
    table_counter = display_object(table_counter, "Sample size for each class. The table displays the number of samples in each class.", class_sizes, istable=True)

In [None]:
# Combine the per-dataset AnnData objects into the single object that the
# following QC, library-size and normalization cells operate on.
adata_merged = anndata.concat(adata_list)

In [None]:
%%appyter markdown
{%if qc_filter_genes.value == True %}
# Filter Out Cells Based On Mitochondrial Genes
High expression levels of mitochondrial genes can indicate poor-quality cells (Islam, Saiful, et al. 2014, Ilicic, Tomislav, et al. 2016). When the cell membrane is broken, cytoplasmic RNA is lost, but RNA enclosed in the mitochondria is retained. This analysis removes cells whose membrane is likely to be broken.
{% endif %}

In [None]:
%%appyter code_exec
{%if qc_filter_genes.value == True %}
if adata_merged.var_names[0].startswith("ENSG") == False:
    mito_genes = adata_merged.var_names.str.startswith('MT-')    
else:
    gene_symbol_var_names = adata_merged.var_names
    mito_genes = [True if x in gene_id_map_dict and str(gene_id_map_dict[x]).startswith('MT-') else False for x in gene_symbol_var_names ]

# for each cell compute fraction of counts in mito genes vs. all genes
adata_merged.obs['percent_mito'] = np.sum(
    adata_merged[:, mito_genes].X, axis=1) / np.sum(adata_merged.X, axis=1)

sc.pl.violin(adata_merged, ['percent_mito'],
         jitter=0.4, multi_panel=True, show=True, save=True)
figure_counter = display_object(figure_counter, "Violin plot of the percentage of mitochondrial gene expression counts in each cell", istable=False)
display_link("violin.pdf", "Download figure")
adata_merged = adata_merged[adata_merged.obs.percent_mito < qc_threshold, :]
display_statistics(adata_merged, "### Statistics of data after QC ###")
{% endif %}

In [None]:
# Store the current (pre-normalization) state of the merged data in `.raw`
# before the normalization cell runs.
adata_merged.raw = adata_merged

# Library Size Analysis

In order to quantify gene expression in an RNA-seq dataset, reads generated from the sequencing step are mapped to a reference genome and subsequently aggregated into numeric gene counts. Due to experimental variations and random technical noise, samples in an RNA-seq dataset often have variable amounts of the total RNA. Library size analysis calculates and displays the total number of reads mapped for each sample in the RNA-seq dataset, facilitating the identification of outlying samples and the assessment of the overall quality of the data.

In [None]:
# Library size: row-wise sum of the expression matrix, stored per
# observation in `obs['reads']` and shown as a histogram.
adata_merged.obs['reads'] = adata_merged.X.sum(axis=1)
reads_per_cell = adata_merged.obs['reads'].to_frame()

fig = px.histogram(reads_per_cell, x='reads')
fig.update_yaxes(title="samples/cells")

# Static mode renders a PNG; interactive mode uses the default renderer.
show_options = {"renderer": "png"} if plot_type == "static" else {}
fig.show(**show_options)

figure_counter = display_object(figure_counter, "Histogram of the total number of reads mapped for each sample. The figure contains an interactive bar chart which displays the number of samples according to the total number of reads mapped to each RNA-seq sample in the dataset. Additional information for each sample is available by hovering over the bars.", istable=False)


In [None]:
%%appyter markdown
{% if normalization_method.value != "None" %}
# Normalization
Various normalization methods can be applied based on the selection made by the user (Zheng, Grace XY, et al. 2017, Weinreb, Caleb, et al. 2018, Butler, Andrew, et al. 2018). These normalization methods convert the raw read counts into standardized measures of gene expression by removing factors that may negatively affect the analysis.
{% endif %}

In [None]:
%%appyter code_exec
{% if normalization_method.value != "None" %}
adata_norm = normalize(adata_merged, normalization_method, log_normalization)
{% endif %}

{% if magic.value == True %}    
adata_norm = run_magic(dataset=adata_norm)
table_counter = display_object(table_counter, "Normalized data. The table displays the expression values after normalization.", adata_norm.uns["magic"].T.head(), istable=True)
display_statistics(adata_norm, "### Statistics of data after normalization ###")

{% elif normalization_method.value != "None" %}
table_counter = display_object(table_counter, "Normalized data. The table displays the expression values after normalization.", adata_norm.to_df().T.head(), istable=True)
display_statistics(adata_norm, "### Statistics of data after normalization ###")

{% endif %}

# Most Highly Expressed Genes Across All samples/Cells

This analysis displays the genes that yield the highest fraction of counts in each single cell, across all cells.

In [None]:
# Convert expression to per-cell fractions (each row divided by its row sum).
fraction_df = adata_norm.to_df()
fraction_df = fraction_df.divide(fraction_df.sum(axis=1), axis=0)

# Pick the top-N genes by mean fraction ...
genes_by_mean = fraction_df.mean().sort_values(ascending=False)
top_mean_genes = genes_by_mean.index[:highest_expr_n_genes].tolist()

# ... and order those genes by their median fraction for the plot.
genes_by_median = fraction_df.median().sort_values(ascending=False)
top_genes_by_median = genes_by_median.loc[top_mean_genes].sort_values(ascending=False)
plot_gene_order = top_genes_by_median.index.tolist()

sc.pl.highest_expr_genes(adata_norm, log=True, n_top=highest_expr_n_genes, order=plot_gene_order, show=True, save=True)
figure_counter = display_object(figure_counter, f"The {highest_expr_n_genes} genes with the highest mean fraction over all cells sorted by median.", istable=False)
display_link("highest_expr_genes.pdf", "Download figure")

# Clustergrammer

Clustergrammer (Fernandez, Nicolas F., et al. 2017) is a web-based tool for visualizing and analyzing high-dimensional data as interactive and hierarchically clustered heatmaps. It is commonly used to explore the similarity between samples in an RNA-seq dataset. In addition to identifying clusters of samples, it also makes it possible to identify the genes that contribute to the clustering. By default, it visualizes the 800 genes with the largest variance; alternatively, you can upload your own gene list.

In [None]:
# Build the Clustergrammer heatmap from the normalized data and keep the
# result in the shared `results` dictionary.
clustergrammer_result = run_clustergrammer(
    dataset=adata_norm,
    meta_class_column_name=meta_class_column_name,
    nr_genes=clustering_topk,
    gene_list=gene_list_for_clustergrammer,
)
results['clustergrammer'] = clustergrammer_result

# Render the heatmap and add its numbered caption.
plot_clustergrammar(clustergrammer_result)
figure_counter = display_object(figure_counter, "Clustered heatmap plot. The figure contains an interactive heatmap displaying gene expression for each sample in the RNA-seq dataset. Every row of the heatmap represents a gene, every column represents a sample, and every cell displays normalized gene expression values. The heatmap additionally features color bars beside each column which represent prior knowledge of each sample, such as the tissue of origin or experimental treatment.", istable=False)
 

# Differential Gene Expression Analysis
In the following section, the appyter computes differentially expressed genes for each cluster and performs enrichment analysis using the top genes. You can select one or more analysis options: intra dataset, integrated dataset, and inter dataset. The “intra dataset” option analyzes the data within each dataset separately; this is useful when you want to analyze datasets from different conditions on their own and then compare the results. The “integrated dataset” option analyzes the integrated data; this is useful when you have multiple datasets from the same condition. The “inter dataset” option analyzes the differences between datasets, similar to a bulk RNA-seq analysis.<br>
- The appyter clusters samples using the Leiden algorithm (Traag, Vincent A., et al. 2019) to identify well-connected clusters in networks. In this procedure, the appyter takes into account both modalities of the data by integrating connectivity graphs generated from each modality (ref https://scanpy-tutorials.readthedocs.io/en/multiomics/cite-seq/pbmc5k.html#Clustering). It visualizes the samples colored by clusters. <br>
- Gene expression signatures are alterations in the patterns of gene expression that occur as a result of cellular perturbations such as drug treatments, gene knock-downs or diseases. They can be quantified using differential gene expression (DGE) methods (Ritchie, Matthew E., et al. 2015, Clark, Neil R., et al. 2014), which compare gene expression between two groups of samples to identify genes whose expression is significantly altered in the perturbation.
<br>
- Enrichment analysis is a bioinformatics method used to identify prior knowledge terms which are over-represented in a given gene set by comparing the gene set to many annotated gene sets. The prior-knowledge gene sets can represent cell signaling pathways, molecular functions, diseases, and a wide variety of other terms obtained by processing data from multiple resources. The appyter allows you to input user-created gene set libraries or to select Enrichr libraries (Kuleshov, M.V., et al. 2016) which is a web-based application that performs enrichment analysis against a large collection of gene-set libraries. The appyter performs enrichment analysis for each cluster and visualizes the top enriched term for each cluster. <br>
- The appyter summarizes the enrichment analysis results by showing the number of samples for each enriched term. <br>



In [None]:
# Per-dataset ("intra") analysis: for every value of obs["batch"], cluster the
# cells, compute differential expression, run enrichment and summarise.
# This branch also runs whenever only a single dataset was loaded.
adata_object_dict = {}
bool_plot = True
run_intra_analysis = "intra-dataset" in integration_option or len(adata_list) == 1
if run_intra_analysis:
    for dataset in adata_norm.obs["batch"].unique():
        display(Markdown(f"### Analysis results of {dataset}"))
        in_dataset = adata_norm.obs["batch"] == dataset
        dataset_adata = adata_norm[in_dataset].copy()
        sc.pp.neighbors(dataset_adata, n_neighbors=30)

        # clustering
        display(Markdown(f"#### Clustering of {dataset}"))
        clustered_adata, figure_counter = clustering(dataset_adata, dataset, bool_plot=bool_plot, figure_counter=figure_counter, batch_correction=False)

        # differential_gene_expression_analysis
        display(Markdown(f"#### Differential Gene Expression Analysis of {dataset}"))
        signatures, bool_cluster, table_counter = differential_gene_expression_analysis(clustered_adata, diff_gex_method, enrichment_groupby, meta_class_column_name, table_counter)

        # enrichment analysis
        display(Markdown(f"#### Enrichment Analysis of {dataset}"))
        enriched_adata, option_list, figure_counter, table_counter = visualize_enrichment_analysis(clustered_adata, signatures, meta_class_column_name, diff_gex_method, enrichr_libraries_filename, enrichr_libraries, enrichment_groupby, libraries_tab, gene_topk, bool_cluster, bool_plot, figure_counter, table_counter)

        # summary ('leiden' is appended to the summarised options)
        display(Markdown(f"#### Summary of Enrichment Analysis of {dataset}"))
        option_list.append('leiden')
        table_counter = summary(enriched_adata, option_list, table_counter)

        # Keep the annotated object for the trajectory and download cells.
        adata_object_dict[dataset] = enriched_adata

In [None]:
# Integrated analysis: treat all loaded datasets as one, with batch correction
# enabled in the clustering step. Runs only when more than one dataset exists.
run_integrated_analysis = "integrated-dataset" in integration_option and len(adata_list) > 1
if run_integrated_analysis:
    display(Markdown("### Analysis Results of Integrated Dataset"))

    sc.pp.neighbors(adata_norm, n_neighbors=30)

    # clustering
    display(Markdown("#### Clustering"))
    integrated_adata, figure_counter = clustering(adata_norm, dataset="integrated", bool_plot=bool_plot, figure_counter=figure_counter, batch_correction=True)

    # differential_gene_expression_analysis
    display(Markdown("#### Differential Gene Expression Analysis"))
    signatures, bool_cluster, table_counter = differential_gene_expression_analysis(integrated_adata, diff_gex_method, enrichment_groupby, meta_class_column_name, table_counter)

    # enrichment analysis
    display(Markdown("#### Enrichment Analysis"))
    integrated_adata, option_list, figure_counter, table_counter = visualize_enrichment_analysis(integrated_adata, signatures, meta_class_column_name, diff_gex_method, enrichr_libraries_filename, enrichr_libraries, enrichment_groupby, libraries_tab, gene_topk, bool_cluster, bool_plot, figure_counter, table_counter)

    # summary ('leiden' is appended to the summarised options)
    display(Markdown("#### Summary of Enrichment Analysis"))
    option_list.append('leiden')
    table_counter = summary(integrated_adata, option_list, table_counter)

    # Keep the annotated object for the trajectory and download cells.
    adata_object_dict["Integrated Dataset"] = integrated_adata

In [None]:
# Inter-dataset analysis: compare the datasets against each other by grouping
# on the "batch" column. Runs only when more than one dataset was loaded.
if "inter-dataset" in integration_option and len(adata_list) > 1:
    display(Markdown("### Analysis Results of Inter Dataset"))
    bool_plot = True

    # differential_gene_expression_analysis
    # Pass `meta_class_column_name` like the intra/integrated calls do, so
    # that `table_counter` arrives in its own positional slot.
    # NOTE(review): argument order inferred from the sibling calls in this
    # notebook; confirm against the helper's signature in utils.
    display(Markdown("#### Differential Gene Expression Analysis"))
    signatures, bool_cluster, table_counter = differential_gene_expression_analysis(adata_norm, diff_gex_method, "batch", meta_class_column_name, table_counter)

    # enrichment analysis
    display(Markdown("#### Enrichment Analysis"))
    joint, option_list, figure_counter, table_counter = visualize_enrichment_analysis(adata_norm, signatures, meta_class_column_name, diff_gex_method, enrichr_libraries_filename, enrichr_libraries, "batch", libraries_tab, gene_topk, bool_cluster, bool_plot, figure_counter, table_counter)

    # summary ('batch' is appended to the summarised options)
    display(Markdown("#### Summary of Enrichment Analysis"))
    option_list.append('batch')
    table_counter = summary(adata_norm, option_list, table_counter)

    # Keep the object for the download cell under the "Inter_Dataset" key.
    adata_object_dict["Inter_Dataset"] = adata_norm

In [None]:
%%appyter markdown
{% if trajectory.value == True %}
# Trajectory Inference
Trajectory inference is a computational technique used in single-cell transcriptomics to arrange cells based on their progression through the differentiation process. It orders single cells in pseudotime, placing them along a trajectory corresponding to a biological process such as cell differentiation by taking advantage of single cells’ asynchronous progression through those processes. Diffusion pseudotime (DPT) (Haghverdi, Laleh, et al. 2016), which measures transitions between cells using diffusion-like random walks is one of the methods used to identify such trajectories. Monocle (Trapnell, Cole, et al. 2014) is a method for ordering cells by learning an explicit principal graph from the single cell expression data with advanced machine learning techniques (Reversed Graph Embedding), which robustly and accurately resolves complicated biological processes.
{% endif %}

In [None]:
# trajectory
# Run trajectory inference on every stored analysis object except the
# inter-dataset comparison.
if trajectory == True:
    for dataset, tmp_adata in adata_object_dict.items():
        # The inter-dataset entry is stored under the key "Inter_Dataset";
        # the hyphenated spelling is accepted as well so the skip works
        # whichever form the key takes.
        if dataset in ("Inter_Dataset", "Inter-dataset"):
            continue
        tmp_adata, figure_counter = trajectory_inference(tmp_adata, trajectory_method, figure_counter)
    

In [None]:
%%appyter markdown
{% if time_series_trajectory.value == True %}
# Time-series Trajectory Analysis
When time-series data is available, it is possible to order cells by their progression through a dynamic
process using time information. Tempora (Tran, Thinh N., and Gary Bader 2019), a pathway-based cell trajectory inference method that orders cells using time information from the data, infers developmental lineages based on biological pathway information. This time-series based analysis can generate insights into dynamic processes and their biological regulation.
{% endif %}

In [None]:
# Run the time-series trajectory analysis on every stored analysis object
# except the inter-dataset comparison.
if time_series_trajectory == True:
    for dataset, tmp_adata in adata_object_dict.items():
        # The inter-dataset entry is stored under the key "Inter_Dataset";
        # the hyphenated spelling is accepted as well so the skip works
        # whichever form the key takes.
        if dataset in ("Inter_Dataset", "Inter-dataset"):
            continue
        figure_counter = time_series_trajectory_inference(tmp_adata, timepoint_labels_column_name, timepoint_labels)

# Download Analysis Results Table

In [None]:
# Offer each analysis object for download: its `obs` table as CSV and the
# object itself as an h5ad file.
for name, obj in adata_object_dict.items():
    # Drop the "magic" entry from `uns` before export if it is present.
    # Using pop() with a default avoids a bare `except` that would hide
    # unrelated errors.
    # NOTE(review): presumably removed because it cannot be written to h5ad; confirm.
    obj.uns.pop("magic", None)
    display(create_download_link(obj.obs, filename=f"analysis_results_{name}.csv"))
    display(create_download_link(obj, title="Download h5ad file: {}", filename=f"data_{name}.h5ad"))

# References 

Ashburner, M., Ball, C.A., Blake, J.A., Botstein, D., Butler, H., Cherry, J.M., Davis, A.P., Dolinski, K., Dwight, S.S. and Eppig, J.T. (2000) Gene Ontology: tool for the unification of biology. Nature genetics, 25, 25.
<br>
Butler, Andrew, et al. "Integrating single-cell transcriptomic data across different conditions, technologies, and species." Nature biotechnology 36.5 (2018): 411-420.
<br>
Clark, N.R. and Ma’ayan, A. (2011) Introduction to statistical methods to analyze large data sets: principal components analysis. Sci. Signal., 4, tr3-tr3.
<br>
Clark, Neil R., et al. "The characteristic direction: a geometrical approach to identify differentially expressed genes." BMC bioinformatics 15.1 (2014): 79.
<br>
Consortium, E.P. (2004) The ENCODE (ENCyclopedia of DNA elements) project. Science, 306, 636-640.
<br>
Croft, David, et al. "The Reactome pathway knowledgebase." Nucleic acids research 42.D1 (2014): D472-D477.
<br>
Fernandez, Nicolas F., et al. "Clustergrammer, a web-based heatmap visualization and analysis tool for high-dimensional biological data." Scientific data 4 (2017): 170151.
<br>
Haghverdi, Laleh, et al. "Diffusion pseudotime robustly reconstructs lineage branching." Nature methods 13.10 (2016): 845.
<br>
Ilicic, Tomislav, et al. "Classification of low quality cells from single-cell RNA-seq data." Genome biology 17.1 (2016): 29.
<br>
Islam, Saiful, et al. "Quantitative single-cell RNA-seq with unique molecular identifiers." Nature methods 11.2 (2014): 163.
<br>
Kanehisa, M. and Goto, S. (2000) KEGG: kyoto encyclopedia of genes and genomes. Nucleic acids research, 28, 27-30.
<br>
Kuleshov, M.V., Jones, M.R., Rouillard, A.D., Fernandez, N.F., Duan, Q., Wang, Z., Koplev, S., Jenkins, S.L., Jagodnik, K.M. and Lachmann, A. (2016) Enrichr: a comprehensive gene set enrichment analysis web server 2016 update. Nucleic acids research, 44, W90-W97.
<br>
Lachmann, A., Xu, H., Krishnan, J., Berger, S.I., Mazloom, A.R. and Ma'ayan, A. (2010) ChEA: transcription factor regulation inferred from integrating genome-wide ChIP-X experiments. Bioinformatics, 26, 2438-2444.
<br>
Lachmann, Alexander, and Avi Ma'ayan. "KEA: kinase enrichment analysis." Bioinformatics 25.5 (2009): 684-686.
<br>
Maaten, Laurens van der, and Geoffrey Hinton. "Visualizing data using t-SNE." Journal of machine learning research 9.Nov (2008): 2579-2605.
<br>
McInnes, Leland, John Healy, and James Melville. "Umap: Uniform manifold approximation and projection for dimension reduction." arXiv preprint arXiv:1802.03426 (2018).
<br>
Ritchie, Matthew E., et al. "limma powers differential expression analyses for RNA-sequencing and microarray studies." Nucleic acids research 43.7 (2015): e47-e47.
<br>
Traag, Vincent A., Ludo Waltman, and Nees Jan van Eck. "From Louvain to Leiden: guaranteeing well-connected communities." Scientific reports 9.1 (2019): 1-12.
<br>
Tran, Thinh N., and Gary Bader. "Tempora: cell trajectory inference using time-series single-cell RNA sequencing data." bioRxiv (2019): 846907.
<br>
Trapnell, Cole, et al. "The dynamics and regulators of cell fate decisions are revealed by pseudotemporal ordering of single cells." Nature biotechnology 32.4 (2014): 381.
<br>
van Dijk, D., Nainys, J., Sharma, R., Kathail, P., Carr, A.J., Moon, K.R., Mazutis, L., Wolf, G., Krishnaswamy, S. and Pe'er, D. (2017) MAGIC: A diffusion-based imputation method reveals gene-gene interactions in single-cell RNA-sequencing data. BioRxiv, 111591.
<br>
Weinreb, Caleb, et al. "Fundamental limits on dynamic inference from single-cell snapshots." Proceedings of the National Academy of Sciences 115.10 (2018): E2467-E2476.
<br>
Zheng, Grace XY, et al. "Massively parallel digital transcriptional profiling of single cells." Nature communications 8.1 (2017): 1-12.