In [1]:
import numpy
import anndata
import os
import pandas
import scipy
import scanpy
import upsetplot
from matplotlib import pyplot
from pathlib import Path
import upsetplot
import sys

from common import (
    scanpy_load_solo_mtx, 
    scanpy_load_alevin_mtx, 
    scanpy_load_kallisto_gene_mtx, 
    build_anndata,
    plot_retained_barcodes,
    load_barcode_list,
)

In [2]:
%matplotlib inline

In [3]:
# Add the long-rna-seq-condor checkout to sys.path; the membership test keeps
# re-running this cell from appending the same path twice.
LRSC = Path('~/proj/long-rna-seq-condor').expanduser()
if str(LRSC) not in sys.path:
    sys.path.append(str(LRSC))

# NOTE(review): presumably woldrnaseq is provided by the checkout added above — confirm.
from woldrnaseq.plot_genes_detected import (
    bin_library_quantification,
    plot_gene_detection_histogram,
)

Paper 10x filtering

UMI counts from CellRanger were filtered first, where cells with fewer than 1,000 genes detected and genes detected in less than 0.1% of cells were removed. Within each cell, counts were divided by the sum and multiplied by 10,000, added to 1, and log-transformed. The top 4,000 high-dispersion genes were identified. To remove noise (https://github.com/brianpenghe/python-genomics), we first performed hierarchical clustering for these genes and then extracted genes that fell in ‘tight’ clusters (those with more than two members after cutting the dendrogram at 0.8 distance), removing a large number of sporadic genes which had high dispersion scores but were barely co-expressed with other genes. These genes were used in place of ‘highly-variable genes’ for the Seurat pipeline. Using the Seurat pipeline, cells with more than 20% mitochondria reads or more than 8,000 genes detected were removed. Genes were regressed against the number of UMIs per cell and mitochondria percentage and scaled. The resulting matrix, guided by the aforementioned feature genes, was used to perform PCA. Jackstraw was then performed using Seurat’s default settings, resulting in 42 significant PCs. These PCs were in turn used for Louvain cell clustering and t-SNE visualization. Clusters 3, 4, 5, 6, 8, 12 and 13 were further re-clustered using the same method, yielding clusters 17–24.

In [4]:
# Relative directory that the per-method sub-directories and the .h5ad files
# written below are joined onto.
analysis_root = Path('10x_paper')

In [5]:
# NOTE(review): `sausage` is not referenced again in the visible notebook —
# confirm whether these libraries are excluded on purpose.
sausage = ['10x-8','10x-10','10x-11']
# Library ids iterated by load_kallisto() and load_alevin().
runs = ['10x-1','10x-3','10x-4','10x-5','10x-6','10x-7','10x-12','10x-13']


In [6]:
def plot_counts_umi(matrices):
    """Draw a log-log barcode-rank curve of UMI counts for each matrix.

    Parameters
    ----------
    matrices : dict
        Maps a legend label to a matrix object that has a per-cell
        ``obs['counts']`` column.

    For every matrix the per-barcode counts are sorted from largest to
    smallest and plotted against the 1-based barcode rank, so the data agree
    with the axis labels and limits (barcodes on x, UMI counts on y).
    """
    f = pyplot.figure(figsize=(10, 6))
    ax = f.add_subplot(1, 1, 1)

    alpha = 0.8
    gridalpha = 0.5
    fontsize = 12

    for label in matrices:
        matrix = matrices[label]
        # axis=None flattens first, so an (n, 1) counts column is handled too.
        umi = numpy.sort(numpy.array(matrix.obs['counts']), axis=None)[::-1]
        # Rank starts at 1 so the largest barcode is not lost on the log x axis.
        rank = numpy.arange(1, len(umi) + 1)
        ax.plot(rank, umi, linewidth=2, alpha=alpha, label=label)

    ax.set_xlabel('Cumulative number of barcodes', fontsize=fontsize)
    ax.set_ylabel('count of reported UMIs', fontsize=fontsize)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlim(1, 1e5)
    ax.set_ylim(1, 1e6)
    ax.grid(
        color="dimgrey", linestyle="-", linewidth=0.5, which="both", alpha=gridalpha
    )

    ax.legend()


In [7]:
def filter_combined_matrix(matrix, min_genes_per_cell=1000, min_cells_per_gene_fraction=.001):
    """Annotate ``matrix`` with detection statistics and return the filtered subset.

    Side effects on ``matrix`` (it is modified in place):
      * ``obs['counts']``  - sum of X over genes for each cell
      * ``obs['ngenes']``  - number of genes with X > 0 for each cell
      * ``varm['expressed_in']`` - (n_genes, 1) array with the number of cells
        in which each gene has X > 0

    Parameters
    ----------
    matrix
        Object exposing ``X``, ``obs``, ``varm``, ``shape`` and 2-D
        ``matrix[rows, cols]`` indexing (an AnnData in this notebook).
    min_genes_per_cell : int
        A cell is kept when at least this many genes are detected in it.
    min_cells_per_gene_fraction : float
        A gene is kept when it is detected in at least this fraction of the
        cells of the unfiltered matrix.

    Returns
    -------
    ``matrix[keep_cells, keep_genes]`` — whatever the indexing operation of
    ``matrix`` yields for the two boolean masks.
    """
    detected = matrix.X > 0

    # Sparse matrices return numpy.matrix from .sum(); flatten everything to
    # plain 1-D arrays so the columns and the boolean masks are unambiguous.
    counts_per_cell = numpy.asarray(matrix.X.sum(axis=1)).ravel()
    genes_per_cell = numpy.asarray(detected.sum(axis=1)).ravel()
    cells_per_gene = numpy.asarray(detected.sum(axis=0)).ravel()

    matrix.obs['counts'] = counts_per_cell
    matrix.obs['ngenes'] = genes_per_cell
    # Stored as a column vector, for dense and sparse X alike.
    matrix.varm['expressed_in'] = cells_per_gene.reshape(-1, 1)

    min_cells_per_gene = matrix.shape[0] * min_cells_per_gene_fraction
    keep_cells = genes_per_cell >= min_genes_per_cell
    keep_genes = cells_per_gene >= min_cells_per_gene
    return matrix[keep_cells, keep_genes]

In [8]:
def load_kallisto(root):
    """Load the kallisto gene matrix of every library in ``runs`` and concatenate them.

    For each run id the matrix is read from ``root/<run>/genecount`` with
    ``root/<run>/filtered-barcodes.txt`` passed as the barcode filter file.
    Barcodes are prefixed with the run id before the matrices are combined
    with ``anndata.concat``. The combined matrix is printed and returned.
    """
    per_run = []
    for run_id in runs:
        run_dir = root / run_id
        loaded = scanpy_load_kallisto_gene_mtx(
            run_dir / 'genecount',
            filter_file=run_dir / 'filtered-barcodes.txt',
        )
        loaded.obs_names = ["{}-{}".format(run_id, barcode) for barcode in loaded.obs_names]
        per_run.append(loaded)

    combined = anndata.concat(per_run)
    print('Full matrix size', combined)
    return combined
    
def load_filtered_kallisto_matrix(root):
    """Load all kallisto runs under ``root`` and apply the default expression filter.

    Prints the filtered matrix and returns it (previously the result was
    computed and printed but then discarded).
    """
    k_all = load_kallisto(root)
    reduced = filter_combined_matrix(k_all)
    print('Reduced matrix size', reduced)
    return reduced


In [9]:
def load_alevin(root):
    """Load the alevin matrix of every library in ``runs`` and concatenate them.

    Each matrix is read from ``root/<run>`` (no barcode filter file is passed
    here), printed, and its barcodes are prefixed with the run id. The
    combined matrix is printed and returned.
    """
    per_run = []
    for run_id in runs:
        loaded = scanpy_load_alevin_mtx(root / run_id)
        loaded.obs_names = ["{}-{}".format(run_id, barcode) for barcode in loaded.obs_names]
        print(run_id, 'matrix', loaded)
        per_run.append(loaded)

    combined = anndata.concat(per_run)
    print('Full matrix size', combined)
    return combined
    
def load_filtered_alevin_matrix(root):
    """Load all alevin runs under ``root`` and apply the default expression filter.

    Prints the filtered matrix and returns it (previously the result was
    computed and printed but then discarded).
    """
    matrix = load_alevin(root)
    reduced = filter_combined_matrix(matrix)
    print('Filtered matrix size', reduced)
    return reduced

# Load Kallisto EM minimal

In [10]:
# Combine the per-library kallisto matrices found under 10x_paper/kallisto_em_minimal.
kallisto_em_minimal = load_kallisto(analysis_root / 'kallisto_em_minimal')

Full matrix size AnnData object with n_obs × n_vars = 91718 × 31635
    obs: 'counts', 'ngenes'


In [11]:
# Save the combined matrix as an .h5ad file under analysis_root.
kallisto_em_minimal.write_h5ad(analysis_root / 'kallisto_em_minimal.h5ad')

# Load Alevin Minimal

In [12]:
# Combine the per-library alevin matrices found under 10x_paper/alevin_minimal.
alevin_minimal = load_alevin(analysis_root / 'alevin_minimal')

10x-1 matrix AnnData object with n_obs × n_vars = 9673 × 31345
    obs: 'counts', 'ngenes'
10x-3 matrix AnnData object with n_obs × n_vars = 335 × 31345
    obs: 'counts', 'ngenes'
10x-4 matrix AnnData object with n_obs × n_vars = 8143 × 31345
    obs: 'counts', 'ngenes'
10x-5 matrix AnnData object with n_obs × n_vars = 12237 × 31345
    obs: 'counts', 'ngenes'
10x-6 matrix AnnData object with n_obs × n_vars = 846 × 31345
    obs: 'counts', 'ngenes'
10x-7 matrix AnnData object with n_obs × n_vars = 7302 × 31345
    obs: 'counts', 'ngenes'
10x-12 matrix AnnData object with n_obs × n_vars = 12457 × 31345
    obs: 'counts', 'ngenes'
10x-13 matrix AnnData object with n_obs × n_vars = 10552 × 31345
    obs: 'counts', 'ngenes'
Full matrix size AnnData object with n_obs × n_vars = 61545 × 31345
    obs: 'counts', 'ngenes'


In [13]:
# Save the combined matrix as an .h5ad file under analysis_root.
alevin_minimal.write_h5ad(analysis_root / 'alevin_minimal.h5ad')

# Load Kallisto EM

In [14]:
# Combine the per-library kallisto matrices found under 10x_paper/kallisto_em.
kallisto_em = load_kallisto(analysis_root / 'kallisto_em')

Full matrix size AnnData object with n_obs × n_vars = 92005 × 81881
    obs: 'counts', 'ngenes'


In [15]:
# Save the combined matrix as an .h5ad file under analysis_root.
kallisto_em.write_h5ad(analysis_root / 'kallisto_em.h5ad')

# Load Alevin

In [16]:
# Combine the per-library alevin matrices found under 10x_paper/alevin.
alevin = load_alevin(analysis_root / 'alevin')

10x-1 matrix AnnData object with n_obs × n_vars = 9673 × 72301
    obs: 'counts', 'ngenes'
10x-3 matrix AnnData object with n_obs × n_vars = 335 × 72301
    obs: 'counts', 'ngenes'
10x-4 matrix AnnData object with n_obs × n_vars = 8143 × 72301
    obs: 'counts', 'ngenes'
10x-5 matrix AnnData object with n_obs × n_vars = 12237 × 72301
    obs: 'counts', 'ngenes'
10x-6 matrix AnnData object with n_obs × n_vars = 846 × 72301
    obs: 'counts', 'ngenes'
10x-7 matrix AnnData object with n_obs × n_vars = 7302 × 72301
    obs: 'counts', 'ngenes'
10x-12 matrix AnnData object with n_obs × n_vars = 12457 × 72301
    obs: 'counts', 'ngenes'
10x-13 matrix AnnData object with n_obs × n_vars = 10552 × 72301
    obs: 'counts', 'ngenes'
Full matrix size AnnData object with n_obs × n_vars = 61545 × 72301
    obs: 'counts', 'ngenes'


In [17]:
# Save the combined matrix as an .h5ad file under analysis_root.
alevin.write_h5ad(analysis_root / 'alevin.h5ad')

# Load Alevin Decoy Minimal

In [18]:
# Combine the per-library alevin matrices found under 10x_paper/alevin_decoy_minimal.
alevin_decoy_minimal = load_alevin(analysis_root / 'alevin_decoy_minimal')

10x-1 matrix AnnData object with n_obs × n_vars = 9673 × 31345
    obs: 'counts', 'ngenes'
10x-3 matrix AnnData object with n_obs × n_vars = 335 × 31345
    obs: 'counts', 'ngenes'
10x-4 matrix AnnData object with n_obs × n_vars = 8143 × 31345
    obs: 'counts', 'ngenes'
10x-5 matrix AnnData object with n_obs × n_vars = 12237 × 31345
    obs: 'counts', 'ngenes'
10x-6 matrix AnnData object with n_obs × n_vars = 846 × 31345
    obs: 'counts', 'ngenes'
10x-7 matrix AnnData object with n_obs × n_vars = 7302 × 31345
    obs: 'counts', 'ngenes'
10x-12 matrix AnnData object with n_obs × n_vars = 12457 × 31345
    obs: 'counts', 'ngenes'
10x-13 matrix AnnData object with n_obs × n_vars = 10552 × 31345
    obs: 'counts', 'ngenes'
Full matrix size AnnData object with n_obs × n_vars = 61545 × 31345
    obs: 'counts', 'ngenes'


In [19]:
# Save the combined matrix as an .h5ad file under analysis_root.
alevin_decoy_minimal.write_h5ad(analysis_root / 'alevin_decoy_minimal.h5ad')

# Load Alevin Decoy

In [20]:
# Combine the per-library alevin matrices found under 10x_paper/alevin_decoy.
# NOTE(review): the recorded output for this cell reports only 16 barcodes for
# 10x-7, versus 7302 in the other alevin loads — verify that the
# alevin_decoy/10x-7 run completed before trusting comparisons involving this set.
alevin_decoy = load_alevin(analysis_root / 'alevin_decoy')

10x-1 matrix AnnData object with n_obs × n_vars = 9673 × 72312
    obs: 'counts', 'ngenes'
10x-3 matrix AnnData object with n_obs × n_vars = 335 × 72312
    obs: 'counts', 'ngenes'
10x-4 matrix AnnData object with n_obs × n_vars = 8143 × 72312
    obs: 'counts', 'ngenes'
10x-5 matrix AnnData object with n_obs × n_vars = 12237 × 72312
    obs: 'counts', 'ngenes'
10x-6 matrix AnnData object with n_obs × n_vars = 846 × 72312
    obs: 'counts', 'ngenes'
10x-7 matrix AnnData object with n_obs × n_vars = 16 × 72312
    obs: 'counts', 'ngenes'
10x-12 matrix AnnData object with n_obs × n_vars = 12457 × 72312
    obs: 'counts', 'ngenes'
10x-13 matrix AnnData object with n_obs × n_vars = 10552 × 72312
    obs: 'counts', 'ngenes'
Full matrix size AnnData object with n_obs × n_vars = 54259 × 72312
    obs: 'counts', 'ngenes'


In [21]:
# Save the combined matrix as an .h5ad file under analysis_root.
alevin_decoy.write_h5ad(analysis_root / 'alevin_decoy.h5ad')

# UMI counts vs barcodes

In [22]:
# Display name -> combined matrix for every quantification run.
# The insertion order matters: later cells select upset-plot intersections with
# positional boolean tuples whose positions follow the order of these keys.
datasets = {
    'Kallisto (EM) minimal': kallisto_em_minimal,
    'Alevin minimal': alevin_minimal,
    'Kallisto (EM)': kallisto_em,
    'Alevin': alevin,
    'Alevin decoy minimal': alevin_decoy_minimal,
    'Alevin decoy': alevin_decoy,
}

In [23]:
# One curve per entry in `datasets`, all on shared log-log axes.
plot_counts_umi(datasets)

# Compute intersections of everything returned by algorithms

This isn't a completely fair comparison: the minimal sets use a reduced index, and this version of alevin skips writing empty rows. Below is a fairer comparison in which a reasonable expression filter was applied.

In [24]:
# Membership table of every barcode reported by each method (unfiltered).
upset_barcode_contents = upsetplot.from_contents(
    {name: adata.obs_names for name, adata in datasets.items()}
)
f = pyplot.figure(dpi=100)
f.suptitle("Cell Barcodes")
_ = upsetplot.plot(upset_barcode_contents, fig=f, show_counts=True)

In [25]:
# Membership table of every gene id reported by each method (unfiltered).
upset_gene_contents = upsetplot.from_contents(
    {name: adata.var_names for name, adata in datasets.items()}
)
f = pyplot.figure(dpi=100)
f.suptitle('Genes')
_ = upsetplot.plot(upset_gene_contents, fig=f, show_counts=True)


# Compare intersections after applying filter

Alevin tries to save disk space by not writing out the full data matrix, so the comparison above is a bit unfair, as some of the genes or cells that kallisto wrote out might just be empty.

So let's try filtering the matrix down like we did in our paper.

For a cell to remain it has to have at least 1,000 genes expressed.
For a gene to remain it must be expressed in at least 0.1% of the cells.

In [26]:
# Apply the default thresholds to every matrix. filter_combined_matrix also
# writes obs['counts'], obs['ngenes'] and varm['expressed_in'] onto the
# unfiltered matrices held in `datasets`.
datasets_filtered = {
    name: filter_combined_matrix(adata) for name, adata in datasets.items()
}

In [27]:
# Barcode membership per method after the expression filter.
upset_barcode_filtered = upsetplot.from_contents(
    {name: adata.obs_names for name, adata in datasets_filtered.items()}
)
f = pyplot.figure(dpi=100)
f.suptitle("Cell Barcodes")
_ = upsetplot.plot(upset_barcode_filtered, fig=f, show_counts=True)

In [28]:
# Gene membership per method after the expression filter (six 10x sets only).
upset_gene_filtered = upsetplot.from_contents(
    {name: adata.var_names for name, adata in datasets_filtered.items()}
)
f = pyplot.figure(dpi=100)
f.suptitle('Genes')
_ = upsetplot.plot(upset_gene_filtered, fig=f, show_counts=True)


Load in annotated GTF file so we can get to gene names.

In [29]:
# Read the annotation table from the HDF5 store. Opening it read-only inside a
# `with` block closes the file even if the read fails, and avoids the default
# append mode creating an empty store when the path is wrong.
mm10_store_path = Path('~/proj/genome/mm10-M21-male/mm10-M21-male.h5').expanduser()
with pandas.HDFStore(mm10_store_path, mode='r') as mm10_store:
    # Only the first key in the store is read.
    mm10_gtf = mm10_store[list(mm10_store.keys())[0]]
mm10_gtf['length'] = mm10_gtf['stop'] - mm10_gtf['start']
# Number of 'transcript' records per gene_id; gene ids with no transcript rows
# fall back to 1 in the lookup below.
isoform_count = (
    mm10_gtf[mm10_gtf['type'] == 'transcript']
    .groupby('gene_id')['transcript_id']
    .count()
    .to_dict()
)
mm10_gtf['isoform_count'] = mm10_gtf['gene_id'].apply(lambda x: isoform_count.get(x, 1))
mm10_gtf.head()

Unnamed: 0,chromosome,source,type,start,stop,score,strand,frame,gene_id,transcript_id,...,transcript_support_level,tag,havana_transcript,exon_number,exon_id,protein_id,ccdsid,ont,length,isoform_count
0,chr1,ENSEMBL,tRNA,112349389,112349461,,1,,896,896,...,,,,,,,,,72,1
1,chr1,ENSEMBL,tRNA,112576185,112576260,,-1,,897,897,...,,,,,,,,,75,1
2,chr1,ENSEMBL,tRNA,32624825,32624895,,1,,1275,1275,...,,,,,,,,,70,1
3,chr1,ENSEMBL,tRNA,167276215,167276287,,1,,1914,1914,...,,,,,,,,,72,1
4,chr1,ENSEMBL,tRNA,167323285,167323359,,1,,1915,1915,...,,,,,,,,,74,1


In [30]:
# Keep only the gene-level rows of the annotation (types 'gene', 'tRNA', 'spikein').
mm10_genes = mm10_gtf[mm10_gtf['type'].isin(('gene', 'tRNA', 'spikein'))]

Is there anything obviously interesting about the genes only detected by Kallisto?

In [31]:
# Genes that pass the filter only in the two Kallisto sets. The membership key
# is derived from the set names rather than a hand-written boolean tuple, so it
# does not depend on the insertion order of `datasets`.
kallisto_family = ('Kallisto (EM) minimal', 'Kallisto (EM)')
kallisto_family_key = tuple(
    name in kallisto_family for name in upset_gene_filtered.index.names
)
# NOTE(review): the recorded output shows a warning on the original unsorted
# lookup, presumably pandas' lexsort-depth PerformanceWarning; sorting the
# index first should avoid it.
kallisto_family_only_ids = upset_gene_filtered.sort_index().loc[kallisto_family_key]['id']
annotated_kallisto_family_only = mm10_genes[
    mm10_genes['gene_id'].isin(kallisto_family_only_ids)
][['gene_id', 'gene_name', 'gene_type', 'length', 'isoform_count']]
annotated_kallisto_family_only

  annotated_kallisto_family_only = mm10_genes[mm10_genes['gene_id'].isin(upset_gene_filtered.loc[(True,False,True,False,False,False)]['id'])][['gene_id', 'gene_name', 'gene_type', 'length', 'isoform_count']]


Unnamed: 0,gene_id,gene_name,gene_type,length,isoform_count
26254,ENSMUSG00000051951.5,Xkr4,protein_coding,465597,3
26310,ENSMUSG00000102343.1,Gm37381,lincRNA,80476,2
26522,ENSMUSG00000104328.1,Gm37323,lincRNA,3123,1
26930,ENSMUSG00000085623.1,Gm16041,antisense,5963,1
27939,ENSMUSG00000103067.1,Gm30414,lincRNA,20181,1
...,...,...,...,...,...
1864586,ENSMUSG00000055357.8,4933400A11Rik,protein_coding,3378,3
1864622,ENSMUSG00000087263.1,Gm15726,antisense,6185,1
1864768,ENSMUSG00000069053.11,Uba1y,protein_coding,25575,2
1864885,ENSMUSG00000101107.1,Gm28587,lincRNA,3600,1


In [32]:
# Gene-level count matrix for the C1 data, read from c1_pseudo/rsem_gene_counts.h5ad.
c1_root = Path('c1_pseudo')
c1_counts = scanpy.read_h5ad(c1_root / 'rsem_gene_counts.h5ad')

(Double-check that we have the right annotation on the C1 file: it should have the same 81,881 genes as the kallisto matrix.)

In [33]:
# Number of distinct C1 gene ids that also appear in the mm10 annotation table.
numpy.intersect1d(c1_counts.var_names, mm10_gtf['gene_id']).shape

(81881,)

In [34]:
# Same default thresholds as the 10x matrices; this call also writes the
# counts/ngenes/expressed_in annotations onto c1_counts.
c1_filtered = filter_combined_matrix(c1_counts)

## Compare 10x kallisto and alevin genes detected to C1 RSEM

In [35]:
# Gene membership for the six filtered 10x sets plus the filtered C1 matrix.
# NOTE(review): this rebinds `upset_gene_filtered`, which an earlier cell built
# from the six 10x sets only. From here on its index has a seventh level
# ('C1 filtered'), so the earlier six-value .loc lookup no longer means the
# same thing if that cell is re-run after this one.
upset_gene_var_names = {
    name: datasets_filtered[name].var_names for name in datasets_filtered
}
upset_gene_var_names['C1 filtered'] = c1_filtered.var_names
upset_gene_filtered = upsetplot.from_contents(upset_gene_var_names)
f = pyplot.figure(dpi=100)
f.suptitle('Genes')
_ = upsetplot.plot(upset_gene_filtered, fig=f, show_counts=True)


So what can we tell from that ugly plot?

There are 2,652 genes that are shared among the non-minimal sets, which is fine: it makes sense that we're ignoring some genes when using the smaller index.

In the upset plot without the C1 data there are 2,944 genes that are specific to kallisto & kallisto EM. It looks like the majority of those are shared with C1, at 2,321 in common. There were 3,138 in the Kallisto (EM) set, and here 2,334 of those are shared with the C1.

There are still 24,412 genes that are specific to the C1 matrix.

It's worth noticing there's no "Alevin decoy" / C1 only intersection.

In [36]:
# NOTE(review): presumably the processed 10x object from the paper analysis
# (compared below under the label 'Cellranger (M4)') — confirm provenance.
paper = scanpy.read_h5ad("../C1_mouse_limb_combined/monocle/200120_10x.h5ad")

In [37]:
# Compare gene ids with everything after the first '.' removed, so ids are
# matched without their version suffix.
upset_paper_var_names = {
    dataset_name: [gene_id.split('.')[0] for gene_id in adata.var_names]
    for dataset_name, adata in datasets_filtered.items()
}
#upset_paper_var_names['C1 filtered'] = [x.split('.')[0] for x in c1_filtered.var_names]
upset_paper_var_names['Cellranger (M4)'] = [
    gene_id.split('.')[0] for gene_id in paper.var['gene_id']
]
upset_paper_filtered = upsetplot.from_contents(upset_paper_var_names)
f = pyplot.figure(dpi=100)
f.suptitle('Genes')
_ = upsetplot.plot(upset_paper_filtered, fig=f, show_counts=True)


In [38]:
# Inspect the cell naming scheme used by the paper object.
paper.obs_names

Index(['limb12_13_0AAACCTGAGATCGATA_1', 'limb12_13_0AAACCTGAGATGAGAG_1',
       'limb12_13_0AAACCTGAGCAGATCG_1', 'limb12_13_0AAACCTGAGCGATCCC_1',
       'limb12_13_0AAACCTGAGTGTACCT_1', 'limb12_13_0AAACCTGAGTTGTAGA_1',
       'limb12_13_0AAACCTGCAAGTTAAG_1', 'limb12_13_0AAACCTGCAATCCGAT_1',
       'limb12_13_0AAACCTGCACAAGCCC_1', 'limb12_13_0AAACCTGCACGTAAGG_1',
       ...
       'limb8_15_0TTTGTCACAGCCTTTC_1', 'limb8_15_0TTTGTCACAGGCTCAC_1',
       'limb8_15_0TTTGTCACATATACGC_1', 'limb8_15_0TTTGTCAGTCCAGTAT_1',
       'limb8_15_0TTTGTCAGTCGAACAG_1', 'limb8_15_0TTTGTCAGTCGAGTTT_1',
       'limb8_15_0TTTGTCAGTTATTCTC_1', 'limb8_15_0TTTGTCAGTTCCGGCA_1',
       'limb8_15_0TTTGTCATCGTTGCCT_1', 'limb8_15_0TTTGTCATCTGTCTCG_1'],
      dtype='object', name='index', length=90637)

In [39]:
# Distinct library prefixes: each name with its last 18 characters removed.
{cell_name[:-18] for cell_name in paper.obs_names}

{'limb12_13_0',
 'limb13_14_0',
 'limb1_13_5',
 'limb3_11_0',
 'limb4_12_0',
 'limb5_13_0',
 'limb6_15_0',
 'limb7_10_5',
 'limb8_15_0'}

In [40]:
def translate_paper_barcode_names(obs_names):
    """Yield paper cell names rewritten as '<10x library id>-<barcode>'.

    The last 18 characters of each name are treated as a 16-character barcode
    followed by a 2-character suffix (e.g. '_1'); everything before them is the
    library prefix, which is looked up in a fixed table. A prefix missing from
    the table raises KeyError when that name is reached during iteration.
    """
    library_for_prefix = {
        'limb12_13_0': '10x-12-',
        'limb13_14_0': '10x-13-',
        'limb1_13_5': '10x-1-',
        'limb3_11_0': '10x-3-',
        'limb4_12_0': '10x-4-',
        'limb5_13_0': '10x-5-',
        'limb6_15_0': '10x-6-',
        'limb7_10_5': '10x-7-',
        'limb8_15_0': '10x-8-',
    }
    split_at = -18  # start of the barcode, counted from the end of the name
    for cell_name in obs_names:
        library = library_for_prefix[cell_name[:split_at]]
        yield library + cell_name[split_at:-2]

In [41]:
# Spot-check the translation on one name from each end of the obs_names listing.
list(translate_paper_barcode_names(['limb12_13_0AAACCTGCACAAGCCC_1', 'limb8_15_0TTTGTCATCGTTGCCT_1']))

['10x-12-AAACCTGCACAAGCCC', '10x-8-TTTGTCATCGTTGCCT']

In [42]:
upset_paper_obs_names = {
    dataset_name: adata.obs_names
    for dataset_name, adata in datasets_filtered.items()
}
# Store the translated names as a list: a bare generator would be exhausted by
# the first consumer, leaving an empty 'Paper' entry if the dict is reused.
# NOTE(review): the paper prefix 'limb8_15_0' is translated to '10x-8-', but
# '10x-8' is not in `runs`, so those cells cannot match any loaded matrix.
upset_paper_obs_names['Paper'] = list(translate_paper_barcode_names(paper.obs_names))
upset_paper_barcodes_filtered = upsetplot.from_contents(upset_paper_obs_names)
f = pyplot.figure(dpi=100)
f.suptitle('Cells')
_ = upsetplot.plot(upset_paper_barcodes_filtered, fig=f, show_counts=True)


## Compare Alevin to C1

(With the light filtering to limit it to expressed genes)

In [43]:
# Pairwise gene overlap: filtered Alevin versus filtered C1.
alevin_vs_c1_genes = upsetplot.from_contents({
    'Alevin': datasets_filtered['Alevin'].var_names,
    'C1 filtered': c1_filtered.var_names,
})
f = pyplot.figure(dpi=100)
f.suptitle('Genes')
_ = upsetplot.plot(alevin_vs_c1_genes, fig=f, show_counts=True)


## Plot comparing 10x Kallisto (EM) to C1 filtered

In [44]:
# Pairwise gene overlap: filtered Kallisto (EM) versus filtered C1.
kallisto_vs_c1_genes = upsetplot.from_contents({
    'Kallisto (EM)': datasets_filtered['Kallisto (EM)'].var_names,
    'C1 filtered': c1_filtered.var_names,
})
f = pyplot.figure(dpi=100)
f.suptitle('Genes')
_ = upsetplot.plot(kallisto_vs_c1_genes, fig=f, show_counts=True)


# What are the expression levels for 10x genes found by just kallisto?

In [45]:
def plot_detection(datasets, gene_ids, title):
    """Plot a gene detection histogram of per-gene totals for each dataset.

    Parameters
    ----------
    datasets : dict
        Maps a display name to a matrix supporting ``matrix[:, genes]``
        indexing with ``X`` and ``var_names`` (AnnData in this notebook).
    gene_ids : array-like of str
        Gene ids to include; ids absent from a dataset are skipped for it.
    title : str
        Passed through to plot_gene_detection_histogram.

    Returns
    -------
    The value returned by plot_gene_detection_histogram.
    """
    libraries = {}
    for name, adata in datasets.items():
        common_genes = numpy.intersect1d(adata.var_names, gene_ids)
        subset = adata[:, common_genes]
        # Sum over cells directly on X. Going through to_df() would first
        # materialise the whole cells x genes matrix as a dense DataFrame.
        totals = numpy.asarray(subset.X.sum(axis=0)).ravel()
        libraries[name] = pandas.Series(totals, index=subset.var_names)

    # Series are aligned on gene id; a gene missing from a dataset becomes NaN.
    libraries = pandas.DataFrame(libraries)
    bins = bin_library_quantification(libraries, 'counts')
    f = plot_gene_detection_histogram(bins, title)
    return f

In [46]:
# Uses the unfiltered matrices in `datasets`, restricted to the Kallisto-only gene ids.
f = plot_detection(datasets, annotated_kallisto_family_only['gene_id'], 'Kallisto family passed expression threshold')

In [47]:
# Genes present only in 'Kallisto (EM)' and 'C1 filtered'. The key is built
# from the set names so it follows whatever level order the index has.
c1_and_kallisto_sets = ('Kallisto (EM)', 'C1 filtered')
c1_and_kallisto_key = tuple(
    name in c1_and_kallisto_sets for name in upset_gene_filtered.index.names
)
# NOTE(review): the recorded output shows a warning on the original unsorted
# lookup, presumably pandas' lexsort-depth PerformanceWarning; sorting the
# index first should avoid it.
c1_and_kallisto = upset_gene_filtered.sort_index().loc[c1_and_kallisto_key]

  c1_and_kallisto = upset_gene_filtered.loc[False,False,True,False,False,False,True]


In [48]:
# Per-gene totals in the unfiltered matrices for the Kallisto (EM) / C1-only genes.
f = plot_detection(datasets, c1_and_kallisto['id'], 'C1 & Kallisto EM')

In [49]:
# Same plot over every gene-level id in the annotation.
f = plot_detection(datasets, mm10_genes['gene_id'], 'All genes')

In [50]:
# Independent copies of the filtered matrices, so the scaling and embedding
# steps below do not alter `datasets_filtered`.
datasets_scaled = {
    name: adata.copy() for name, adata in datasets_filtered.items()
}

In [51]:
# Scale every copy in place.
# NOTE(review): no per-cell normalisation or log transform is visible in this
# notebook before scaling, unlike the procedure quoted in the
# "Paper 10x filtering" paragraph — confirm whether that is intended.
for adata in datasets_scaled.values():
    scanpy.pp.scale(adata)

In [52]:
# Same detection plot, but on the scaled copies of the filtered matrices.
f = plot_detection(datasets_scaled, mm10_genes['gene_id'], 'All genes scanpy scaled')

In [53]:
# Run PCA (arpack solver) on every scaled matrix; results are stored on each object.
for adata in datasets_scaled.values():
    scanpy.tl.pca(adata, svd_solver='arpack')


In [54]:
# From here on only the Kallisto (EM) matrix is explored further.
scanpy.pl.pca(datasets_scaled['Kallisto (EM)'])

In [55]:
# Variance ratio per principal component, drawn with log=True.
scanpy.pl.pca_variance_ratio(datasets_scaled['Kallisto (EM)'], log=True)

In [56]:
# Neighbour graph with scanpy's default parameters; explicit n_neighbors/n_pcs
# values are left commented out at the end of the line.
scanpy.pp.neighbors(datasets_scaled['Kallisto (EM)'])   #, n_neighbors=10, n_pcs=40)

In [57]:
# UMAP embedding; the object summary shown further down lists the resulting 'X_umap' entry.
scanpy.tl.umap(datasets_scaled['Kallisto (EM)'])

In [58]:
# Leiden clustering with default settings; adds the 'leiden' obs column used for colouring below.
scanpy.tl.leiden(datasets_scaled['Kallisto (EM)'])

In [59]:
# Show which annotations the steps above added to the object.
datasets_scaled['Kallisto (EM)']

AnnData object with n_obs × n_vars = 84512 × 25570
    obs: 'counts', 'ngenes', 'leiden'
    var: 'mean', 'std'
    uns: 'pca', 'neighbors', 'umap', 'leiden'
    obsm: 'X_pca', 'X_umap'
    varm: 'expressed_in', 'PCs'
    obsp: 'distances', 'connectivities'

In [60]:
# UMAP coloured by Leiden cluster.
scanpy.pl.umap(datasets_scaled['Kallisto (EM)'], color=['leiden'])