In [None]:
#%%appyter init
from appyter import magic
# Hand appyter a zero-argument callable that returns this notebook's globals().
# Binding the built-in `globals` as a default argument keeps the lambda free of
# outer-scope lookups.
magic.init(lambda _=globals: _())

In [None]:
%%appyter hide_code
{% do SectionField(
    name='PRIMARY',
    title='KEA3 Consensus Kinases',
    subtitle='This appyter returns consensus kinases using a set of gene sets',
    img='kea3.png'
) %}

In [None]:
%%appyter markdown

{% set title = StringField(
    name='title',
    label='Notebook name',
    default='KEA3 Consensus Kinases',
    section="PRIMARY",
    constraint='[^<>]*'
) %}

# {{ title.raw_value }}

In [None]:
import time
import requests
import pandas as pd
import json
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display, IFrame, Markdown
import math
import scipy.stats as st
import fastcluster
from sklearn.manifold import TSNE
import plotly.express as px
from umap import UMAP
from maayanlab_bioinformatics.normalization import quantile_normalize, zscore_normalize
from tqdm import tqdm
from matplotlib.patches import Patch


In [None]:
# Remote endpoints used by the enrichment and clustergrammer helpers.
API_URL = 'https://maayanlab.cloud/kea3/api/enrich/'
clustergrammer_url = 'https://maayanlab.cloud/clustergrammer/matrix_upload/'

# Running caption counters: display helpers receive the current number and
# return the next one.
table, figure = 1, 1

## Get Input

In [None]:
%%appyter code_exec


{% set input_gene_set = FileField(
    name='input_gene_set',
    label='Gene Set',
    default='SARS-CoV-2_differentially_phosphorelated_proteins_down.gmt',
    section="PRIMARY",
    examples={
        'SARS-CoV-2_differentially_phosphorelated_proteins_down.gmt': 'https://appyters.maayanlab.cloud/storage/KEA3Consensus/SARS-CoV-2_differentially_phosphorelated_proteins_down.gmt',
        'SARS-CoV-2_differentially_phosphorelated_proteins_up.gmt': 'https://appyters.maayanlab.cloud/storage/KEA3Consensus/SARS-CoV-2_differentially_phosphorelated_proteins_up.gmt'
    }
) %}

{% set input_meta = FileField(
    name='input_meta',
    label='Metadata File',
    default='SARS-CoV-2_differentially_phosphorelated_proteins_meta.tsv',
    section="PRIMARY",
    examples={
        'SARS-CoV-2_differentially_phosphorelated_proteins_meta.tsv': 'https://appyters.maayanlab.cloud/storage/KEA3Consensus/SARS-CoV-2_differentially_phosphorelated_proteins_meta.tsv',
    }
) %}
{% set color_by =  StringField(name='group_by', label='Group By', description="Group By Metadata", default='', section='PRIMARY')%}

# Jinja placeholders for the form fields declared above in this cell; the
# rendered notebook sees plain Python assignments here.
input_gene_set = {{ input_gene_set }}
input_meta = {{ input_meta }}
color_by = {{ color_by }}

In [None]:
# Parse the gene-set file: one tab-separated signature per line, where the
# first field is the signature id and the remaining non-empty fields are members.
enrichment = {}
input_sigs = {}
with open(input_gene_set) as gmt_file:
    for line in gmt_file:
        fields = line.strip().split("\t")
        if len(fields) == 1:
            raise ValueError("Line '%s' is either empty or not formatted properly. Please consult README for more information"%line)
        sigid, members = fields[0], fields[1:]
        # Keep only the text before the first comma of every non-empty member.
        symbols = [member.split(",")[0] for member in members if len(member) > 0]
        input_sigs[sigid] = {"genes": ", ".join(symbols)}
        enrichment[sigid] = {"genes": symbols}

In [None]:
# Number of parsed signatures; reused later in table captions.
num_sigs = len(enrichment)

# Turn the {signature: {"genes": ...}} mapping into a frame and preview it.
input_sigs = pd.DataFrame.from_dict(input_sigs, orient="index")
display(input_sigs.head(10))
display(
    Markdown("**Table %d** Input Signatures"%(table)),
    display_id="input_sigs",
)
table += 1

In [None]:
# Metadata is optional: an empty value means no grouping information.
if input_meta != '':
    input_meta_df = pd.read_csv(input_meta, sep="\t", index_col=0)
    display(input_meta_df.head(10))
    display(Markdown("**Table %d** Input Meta"%(table)), display_id="input_meta")
    table += 1
    # Fall back to the first metadata column when the requested one is blank
    # or not present in the file.
    if color_by == '' or color_by not in input_meta_df.columns:
        color_by = input_meta_df.columns[0]
else:
    input_meta_df = None
    color_by = None

## User defined parameters

In [None]:
%%appyter code_exec
# Number of best-ranked kinases kept per group in the consensus tables and plots.
top_results = {{IntField(name='min_count', label='Top results', description="Number of top results to keep", default=25, section='PRIMARY')}}

# t-SNE embedding
tsne_plot = {{BoolField(name='tsne_plot', label="Show t-SNE plot", default=True, section="PRIMARY")}}
perplexity = {{IntField(name='perplexity', label='Perplexity', description="t-SNE perplexity", default=30, section='PRIMARY')}}

# UMAP embedding
UMAP_plot = {{BoolField(name='UMAP_plot', label="Show UMAP plot", default=True, section="PRIMARY")}}
n_neighbors = {{IntField(name='n_neighbors', label='n_neighbors', description="UMAP's n_neighbors", default=15, section='PRIMARY')}}

# Seed shared by both embeddings, and the normalization applied before them.
random_state = {{IntField(name='random_state', label='Random State', description="Random State", default=21, section='PRIMARY')}}
normalization_method = '''{{ChoiceField(name="normalization_method",
                                     label="Normalization method",
                                     description="Normalization method to use",
                                     choices=[
                                         "quantile",
                                         "zscore",
                                         "none"
                                     ],
                                     default="quantile",
                                     section="PRIMARY"
                        )}}'''


# Which consensus visualisations to render.
heatmap_plot = {{BoolField(name='heatmap_plot', label="Show heatmap plot", default=True, section="PRIMARY")}}
clustergrammer_plot = {{BoolField(name='clustergrammer_plot', label="Show clustergrammer", default=True, section="PRIMARY")}}
stacked_plot = {{BoolField(name='stacked_plot', label="Show stacked bar chart", default=True, section="PRIMARY")}}

# Figure size, and the legend anchor passed to `bbox_to_anchor` by the stacked bar chart.
width = {{FloatField(name='width', label='image width', default=15, section='PRIMARY')}}
height = {{FloatField(name='height', label='image height', default=15, section='PRIMARY')}}
bbox_x = {{FloatField(name='bbox_x', label='legend anchor x', default=1.2, section='PRIMARY')}}
bbox_y = {{FloatField(name='bbox_y', label='legend anchor y', default=1.2, section='PRIMARY')}}



## Enrichment

In [None]:
# KEA3 Functions
def get_kea3_results(gene_set, query_name, timeout=120):
    """Submit one gene set to the KEA3 enrichment endpoint and return its reply.

    Parameters
    ----------
    gene_set : list
        Gene symbols to analyse.
    query_name : str
        Label sent along with the query.
    timeout : float, optional
        Seconds to wait for the server. Without a timeout a stalled
        connection would block forever and the caller's retry loop would
        never get a chance to run.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    Exception
        When the server answers with a non-success status. Timeouts and
        connection problems surface as the exceptions raised by `requests`.
    """
    payload = {
        'gene_set': gene_set,
        'query_name': query_name
    }
    response = requests.post(API_URL, data=json.dumps(payload), timeout=timeout)
    if not response.ok:
        raise Exception('Error analyzing gene list')
    # Pause after every successful call - presumably to go easy on the public API.
    time.sleep(1)
    return json.loads(response.text)

In [None]:
# Query KEA3 once per signature, retrying up to five times on any error.
failed_list = []
for description, values in tqdm(enrichment.items()):
    genes = values["genes"]
    for tries in range(5):
        try:
            # Replace the raw gene list with the enrichment result in place.
            enrichment[description] = get_kea3_results(genes, description)
            break
        except Exception as e:
            print(e)
            time.sleep(0.5)
    else:
        # Every attempt raised.
        failed_list.append(description)
if len(failed_list):
    print("Failed to add %d list"%len(failed_list))
    # A failed signature still holds its {"genes": [...]} placeholder, which has
    # no score tables; drop it so later cells do not fail with a KeyError.
    for description in failed_list:
        del enrichment[description]

In [None]:
# DataFrame
def get_dataframe(results, method, table):
    """Assemble the kinase-by-signature score matrix for one KEA3 method.

    Parameters
    ----------
    results : dict
        Maps signature id to a KEA3 reply; ``results[sig][method]`` is a list
        of records carrying a ``'TF'`` name and a ``'Score'``.
    method : str
        Key of the score table to read, also used in the output file name.
    table : int
        Number used for the caption of the displayed table.

    Returns
    -------
    (DataFrame, int)
        The matrix (rows are kinases in order of first appearance, columns are
        signatures, 0.0 where a kinase has no score) and the next table number.

    Side effects: writes ``<method>_df.tsv`` and displays a preview.
    """
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()
    # Gather all scores first and build the frame in one go: inserting one
    # column per kinase into a live frame fragments it and makes pandas emit
    # a PerformanceWarning.
    scores_by_sig = {}
    kinase_order = {}  # insertion-ordered set of kinase names
    for sig, result in results.items():
        sig_scores = scores_by_sig[sig] = {}
        for entry in result[method]:
            kinase = entry['TF']
            sig_scores[kinase] = float(entry['Score'])
            kinase_order.setdefault(kinase, None)
    df = (
        pd.DataFrame(scores_by_sig)
        # Pin row/column order explicitly instead of relying on constructor rules.
        .reindex(index=list(kinase_order), columns=list(results.keys()))
        .fillna(0.0)
        .astype(float)
    )
    df.to_csv("%s_df.tsv"%method, sep="\t")
    display(df.head(10))
    display(Markdown("**Table %d** The table below shows the %s of kinases for each of the \
        %d input gene sets [Download complete table](%s_df.tsv)"%(table, method_renamed, num_sigs, method))
    )
    table+=1
    return df, table

def top_ranked_scores(df, label=None, meta=True):
    """Return kinase scores sorted in ascending order.

    Parameters
    ----------
    df : DataFrame
        Kinase-by-signature score matrix.
    label : optional
        With ``meta=True``: a value of the ``color_by`` metadata column; only
        signatures carrying that value are averaged (``None`` averages all).
        With ``meta=False``: the name of a single column of ``df``.
    meta : bool
        Whether ``label`` refers to a metadata group or to one column.

    Returns
    -------
    Series indexed by kinase. For the averaged case each value is the row sum
    divided by the number of entries greater than zero, so zero entries do not
    count towards the mean; rows without any positive entry come out as NaN
    and sort last.
    """
    if not meta:
        return df[label].sort_values()

    if label is None:
        for_consensus = df
    else:
        members = input_meta_df[input_meta_df[color_by] == label].index
        # Average only over signatures that have a score column. Selecting a
        # metadata row without one (e.g. an enrichment that never succeeded)
        # would raise a KeyError.
        for_consensus = df[[sig for sig in members if sig in df.columns]]
    return (for_consensus.sum(1)/(for_consensus>0).sum(1)).sort_values()
   
    
def get_consensus_kinases(df, method, table):
    """Tabulate the best-ranked kinases overall and per group.

    Parameters
    ----------
    df : DataFrame
        Kinase-by-signature score matrix.
    method : str
        Name used in the caption and as the stem of the output file.
    table : int
        Number used for the caption of the displayed table.

    Returns
    -------
    (kinases_ranked, consensus_index, sorted_values, table)
        ``kinases_ranked`` has one column ``'all'`` plus one per metadata group
        (or per signature when no metadata file was given), each listing up to
        ``top_results`` kinase names. ``consensus_index`` is the union of all
        listed kinases ordered by the overall ranking, ``sorted_values`` is that
        overall ranking, and ``table`` is the next table number.

    Side effects: writes ``<method>_top_kinase.tsv`` and displays the table.
    """
    def ranked_column(ranking):
        # Pad with the "-" placeholder so the column always has `top_results`
        # entries; assigning a shorter list would raise a length mismatch.
        names = list(ranking.index[0:top_results])
        return names + ["-"] * (top_results - len(names))

    sorted_values = top_ranked_scores(df)
    kinases_ranked = pd.DataFrame("-", index=[i+1 for i in range(0,top_results)], columns=["all"])
    kinases_ranked['all'] = ranked_column(sorted_values)
    selected = set(sorted_values.index[0:top_results])

    if not input_meta == "":
        rankings = [(label, top_ranked_scores(df, label))
                    for label in input_meta_df[color_by].unique()]
    else:
        rankings = [(label, top_ranked_scores(df, label, meta=False))
                    for label in df]
    for label, ranking in rankings:
        selected.update(ranking.index[0:top_results])
        kinases_ranked[label] = ranked_column(ranking)

    # `sorted_values` is already ordered, so filtering it keeps the consensus
    # kinases in ranking order without depending on set iteration order.
    consensus_index = sorted_values.index[sorted_values.index.isin(selected)]

    # Tab-separated, to match the .tsv extension and the other exported tables.
    kinases_ranked.to_csv("%s_top_kinase.tsv"%method, sep="\t")
    display(kinases_ranked)
    # A markdown link target may not contain a raw space, so encode it.
    display(Markdown("**Table %d** Top %d kinases ranked by the mean of %s scores \
        [Download complete list](%s_top_kinase.tsv)"%(table, top_results, method, method.replace(" ", "%20"))))
    table +=1
    return kinases_ranked, consensus_index, sorted_values, table
    
    
def get_consensus(df, method, table):
    """Build the consensus matrix of top kinases and its scaled variant.

    Parameters
    ----------
    df : DataFrame
        Kinase-by-signature score matrix.
    method : str
        KEA3 method key; selects the scaling rule and names the output files.
    table : int
        Number used for the caption of the next displayed table.

    Returns
    -------
    (consensus, consensus_scaled, table)
        ``consensus`` holds the rows of ``df`` for the consensus kinases,
        ``consensus_scaled`` the per-row rescaled matrix, and ``table`` the
        next table number.

    Side effects: writes ``<method>_consensus_matrix.tsv`` and
    ``<method>_consensus_scaled.tsv`` and displays previews of both.
    """
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()
    _, consensus_index, sorted_values, table = get_consensus_kinases(df, method_renamed, table)
    top_kinases = sorted_values.loc[consensus_index]
    consensus = df.loc[consensus_index]
    # Tab-separated, to match the .tsv extension.
    consensus.to_csv("%s_consensus_matrix.tsv"%method, sep="\t")
    display(consensus.head(10))
    if color_by is None:
        display(Markdown("**Table %d** Consensus matrix of top %d kinases ranked by the mean of %s scores (All Signatures) \
            [Download table](%s_consensus_matrix.tsv)"%(table, top_results, method_renamed, method)))
    else:
        display(Markdown("**Table %d** Consensus matrix of the top %d kinases of each %s ranked by the mean of %s scores \
            [Download table](%s_consensus_matrix.tsv)"%(table, top_results, color_by, method_renamed, method)))
    table +=1

    # Invert each row: subtract every score from (row mean + row maximum), so
    # the smallest score in a row gets the largest share.
    c = top_kinases.add(consensus.max(1), axis=0)
    csub = consensus.rsub(c, axis=0)

    # Per-kinase weight derived from its overall mean score.
    minscore = sorted_values.min()
    maxscore = sorted_values.max()
    diff = maxscore - minscore
    if (method == "Integrated--topRank"):
        scaler=1-(top_kinases-minscore)
    else:
        scaler=-(top_kinases-maxscore)/diff

    # Normalise each row to sum to one, then apply the kinase weight.
    consensus_scaled = csub.div(csub.sum(1), axis=0).mul(scaler, axis=0)
    consensus_scaled.to_csv("%s_consensus_scaled.tsv"%method, sep="\t")

    display(consensus_scaled.loc[consensus_index].head(10))
    if color_by is None:
        display(Markdown("**Table %d** Consensus matrix of top %d kinases ranked by the scaled mean of %s scores (All Signatures) \
            [Download table](%s_consensus_scaled.tsv)"%(table, top_results, method_renamed, method)))
    else:
        display(Markdown("**Table %d** Consensus matrix of the top %d kinases of each %s ranked by the scaled mean of %s scores \
            [Download table](%s_consensus_scaled.tsv)"%(table, top_results, color_by, method_renamed, method)))
    table +=1
    return consensus, consensus_scaled, table


In [None]:
# methods

def get_cmap(num, rev=False):
    """Return a seaborn cubehelix palette with `num` colours.

    With `rev` set, the palette is requested with ``light=0, dark=1``;
    otherwise seaborn's defaults are used.
    """
    palette_options = {"light": 0, "dark": 1} if rev else {}
    return sns.cubehelix_palette(num, **palette_options)

def heatmap(df, method, figure, rev=False):
    """Draw a seaborn clustermap of `df`, save it and caption it.

    Parameters
    ----------
    df : DataFrame
        Kinase-by-signature matrix to plot.
    method : str
        KEA3 method key; used for the caption and the PNG file name.
    figure : int
        Number used in the figure caption.
    rev : bool
        Forwarded to `get_cmap`; when false the caption says "scaled".

    Returns
    -------
    int
        The next figure number.

    When a metadata file was supplied, columns are grouped by the ``color_by``
    value (largest group first), annotated with a colour bar, and a legend is
    added. Side effects: writes ``<method>_heatmap.png``.
    """
    # One palette step per distinct cell value.
    vals = set()
    for col in df:
        vals = vals.union(df[col])
    cmap = get_cmap(len(vals), rev)
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()

    has_meta = not input_meta == ""
    col_colors = None
    if has_meta:
        # Use first-appearance order so group colours are stable between runs.
        groups = list(input_meta_df[color_by].unique())
        lut = dict(zip(groups, sns.hls_palette(len(groups), s=.45)))
        columns = pd.Series('-', index=df.columns, name=color_by)
        col_colors = pd.Series('-', index=df.columns, name=color_by)
        for col in df.columns:
            category = input_meta_df.at[col, color_by]
            col_colors[col] = lut[category]
            columns[col] = category
        # Lay the columns out group by group, largest group first.
        group_order = list(columns.groupby(columns).count().sort_values(ascending=False).index)
        new_columns = []
        for c in group_order:
            new_columns = new_columns + columns[columns == c].index.to_list()
        df = df[new_columns]

    if not rev:
        method_renamed = "scaled %s"%method_renamed
    cg = sns.clustermap(df, cmap=cmap, figsize=(width, height),
                        cbar_pos=(0.02, 0.65, 0.05, 0.18),
                        col_colors=col_colors,
                        col_cluster=False)

    cg.ax_row_dendrogram.set_visible(False)
    cg.ax_col_dendrogram.set_visible(False)
    if has_meta:
        # Handles and labels must come from the same sequence, otherwise a
        # legend entry can show one group's colour next to another's name.
        handles = [Patch(facecolor=lut[category]) for category in group_order]
        plt.legend(handles, group_order, title=color_by,
                   bbox_to_anchor=(1, 1), bbox_transform=plt.gcf().transFigure, loc='upper right')

    display(cg)
    plt.show()
    cg.savefig("%s_heatmap.png"%method)
    display(Markdown("**Figure %d** Heatmap of top %d kinases ranked by %s"%(figure, top_results, method_renamed)))
    figure += 1
    return figure

def clustergrammer(df, method, figure):
    """Upload `df` to the Clustergrammer service and embed the resulting view.

    Parameters
    ----------
    df : DataFrame
        Kinase-by-signature matrix to upload.
    method : str
        KEA3 method key; used for the caption and the TSV file name.
    figure : int
        Number used in the figure caption.

    Returns
    -------
    int
        The next figure number.

    Raises
    ------
    Exception
        After five failed upload attempts: the last exception raised by the
        request, or an ``Exception`` wrapping the last error response body.

    Side effects: writes ``<method>_clustergrammer.tsv``.
    """
    clustergram_df = df.rename(columns={i:"Gene set: %s"%i for i in df.columns}, index={i:"Kinase: %s"%i for i in df.index})
    if not (input_meta == ""):
        # Look each column up by name so its group label stays correct even
        # when the metadata rows are ordered differently from the matrix columns.
        group_labels = ["%s: %s"%(color_by, input_meta_df.at[i,color_by]) for i in df.columns]
        clustergram_df = clustergram_df.T.set_index([clustergram_df.columns, group_labels]).T
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()
    method_renamed = "scaled %s"%method_renamed

    name = "%s_clustergrammer.tsv"%method

    clustergram_df.to_csv(name, sep="\t")
    response = ''
    for i in range(5):
        try:
            # Close the upload handle after every attempt.
            with open(name, 'rb') as upload:
                res = requests.post(clustergrammer_url, files={'file': upload}, timeout=120)
            if not res.ok:
                response = res.text
                time.sleep(1)
            else:
                url = res.text.replace("http:","https:")
                break
        except Exception as e:
            response = e
            time.sleep(2)
    else:
        # Re-raise the original error (including subclasses) when there is one.
        if isinstance(response, Exception):
            raise response
        else:
            raise Exception(response)
    display(IFrame(url, width="1000", height="1000"))
    display(Markdown("**Figure %d** Clustergrammer of top %d kinases \
                    ranked by %s. [Go to url](%s) [Download file](%s)"%(figure, top_results, method_renamed, url, name)))
    figure +=1
    return figure

def stackedBarPlot(input_df, method, figure, rev=False, width = 15, height = 15):
    """Draw a stacked horizontal bar chart of the top kinases.

    Parameters
    ----------
    input_df : DataFrame
        Kinase-by-signature matrix.
    method : str
        KEA3 method key; used for the axis label, caption and file name.
    figure : int
        Number used in the figure caption.
    rev : bool
        Reverses the sort direction; when false the label says "scaled".
    width, height : float
        Figure size passed to pandas' plotting call.

    Returns
    -------
    int
        The next figure number, or `figure` unchanged when nothing was drawn.

    With metadata available, bars are stacked by ``color_by`` group (sum over
    the group's signatures); otherwise by individual signature.
    Side effects: writes an SVG file.
    """
    df = input_df.copy()
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()
    filename = "%s_heatmap.svg"%method
    if not rev:
        method_renamed = "scaled %s"%method_renamed
        filename = "scaled_%s.svg"%method

    sorting = df.sum(axis=1).sort_values(ascending=not rev).index[-top_results:]
    if color_by is not None:
        group_of = input_meta_df[color_by]
        # One numeric column per group, built in a single pass. Signatures
        # listed in the metadata but absent from the matrix are skipped.
        df = pd.DataFrame(
            {
                group: input_df.loc[
                    sorting,
                    [sig for sig in group_of[group_of == group].index if sig in input_df.columns],
                ].sum(axis=1)
                for group in group_of.unique()
            },
            index=sorting,
        )
    else:
        df = df.loc[sorting]
    if df.shape[0]==0:
        # Nothing to draw: keep the figure counter intact for the caller.
        return figure
    plot = df.plot.barh(stacked = True, figsize = (width,height), fontsize = 20)
    plt.legend(bbox_to_anchor=(bbox_x, bbox_y), prop={'size': 16}, ncol=2)
    plt.xlabel(method_renamed, labelpad = 20, fontsize = 'xx-large')
    display(plot)
    plt.savefig(filename, format = 'svg', bbox_inches='tight')
    plt.show()
    display(Markdown("**Figure %d** Stacked bar chart of top %d kinases ranked by %s"%(figure, top_results, method_renamed)))
    figure += 1
    return figure

def preprocessing(df, method, table):
    """Trim `df` to at most 500 rows and normalise it for the embeddings.

    Rows are ranked by their sum and the 500 smallest are kept when the frame
    is larger than that. The global `normalization_method` then selects
    quantile normalisation, z-scoring (applied to the transposed frame), or
    no normalisation. A ten-row preview and a caption are displayed.

    Returns the normalised frame and the next table number.
    """
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()

    p_df = df
    if df.shape[0] > 500:
        kept_rows = df.sum(axis=1).sort_values()[:500].index
        p_df = df.loc[kept_rows]

    if normalization_method == "quantile":
        norm = quantile_normalize(p_df)
        caption = "**Table %d** Quantile normalized %s scores"
    elif normalization_method == "zscore":
        norm = zscore_normalize(p_df.T).T
        caption = "**Table %d** Z-score normalized %s scores"
    else:
        norm = p_df
        caption = "**Table %d** Top 500 %s scores"

    display(norm.head(10))
    display(Markdown(caption%(table, method_renamed)))
    return norm, table + 1

def metadata_plot(df, x,y):
    """Draw one scatter panel per metadata column, two panels per row.

    `df` must contain the columns named by `x` and `y` as well as every
    column of the global `input_meta_df`; each panel colours the points by
    one metadata column. An unused trailing panel is removed.
    """
    colors = input_meta_df.columns
    n_panels = len(colors)
    plot_rows = n_panels // 2 + n_panels % 2
    fig, axes = plt.subplots(plot_rows, 2, sharex=True, sharey=True, figsize=(20,8*plot_rows))

    def panel(position):
        # With a single row matplotlib returns a 1-D array of axes.
        row, col = divmod(position, 2)
        return axes[row, col] if plot_rows > 1 else axes[col]

    for position, color in enumerate(colors):
        target = panel(position)
        sns.scatterplot(
            data=df,
            x=x, y=y,
            hue=color,
            ax=target
        )
        target.set_title(color)

    # An odd number of columns leaves the last slot of the grid empty.
    if plot_rows * 2 > n_panels:
        fig.delaxes(panel(n_panels))
    plt.show()
        

def get_tsne(df, method, figure):
    """Embed the signatures (columns of `df`) with t-SNE and plot them.

    Parameters
    ----------
    df : DataFrame
        Kinase-by-signature matrix; each column becomes one point.
    method : str
        KEA3 method key, used in the captions.
    figure : int
        Number used in the first figure caption.

    Returns
    -------
    int
        The next figure number (advanced by two when the per-metadata-column
        panels are drawn as well).
    """
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()
    # scikit-learn rejects a perplexity that is not smaller than the number of
    # samples, so cap the configured value for small inputs.
    n_samples = df.shape[1]
    effective_perplexity = min(perplexity, max(n_samples - 1, 1))
    X_embedded = TSNE(n_components=2,
                      perplexity=effective_perplexity,
                      random_state=random_state,
                     ).fit_transform(df.T)
    tsne_df = pd.DataFrame(X_embedded, columns=["t-SNE 1", "t-SNE 2"])
    tsne_df['label'] = df.columns
    if (not input_meta == ""):
        # Attach the metadata columns to each point for colouring and hover text.
        tsne_df = tsne_df.merge(right=input_meta_df, left_on="label", right_index=True)
    display(
        px.scatter(
            tsne_df,
            x="t-SNE 1",
            y="t-SNE 2",
            color=color_by,
            hover_data=tsne_df.columns,
          )
    )
    display(Markdown("**Figure %d** t-SNE plot using %s"%(figure, method_renamed)))
    if not input_meta == "" and len(input_meta_df.columns) > 1:
        metadata_plot(tsne_df, 't-SNE 1', 't-SNE 2')
        figure+=1
        display(Markdown("**Figure %d** t-SNE plot using %s colored by metadata"%(figure, method_renamed)))
    return figure + 1

def get_umap(df, method, figure):
    """Embed the signatures (columns of `df`) with UMAP and plot them.

    Uses the global `random_state` and `n_neighbors`, a cosine metric and
    ``min_dist=0.3``. Returns the next figure number (advanced by two when
    the per-metadata-column panels are drawn as well).
    """
    method_renamed = method.replace("Integrated--", "").replace("Rank", " Rank").lower()

    umap_options = dict(
        random_state=random_state,
        n_neighbors=n_neighbors,
        n_components=2,
        metric='cosine',
        min_dist=0.3,
    )
    consensus_umap = UMAP(**umap_options)
    consensus_umap.fit(df.T.values)
    embedding = consensus_umap.transform(df.T.values)
    umap_df = pd.DataFrame(embedding, columns=["UMAP 1", "UMAP 2"])
    umap_df['label'] = df.columns

    has_meta = not input_meta == ""
    if has_meta:
        # Attach the metadata columns to each point for colouring and hover text.
        umap_df = umap_df.merge(right=input_meta_df, left_on="label", right_index=True)

    scatter = px.scatter(
        umap_df,
        x="UMAP 1",
        y="UMAP 2",
        color=color_by,
        hover_data=umap_df.columns,
    )
    display(scatter)

    if color_by:
        caption = "**Figure %d** UMAP plot using %s colored by %s"%(figure, method_renamed, color_by)
    else:
        caption = "**Figure %d** UMAP plot using %s"%(figure, method_renamed)
    display(Markdown(caption))

    if has_meta and len(input_meta_df.columns) > 1:
        metadata_plot(umap_df, 'UMAP 1', 'UMAP 2')
        figure+=1
        display(Markdown("**Figure %d** UMAP plot using %s colored by metadata"%(figure, method_renamed)))
    return figure + 1


In [None]:
def preprocessing_section(term_df, method, table):
    """Show the "Preprocessing" heading, then delegate to `preprocessing`."""
    heading = Markdown("### Preprocessing")
    display(heading)
    return preprocessing(term_df, method, table)

def tsne_section(prep, method, figure):
    """Show the "t-SNE" heading, then delegate to `get_tsne`."""
    heading = Markdown("### t-SNE")
    display(heading)
    return get_tsne(prep, method, figure)

def umap_section(prep, method, figure):
    """Show the "UMAP" heading, then delegate to `get_umap`."""
    heading = Markdown("### UMAP")
    display(heading)
    return get_umap(prep, method, figure)

def heatmap_section(consensus_scaled, method, figure):
    """Show the heatmap headings, draw the scaled heatmap and return the next figure number."""
    for heading in ("### Heatmap", "#### Scaled Mean Rank"):
        display(Markdown(heading))
    return heatmap(consensus_scaled, method, figure)

def clustergrammer_section(consensus_scaled, method, figure):
    """Show the "Clustergrammer" heading, then delegate to `clustergrammer`."""
    heading = Markdown("### Clustergrammer")
    display(heading)
    return clustergrammer(consensus_scaled, method, figure)

def bar_section(consensus_scaled, method, figure):
    """Show the stacked-bar headings, draw the chart and return the next figure number.

    The figure size configured in the form (`width`, `height`) is forwarded
    explicitly; otherwise `stackedBarPlot` would fall back to its own defaults
    and ignore the user's setting.
    """
    display(Markdown("### Stacked bar chart"))
    display(Markdown("#### Scaled Mean Rank"))
    figure = stackedBarPlot(consensus_scaled, method, figure, width=width, height=height)
    return figure

## Mean Rank

In [None]:
# Collect the 'Integrated--meanRank' scores of every signature into one
# kinase-by-signature table; `method` is reused by the cells below.
method = 'Integrated--meanRank'
term_df,table = get_dataframe(enrichment, method, table)

In [None]:
# `prep` feeds both embeddings, so it is only computed when at least one of them is enabled.
if (tsne_plot or UMAP_plot):
    prep, table = preprocessing_section(term_df, method, table)

In [None]:
# t-SNE embedding of the signatures, skipped when disabled in the form.
if tsne_plot:
    figure = tsne_section(prep, method, figure)

In [None]:
# UMAP embedding of the signatures, skipped when disabled in the form.
if UMAP_plot:
    figure = umap_section(prep, method, figure)

### Top Kinases and Consensus Matrix

In [None]:
# Uses the unnormalised `term_df`, not `prep`; the plots below read `consensus_scaled`.
consensus_df, consensus_scaled, table = get_consensus(term_df, method, table)

In [None]:
# Static heatmap of the scaled consensus matrix.
if (heatmap_plot):
    figure = heatmap_section(consensus_scaled, method, figure)

In [None]:
# Interactive view: uploads the scaled consensus matrix to the Clustergrammer service.
if clustergrammer_plot:
    figure = clustergrammer_section(consensus_scaled, method, figure)

In [None]:
# Stacked bar chart of the scaled consensus matrix.
if stacked_plot:
    figure = bar_section(consensus_scaled, method, figure)

## Top Rank

In [None]:
# Same pipeline as above, now for the 'Integrated--topRank' scores.
# `method`, `term_df` and the variables derived from them are rebound from here on.
method = 'Integrated--topRank'
term_df,table = get_dataframe(enrichment, method, table)

In [None]:
# `prep` feeds both embeddings, so it is only computed when at least one of them is enabled.
if (tsne_plot or UMAP_plot):
    prep, table = preprocessing_section(term_df, method, table)

In [None]:
# t-SNE embedding of the signatures, skipped when disabled in the form.
if tsne_plot:
    figure = tsne_section(prep, method, figure)

In [None]:
# UMAP embedding of the signatures, skipped when disabled in the form.
if UMAP_plot:
    figure = umap_section(prep, method, figure)

### Top Kinases and Consensus Matrix

In [None]:
# Uses the unnormalised `term_df`, not `prep`; the plots below read `consensus_scaled`.
consensus_df, consensus_scaled, table = get_consensus(term_df, method, table)

In [None]:
# Static heatmap of the scaled consensus matrix.
if (heatmap_plot):
    figure = heatmap_section(consensus_scaled, method, figure)

In [None]:
# Interactive view: uploads the scaled consensus matrix to the Clustergrammer service.
if clustergrammer_plot:
    figure = clustergrammer_section(consensus_scaled, method, figure)

In [None]:
# Stacked bar chart of the scaled consensus matrix.
if stacked_plot:
    figure = bar_section(consensus_scaled, method, figure)

## References
[1] Lachmann A, Ma’ayan A. KEA: Kinase enrichment analysis. Bioinformatics 25(5) 684-6 (2009) PMID: 19176546

[2] Chen EY, Tan CM, Kou Y, Duan Q, Wang Z, Meirelles GV, Clark NR, Ma'ayan A.
Enrichr: interactive and collaborative HTML5 gene list enrichment analysis tool. BMC Bioinformatics. 2013; 14:128.

[3] Kuleshov MV, Jones MR, Rouillard AD, Fernandez NF, Duan Q, Wang Z, Koplev S, Jenkins SL, Jagodnik KM, Lachmann A, McDermott MG, Monteiro CD, Gundersen GW, Ma'ayan A.
Enrichr: a comprehensive gene set enrichment analysis web server 2016 update. Nucleic Acids Research. 2016; gkw377.

[4] Xie Z, Bailey A, Kuleshov MV, Clarke DJB, Evangelista JE, Jenkins SL, Lachmann A, Wojciechowicz ML, Kropiwnicki E, Jagodnik KM, Jeon M, & Ma’ayan A.
Gene set knowledge discovery with Enrichr. Current Protocols, 1, e90. 2021. doi: 10.1002/cpz1.90 

[5] Fernandez, N. F. et al. Clustergrammer, a web-based heatmap visualization and analysis tool for high-dimensional biological data. Sci. Data 4:170151 doi: 10.1038/sdata.2017.151 (2017).

[6] The COVID-19 Gene and Drug Set Library. Kuleshov et al. Patterns. 2020 Jul 25:100090

[7] Bouhaddou, Mehdi, et al. "The global phosphorylation landscape of SARS-CoV-2 infection." Cell 182.3 (2020): 685-712.

[8] Van der Maaten, Laurens, and Geoffrey Hinton. "Visualizing data using t-SNE." Journal of machine learning research 9.11 (2008).

[9] McInnes, L, Healy, J, UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction, ArXiv e-prints 1802.03426, 2018