In [None]:
#%%appyter init
from appyter import magic
# Hand appyter a zero-argument callable that returns this notebook's globals();
# the default-argument trick binds the `globals` builtin at definition time.
# Presumably this registers the %%appyter cell magics used below — confirm against appyter docs.
magic.init(lambda _=globals: _())

In [None]:
%%appyter hide_code
# Form sections referenced by the `section=` argument of the fields declared in later cells.
# PRIMARY holds the file uploads and analysis parameters.
{% do SectionField(
    name='PRIMARY',
    title='Upload gene sets',
    subtitle='Upload your up and down gene sets',
    img='file-upload.png'
) %}
# ENRICHMENT holds the Drugmonizome / Enrichr library selections.
{% do SectionField(
    name='ENRICHMENT',
    title='Choose Drugmonizome and Enrichr Libraries for Enrichment',
    subtitle='Choose libraries.',
    img='find-replace.png'
    
) %}

In [None]:
%%appyter markdown

{% set title = StringField(
    name='title',
    label='Notebook name',
    default='LDP3 Consensus Perturbagens',
    section="PRIMARY",
) %}

# {{ title.raw_value }}

In [None]:
import re
import math
import time
import requests
import pandas as pd
import json
import scipy.stats as st
from IPython.display import display, IFrame, Markdown, HTML
import seaborn as sns
import matplotlib.pyplot as plt
from umap import UMAP
from sklearn.manifold import TSNE
from maayanlab_bioinformatics.normalization import quantile_normalize, zscore_normalize
from maayanlab_bioinformatics.harmonization import ncbi_genes_lookup
from tqdm import tqdm
import plotly.express as px
import numpy as np
from matplotlib.ticker import MaxNLocator

In [None]:
# Service endpoints used throughout the notebook.
# LDP3 metadata API: gene-symbol -> entity lookups and signature metadata.
METADATA_API = "https://ldp3.cloud/metadata-api"
# LDP3 data API: signature search (/enrich/rank and /enrich/ranktwosided).
DATA_API = "https://ldp3.cloud/data-api/api/v1"
# Clustergrammer endpoint that accepts a TSV matrix upload and answers with a viewer URL.
CLUSTERGRAMMER_URL = 'https://maayanlab.cloud/clustergrammer/matrix_upload/'
# Storage prefix that hosts the example input files of this appyter.
S3_PREFIX = "https://appyters.maayanlab.cloud/storage/LDP3Consensus/"
# Drugmonizome metadata / data APIs (used for drug-set enrichment).
drugmonizome_meta_api = "https://maayanlab.cloud/drugmonizome/metadata-api"
drugmonizome_data_api = "https://maayanlab.cloud/drugmonizome/data-api/api/v1"
# Enrichr base URL (used for gene-set enrichment).
enrichr_api = 'https://maayanlab.cloud/Enrichr/'

In [None]:
# Callable used below to normalise gene names from the uploaded GMT files; it is
# called with a gene string and its result is kept only when truthy.
# NOTE(review): presumably maps synonyms to official human NCBI symbols — confirm
# against the maayanlab_bioinformatics documentation.
ncbi_lookup = ncbi_genes_lookup('Mammalia/Homo_sapiens')

In [None]:
# Running counters for the "Table N" / "Figure N" captions; each cell or helper
# that emits a caption increments the matching counter afterwards.
table = 1
figure = 1

In [None]:
# functions
def convert_genes(up_genes=None, down_genes=None):
    """Look up metadata-API entity ids for the given gene symbols.

    Posts a ``meta.symbol`` filter containing every up and down symbol to
    ``METADATA_API + "/entities/find"``. A failed request is retried up to
    five times, sleeping between attempts; the sleep doubles after each
    5xx response.

    Args:
        up_genes: Gene symbols of the up set. ``None`` (the default) is
            treated as an empty list.
        down_genes: Gene symbols of the down set. ``None`` (the default) is
            treated as an empty list.

    Returns:
        ``{"entities": [...]}`` when either input list is empty (one-sided
        query), otherwise ``{"up_entities": [...], "down_entities": [...]}``.
        On any failure the error is printed and ``None`` is returned, so
        callers must check for ``None``.
    """
    # None defaults avoid sharing one mutable list object between calls.
    up_genes = list(up_genes) if up_genes is not None else []
    down_genes = list(down_genes) if down_genes is not None else []
    try:
        payload = {
           "filter": {
               "where": {
                   "meta.symbol": {"inq": up_genes + down_genes}
               }
           }
        }
        timeout = 0.5
        for _ in range(5):
            res = requests.post(METADATA_API + "/entities/find", json=payload)
            if res.ok:
                break
            time.sleep(timeout)
            # Back off harder only when the server itself is failing.
            if res.status_code >= 500:
                timeout = timeout * 2
        else:
            # All five attempts failed.
            raise Exception(res.text)
        results = res.json()
        up = set(up_genes)
        down = set(down_genes)
        if len(up_genes) == 0 or len(down_genes) == 0:
            # One-sided query: every returned entity goes into a single list.
            converted = {
                "entities": [],
            }
        else:
            converted = {
                "up_entities": [],
                "down_entities": []
            }
        for entity in results:
            symbol = entity["meta"]["symbol"]
            if "entities" in converted:
                converted["entities"].append(entity["id"])
            elif symbol in up:
                # A symbol present in both sets is assigned to the up set only.
                converted["up_entities"].append(entity["id"])
            elif symbol in down:
                converted["down_entities"].append(entity["id"])
        return converted
    except Exception as e:
        # Best-effort: report the problem and let the caller see None.
        print(e)


def signature_search(genes, library):
    """Run a signature search against one LINCS library on the data API.

    ``genes`` is the mapping produced by ``convert_genes``. When it contains
    an ``"entities"`` key the one-sided ``/enrich/rank`` endpoint is used,
    otherwise ``/enrich/ranktwosided``. Up to five attempts are made, with a
    sleep between attempts that doubles after each 5xx response.

    Returns the ``"results"`` entry of the JSON response. On any failure the
    error is printed and ``None`` is returned.
    """
    try:
        payload = {
            **genes,
            "database": library,
            "limit": 1000
        }
        if "entities" in payload:
            endpoint = "/enrich/rank"
        else:
            endpoint = "/enrich/ranktwosided"
        url = DATA_API + endpoint

        delay = 0.5
        attempts_left = 5
        while attempts_left > 0:
            attempts_left -= 1
            res = requests.post(url, json=payload)
            if res.ok:
                return res.json()["results"]
            time.sleep(delay)
            if res.status_code >= 500:
                delay = delay * 2
        # Every attempt failed: surface the last response body.
        raise Exception(res.text)
    except Exception as e:
        print(e)

def _tally_perturbagens(sigs):
    """Count perturbagen names per type for the classified signatures.

    ``sigs`` maps signature uuid -> hit dict carrying a ``"type"`` of
    ``"mimicker"`` or ``"reverser"``. Signature metadata is fetched from
    ``METADATA_API + "/signatures/find"`` (up to five attempts, sleep doubling
    after each 5xx response) and every signature that has a
    ``meta.pert_name`` adds one to that name's count under its type.

    Returns ``{"mimickers": {name: count}, "reversers": {name: count}}``.
    Raises ``Exception`` with the response body when all attempts fail.
    """
    payload = {
        "filter": {
            "where": {
                "id": {"inq": list(sigs.keys())}
            },
            "fields": [
                "id",
                "meta.pert_name",
                "meta.pert_type"
            ]
        }
    }
    timeout = 0.5
    for _ in range(5):
        res = requests.post(METADATA_API + "/signatures/find", json=payload)
        if res.ok:
            break
        time.sleep(timeout)
        if res.status_code >= 500:
            timeout = timeout * 2
    else:
        raise Exception(res.text)

    perturbagens = {
        "mimickers": {},
        "reversers": {}
    }
    for sig in res.json():
        scores = sigs[sig["id"]]
        if "pert_name" not in sig["meta"]:
            continue
        pert_name = sig["meta"]["pert_name"]
        # "mimicker" -> "mimickers", "reverser" -> "reversers"
        counts = perturbagens[scores["type"] + "s"]
        counts[pert_name] = counts.get(pert_name, 0) + 1
    return perturbagens


def resolve_rank(s, gene_set_direction):
    """Classify one-sided search hits and count their perturbagens.

    Each hit in ``s`` is given a direction of ``"up"`` when its ``"zscore"``
    is positive and ``"down"`` otherwise. Hits whose direction equals
    ``gene_set_direction`` are mimickers; all others are reversers. The hit
    dicts in ``s`` are copied, not modified.

    Returns ``{"mimickers": {pert_name: count}, "reversers": {...}}``. On any
    failure the error is printed and ``None`` is returned.
    """
    try:
        sigs = {}
        for hit in s:
            hit = dict(hit)  # work on a copy so the caller's data stays untouched
            direction = "up" if hit["zscore"] > 0 else "down"
            hit["type"] = "mimicker" if direction == gene_set_direction else "reverser"
            sigs[hit["uuid"]] = hit
        return _tally_perturbagens(sigs)
    except Exception as e:
        print(e)


def resolve_ranktwosided(s):
    """Classify two-sided search hits and count their perturbagens.

    For every hit the sign of ``"z-down"`` and ``"direction-down"`` is
    flipped. A hit is a mimicker when ``"z-up"`` and the flipped ``"z-down"``
    are both positive, a reverser when both are negative, and is dropped
    otherwise. The hit dicts in ``s`` are copied, not modified, so calling
    this twice on the same results gives the same answer.

    Returns ``{"mimickers": {pert_name: count}, "reversers": {...}}``. On any
    failure the error is printed and ``None`` is returned.
    """
    try:
        sigs = {}
        for hit in s:
            hit = dict(hit)  # copy: the sign flip below must not leak into the input
            hit['z-down'] = -hit['z-down']
            hit['direction-down'] = -hit['direction-down']
            if hit['z-up'] > 0 and hit['z-down'] > 0:
                hit["type"] = "mimicker"
            elif hit['z-up'] < 0 and hit['z-down'] < 0:
                hit["type"] = "reverser"
            else:
                # Up and down scores disagree (or are zero): not counted.
                continue
            sigs[hit["uuid"]] = hit
        return _tally_perturbagens(sigs)
    except Exception as e:
        print(e)

In [None]:
def clustergrammer(df, name, figure, label="Clustergrammer"):
    """Upload a matrix to Clustergrammer and embed the interactive viewer.

    The frame is written to ``name`` as a TSV, with columns prefixed by
    ``"Signature: "`` and index entries by ``"Drug: "``, then posted to
    ``CLUSTERGRAMMER_URL``. Up to five attempts are made.

    Args:
        df: Matrix to visualise.
        name: Path of the TSV file to write and upload.
        figure: Current figure number, used in the caption.
        label: Caption text.

    Returns:
        ``figure + 1``, the next figure number.

    Raises:
        Exception: when all five attempts fail. The last request exception is
            re-raised as is; a failed HTTP response is raised as
            ``Exception(response_text)``.
    """
    clustergram_df = df.rename(columns={i:"Signature: %s"%i for i in df.columns}, index={i:"Drug: %s"%i for i in df.index})
    clustergram_df.to_csv(name, sep="\t")
    response = ''
    timeout = 0.5
    for _ in range(5):
        try:
            # Context manager so the upload handle is closed after every attempt.
            with open(name, 'rb') as upload:
                res = requests.post(CLUSTERGRAMMER_URL, files={'file': upload})
            if not res.ok:
                response = res.text
                time.sleep(timeout)
                if res.status_code >= 500:
                    timeout = timeout * 2
            else:
                # The service answers with an http:// URL; force https for embedding.
                clustergrammer_url = res.text.replace("http:","https:")
                break
        except Exception as e:
            response = e
            time.sleep(2)
    else:
        # isinstance also matches Exception subclasses (e.g. connection errors),
        # which an exact type comparison would have re-wrapped.
        if isinstance(response, Exception):
            raise response
        else:
            raise Exception(response)
    print(clustergrammer_url)
    display(IFrame(clustergrammer_url, width="1000", height="1000"))
    display(Markdown("**Figure %d** %s [Go to url](%s)"%(figure, label, clustergrammer_url)))
    figure += 1
    return figure

# Palette shared by the heatmaps: 50 steps of a cubehelix palette with rot=0 and hue=0.05.
cmap = sns.cubehelix_palette(50, hue=0.05, rot=0, light=1, dark=0)

def heatmap(df, filename, figure, label, width=15, height=15):
    """Draw a clustered heatmap of ``df``, save it and caption it.

    Rows and columns are clustered by ``sns.clustermap``; both dendrograms
    are hidden so only the reordered heatmap is shown.

    Args:
        df: Matrix to plot.
        filename: Path the figure is saved to.
        figure: Current figure number, used in the caption.
        label: Caption text.
        width: Figure width passed to ``figsize``.
        height: Figure height passed to ``figsize``.

    Returns:
        ``figure + 1``, the next figure number.
    """
    # clustermap creates its own figure, so no separate plt.figure() is needed;
    # creating one only produced an extra empty figure.
    cg = sns.clustermap(df, cmap=cmap, figsize=(width, height))
    cg.ax_row_dendrogram.set_visible(False)
    cg.ax_col_dendrogram.set_visible(False)
    # Save before showing: some backends close figures inside plt.show().
    cg.savefig(filename)
    plt.show()
    display(Markdown("**Figure %d** %s"%(figure, label)))
    figure+=1
    return figure

In [None]:
%%appyter code_exec

# Input files. Each FileField renders to a file name (or an empty string when
# nothing was uploaded, which later cells test for).
# Up-regulated gene sets in GMT format.
{% set up_gene_sets = FileField(
    name='up_gene_sets',
    label='up gene sets',
    default='covid19_up.gmt',
    section="PRIMARY",
    examples={
        'covid19_up.gmt': 'https://appyters.maayanlab.cloud/storage/LDP3Consensus/covid19_up.gmt'
    }
) %}

# Down-regulated gene sets in GMT format.
{% set down_gene_sets = FileField(
    name='down_gene_sets',
    label='down gene sets',
    default='covid19_down.gmt',
    section="PRIMARY",
    examples={
        'covid19_down.gmt': 'https://appyters.maayanlab.cloud/storage/LDP3Consensus/covid19_down.gmt'
    }
) %}

# Tab-separated metadata table, read with its first column as the index.
{% set input_meta = FileField(
    name='input_meta',
    label='Metadata File',
    default='covid19_meta.tsv',
    section="PRIMARY",
    examples={
        'covid19_meta.tsv': 'https://appyters.maayanlab.cloud/storage/LDP3Consensus/covid19_meta.tsv',
    }
) %}
# Metadata column used to colour the t-SNE/UMAP plots; when left empty the
# first metadata column is used (see the metadata-loading cell).
{% set color_by =  StringField(name='group_by', label='Group By', description="Group By Metadata", default='', section='PRIMARY')%}


# Expose the submitted form values as plain Python variables.
up_gene_sets = {{ up_gene_sets }}
down_gene_sets = {{ down_gene_sets }}
input_meta = {{ input_meta }}
color_by = {{ color_by }}

In [None]:
# Decide whether the analysis is one-sided or two-sided.
# gene_set_direction stays None when both files were uploaded (two-sided search);
# otherwise it names the direction of the only file that was provided.
if up_gene_sets == '' and down_gene_sets == '':
    # Without any gene set there is nothing to search; fail early and clearly
    # instead of silently continuing with an empty analysis.
    raise ValueError("Neither up nor down gene sets were uploaded. Please upload at least one of them.")

gene_set_direction = None
if up_gene_sets == '':
    gene_set_direction = "down"
    print("Up gene sets was not uploaded. Gene set direction is set to down.")
elif down_gene_sets == '':
    gene_set_direction = "up"
    print("Down gene sets was not uploaded. Gene set direction is set to up.")

In [None]:
# Load the optional sample metadata. The plotting helpers below treat
# input_meta == "" as "no metadata", so this cell must not fail in that case.
if input_meta == "":
    meta_df = pd.DataFrame()
    # Without metadata there is no column to colour by.
    color_by = ""
    print("Metadata file was not uploaded. Plots will not be colored by metadata.")
else:
    meta_df = pd.read_csv(input_meta, sep="\t", index_col=0)
    display(meta_df.head())
    display(Markdown("**Table %d** Input Metadata"%table))
    table+=1
    # Default to the first metadata column when the user did not pick one.
    if color_by == "" and len(meta_df.columns) > 0:
        color_by = meta_df.columns[0]

In [None]:
%%appyter code_exec
# Analysis parameters collected from the form.
# p-value cutoff.
alpha = {{FloatField(name='alpha', label='p-value cutoff', default=0.05, section='PRIMARY')}}
# Minimum percentage cutoff for perturbagen instances in signatures.
perc = {{FloatField(name='perc',
                         label='percentage',
                         description='Minimum percentage cutoff for perturbagen instances in signatures',
                         default=0.1, section='PRIMARY')}}
# Number of top perturbations.
top_perts = {{IntField(name='top_perts', label='top perturbation', default=50, section='PRIMARY')}}
# Consensus method; renders to the Python string 'zscore' or 'count'.
consensus_method = {{ ChoiceField(
  name='consensus_method',
  label='Consensus method',
  description='Please select a method for getting the consensus',
  default='zscore',
  choices={
    'zscore': "'zscore'",
    'top count': "'count'",
  },
  section='PRIMARY') }}
# Embedding parameters read as globals by get_tsne (perplexity) and get_umap
# (n_neighbors); random_state seeds both.
perplexity = {{IntField(name='perplexity', label='Perplexity', description="t-SNE perplexity", default=15, section='PRIMARY')}}
n_neighbors = {{IntField(name='n_neighbors', label='n_neighbors', description="UMAP's n_neighbors", default=15, section='PRIMARY')}}
random_state = {{IntField(name='random_state', label='Random State', description="Random State", default=21, section='PRIMARY')}}

# Image size.
width = {{FloatField(name='width', label='image width', default=10, section='PRIMARY')}}
height = {{FloatField(name='height', label='image height', default=10, section='PRIMARY')}}


In [None]:
def _parse_gmt_line(line):
    """Split one GMT line into (signature id, list of normalised gene names).

    The expected layout is ``<id>\\t\\t<gene>\\t<gene>...``. A gene entry may
    carry a comma-separated suffix, which is dropped. Each gene is upper-cased
    and passed through ``ncbi_lookup``; genes without a truthy lookup result
    are discarded.

    Raises ValueError when the line does not split into exactly two parts on
    the double tab.
    """
    unpacked = line.strip().split("\t\t")
    if not len(unpacked) == 2:
        raise ValueError("GMT is not formatted properly, please consult the README of the appyter for proper formatting")
    sigid, geneset_str = unpacked
    genes = []
    for entry in geneset_str.split("\t"):
        gene = entry.split(",")[0]
        # Upper-case before the lookup for BOTH directions so that up and down
        # gene sets are normalised the same way.
        gene_name = ncbi_lookup(gene.upper())
        if gene_name:
            genes.append(gene_name)
    return sigid, genes


# signature id -> {"up_genes": [...], "down_genes": [...]}
signatures = {}
if not up_gene_sets == '':
    with open(up_gene_sets) as upfile:
        for line in upfile:
            if not line.strip():
                # Ignore blank lines (e.g. a trailing newline at the end of the file).
                continue
            sigid, genes = _parse_gmt_line(line)
            signatures[sigid] = {
                "up_genes": genes,
                "down_genes": []
            }
if not down_gene_sets == '':
    with open(down_gene_sets) as downfile:
        for line in downfile:
            if not line.strip():
                continue
            sigid, genes = _parse_gmt_line(line)
            # In two-sided mode every down signature must pair with an up signature.
            if sigid not in signatures and gene_set_direction is None:
                raise ValueError("%s did not match any of the up signatures, make sure that the signature names are the same for both up and down genes"%sigid)
            if sigid in signatures:
                signatures[sigid]["down_genes"] = genes
            else:
                signatures[sigid] = {
                    "up_genes": [],
                    "down_genes": genes
                }

In [None]:
%%appyter code_exec

# LINCS L1000 datasets to run the signature search against; the labels are
# translated to data-API library ids through dataset_map in a later cell.
datasets = {{ MultiChoiceField(name='datasets',
                                description='Select the LINCS Dataset that you want to use for Signature Search',
                                label='LINCS Datasets',
                                default=[
                                    "LINCS L1000 CRISPR Perturbations (2021)",
                                    "LINCS L1000 Chemical Perturbations (2021)",
                                ],
                                section = 'PRIMARY',
                                choices=[
                                    "LINCS L1000 Antibody Perturbations (2021)",
                                    "LINCS L1000 Ligand Perturbations (2021)",
                                    "LINCS L1000 Overexpression Perturbations (2021)",
                                    "LINCS L1000 CRISPR Perturbations (2021)",
                                    "LINCS L1000 shRNA Perturbations (2021)",
                                    "LINCS L1000 Chemical Perturbations (2021)",
                                    "LINCS L1000 siRNA Perturbations (2021)",
                                ]
                              )                           
}}

# Drugmonizome drug-set libraries used for enrichment analysis of the consensus drugs.
drugmonizome_datasets = {{ MultiChoiceField(name='drugmonizome_datasets',
                                description='Select the drugmonizome libraries to perform enrichment analysis for consensus drugs',
                                label='Drugmonizome Libraries',
                                default=["L1000FWD_GO_Biological_Processes_drugsetlibrary_up", "L1000FWD_GO_Biological_Processes_drugsetlibrary_down"],
                                section = 'ENRICHMENT',
                                choices=[
                                    "KinomeScan_kinase_drugsetlibrary",
                                    "L1000FWD_GO_Cellular_Component_drugsetlibrary_up",
                                    "L1000FWD_KEGG_Pathways_drugsetlibrary_down",
                                    "L1000FWD_signature_drugsetlibrary_up",
                                    "Geneshot_associated_drugsetlibrary",
                                    "Geneshot_predicted_generif_drugsetlibrary",
                                    "SIDER_indications_drugsetlibrary",
                                    "L1000FWD_GO_Molecular_Function_drugsetlibrary_up",
                                    "L1000FWD_GO_Molecular_Function_drugsetlibrary_down",
                                    "L1000FWD_KEGG_Pathways_drugsetlibrary_up",
                                    "SIDER_side_effects_drugsetlibrary",
                                    "DrugRepurposingHub_target_drugsetlibrary",
                                    "L1000FWD_GO_Biological_Processes_drugsetlibrary_down",
                                    "L1000FWD_GO_Biological_Processes_drugsetlibrary_up",
                                    "L1000FWD_GO_Cellular_Component_drugsetlibrary_down",
                                    "ATC_drugsetlibrary",
                                    "Drugbank_smallmolecule_target_drugsetlibrary",
                                    "STITCH_target_drugsetlibrary",
                                    "Geneshot_predicted_autorif_drugsetlibrary",
                                    "Drugbank_smallmolecule_enzyme_drugsetlibrary",
                                    "PharmGKB_OFFSIDES_side_effects_drugsetlibrary",
                                    "CREEDS_signature_drugsetlibrary_down",
                                    "Geneshot_predicted_tagger_drugsetlibrary",
                                    "RDKIT_maccs_fingerprints_drugsetlibrary",
                                    "CREEDS_signature_drugsetlibrary_up",
                                    "DrugCentral_target_drugsetlibrary",
                                    "L1000FWD_signature_drugsetlibrary_down",
                                    "L1000FWD_predicted_side_effects",
                                    "Drugbank_smallmolecule_carrier_drugsetlibrary",
                                    "PubChem_fingerprints_drugsetlibrary",
                                    "Geneshot_predicted_enrichr_drugsetlibrary",
                                    "DrugRepurposingHub_moa_drugsetlibrary",
                                    "Geneshot_predicted_coexpression_drugsetlibrary",
                                    "Drugbank_smallmolecule_transporter_drugsetlibrary",
                                    "PharmGKB_snp_drugsetlibrary"
                                ]
                              )                           
}}

# Enrichr libraries, "Transcription" category (none selected by default).
transcription_libraries = {{ MultiChoiceField(name='transcription_libraries', 
                                            description='Select the Enrichr libraries you would like in your figure.',
                                              label='Transcription', 
                                              default=[], 
                                              section = 'ENRICHMENT',
                                              choices=[
                                                'ARCHS4_TFs_Coexp',
                                                'ChEA_2016',
                                                'ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X',
                                                'ENCODE_Histone_Modifications_2015',
                                                'ENCODE_TF_ChIP-seq_2015',
                                                'Epigenomics_Roadmap_HM_ChIP-seq',
                                                'Enrichr_Submissions_TF-Gene_Coocurrence',
                                                'Genome_Browser_PWMs',
                                                'lncHUB_lncRNA_Co-Expression',
                                                'miRTarBase_2017',
                                                'TargetScan_microRNA_2017',
                                                'TF-LOF_Expression_from_GEO',
                                                'TF_Perturbations_Followed_by_Expression',
                                                'Transcription_Factor_PPIs',
                                                'TRANSFAC_and_JASPAR_PWMs',
                                                'TRRUST_Transcription_Factors_2019']) 
                           }}


# Enrichr libraries, "Pathways" category (none selected by default).
pathways_libraries = {{ MultiChoiceField(name='pathways_libraries',
                                         description='Select the Enrichr libraries you would like in your figure.',
                                         label='Pathways',
                                         default=[],
                                         section = 'ENRICHMENT',
                                         choices=[
                                            'ARCHS4_Kinases_Coexp',
                                            'BioCarta_2016',
                                            'BioPlanet_2019',
                                            'BioPlex_2017',
                                            'CORUM',
                                            'Elsevier_Pathway_Collection',
                                            'HMS_LINCS_KinomeScan',
                                            'HumanCyc_2016',
                                            'huMAP',
                                            'KEA_2015',
                                            'KEGG_2019_Human',
                                            'KEGG_2019_Mouse',
                                            'Kinase_Perturbations_from_GEO_down',
                                            'Kinase_Perturbations_from_GEO_up',
                                            'L1000_Kinase_and_GPCR_Perturbations_down',
                                            'L1000_Kinase_and_GPCR_Perturbations_up',
                                            'NCI-Nature_2016',
                                            'NURSA_Human_Endogenous_Complexome',
                                            'Panther_2016',
                                            'Phosphatase_Substrates_from_DEPOD',
                                            'PPI_Hub_Proteins',
                                            'Reactome_2016',
                                            'SILAC_Phosphoproteomics',
                                            'SubCell_BarCode',
                                            'Virus-Host_PPI_P-HIPSTer_2020',
                                            'WikiPathways_2019_Human',
                                            'WikiPathways_2019_Mouse']) 
                      }}    
    
  
# Enrichr libraries, "Ontologies" category (GO Biological Process selected by default).
ontologies_libraries = {{ MultiChoiceField(name='ontologies_libraries', 
                                           description='Select the Enrichr libraries you would like in your figure.',
                                           label='Ontologies',
                                           default=['GO_Biological_Process_2018'],
                                           section = 'ENRICHMENT',
                                           choices=[
                                            'GO_Biological_Process_2018',
                                            'GO_Cellular_Component_2018',
                                            'GO_Molecular_Function_2018',
                                            'Human_Phenotype_Ontology',
                                            'Jensen_COMPARTMENTS',
                                            'Jensen_DISEASES',
                                            'Jensen_TISSUES',
                                            'MGI_Mammalian_Phenotype_Level_4_2019']) 
                        }} 

    
# Enrichr libraries, "Diseases/Drugs" category (none selected by default).
diseases_drugs_libraries = {{ MultiChoiceField(name='diseases_drugs_libraries',
                                               description='Select the Enrichr libraries you would like in your figure.',
                                               label='Diseases/Drugs',
                                               default=[],
                                               section = 'ENRICHMENT',
                                               choices=[    
                                                    'Achilles_fitness_decrease',
                                                    'Achilles_fitness_increase',
                                                    'ARCHS4_IDG_Coexp',
                                                    'ClinVar_2019',
                                                    'dbGaP',
                                                    'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019',
                                                    'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019',
                                                    'DisGeNET',
                                                    'DrugMatrix',
                                                    'DSigDB',
                                                    'GeneSigDB',
                                                    'GWAS_Catalog_2019',
                                                    'LINCS_L1000_Chem_Pert_down',
                                                    'LINCS_L1000_Chem_Pert_up',
                                                    'LINCS_L1000_Ligand_Perturbations_down',
                                                    'LINCS_L1000_Ligand_Perturbations_up',
                                                    'MSigDB_Computational',
                                                    'MSigDB_Oncogenic_Signatures',
                                                    'Old_CMAP_down',
                                                    'Old_CMAP_up',
                                                    'OMIM_Disease',
                                                    'OMIM_Expanded',
                                                    'PheWeb_2019',
                                                    'Rare_Diseases_AutoRIF_ARCHS4_Predictions',
                                                    'Rare_Diseases_AutoRIF_Gene_Lists',
                                                    'Rare_Diseases_GeneRIF_ARCHS4_Predictions',
                                                    'Rare_Diseases_GeneRIF_Gene_Lists',
                                                    'UK_Biobank_GWAS_v1',
                                                    'Virus_Perturbations_from_GEO_down',
                                                    'Virus_Perturbations_from_GEO_up',
                                                    'VirusMINT']) 
                            }}

In [None]:
# Single flat list of every Enrichr library selected across the four form categories.
enrichr_libraries = transcription_libraries + pathways_libraries + ontologies_libraries + diseases_drugs_libraries

In [None]:
# Form label -> library id sent as "database" to the data API by the search loop.
dataset_map = {
  "LINCS L1000 Antibody Perturbations (2021)": "l1000_aby",
  "LINCS L1000 Ligand Perturbations (2021)": "l1000_lig",
  "LINCS L1000 Overexpression Perturbations (2021)": "l1000_oe",
  "LINCS L1000 CRISPR Perturbations (2021)": "l1000_xpr",
  "LINCS L1000 shRNA Perturbations (2021)": "l1000_shRNA",
  "LINCS L1000 Chemical Perturbations (2021)": "l1000_cp",
  "LINCS L1000 siRNA Perturbations (2021)": "l1000_siRNA"
}

# Set of dataset labels.
# NOTE(review): presumably the datasets whose perturbagens are genes (linked to
# gene pages) — usage is not visible in this part of the file; confirm.
gene_page = {
  "LINCS L1000 Ligand Perturbations (2021)",
  "LINCS L1000 Overexpression Perturbations (2021)",
  "LINCS L1000 CRISPR Perturbations (2021)",
  "LINCS L1000 shRNA Perturbations (2021)",
  "LINCS L1000 siRNA Perturbations (2021)"
}

# Dataset label -> library id for the chemical perturbation dataset.
# NOTE(review): presumably the datasets whose perturbagens are drugs; unlike
# gene_page this is a dict, not a set — confirm how it is consumed.
drug_page = {
  "LINCS L1000 Chemical Perturbations (2021)": "l1000_cp",
}

In [None]:
# enriched = {lib:{"mimickers": {}, "reversers": {}} for lib in datasets}
# enriched[type][dataset label][signature id] -> {perturbagen name: count}
enriched = {"mimickers": {lib: {} for lib in datasets}, "reversers": {lib: {} for lib in datasets}}

for k,sig in tqdm(signatures.items()):
    try:
        time.sleep(0.1)
        genes = convert_genes(sig["up_genes"],sig["down_genes"])
        if genes is None:
            # convert_genes already printed the reason.
            continue
        # Check only the keys that exist for this query type. A combined
        # `or` expression raised KeyError on "up_entities" whenever a
        # one-sided query matched 5 or fewer entities.
        if "entities" in genes:
            has_enough_genes = len(genes["entities"]) > 5
        else:
            has_enough_genes = len(genes["up_entities"]) > 5 and len(genes["down_entities"]) > 5
        if not has_enough_genes:
            print("%s: too few genes were matched, skipping"%k)
            continue
        for lib in datasets:
            library = dataset_map[lib]
            s = signature_search(genes, library)
            perturbagens = None
            if s is not None:
                if gene_set_direction is None:
                    perturbagens = resolve_ranktwosided(s)
                else:
                    perturbagens = resolve_rank(s, gene_set_direction)
            # The helpers print their own error and return None on failure;
            # skip just this library instead of aborting the remaining ones.
            if perturbagens is not None:
                enriched["mimickers"][lib][k] = perturbagens["mimickers"]
                enriched["reversers"][lib][k] = perturbagens["reversers"]
            time.sleep(0.1)
    except Exception as e:
        print(e)

In [None]:
def make_clickable(link):
    """Wrap ``link`` in an HTML anchor that opens in a new tab.

    The visible text is the piece of ``link`` between its first and second
    ``=`` (or everything after the first ``=`` when there is only one).
    """
    pieces = link.split('=')
    label = pieces[1]
    return '<a target="_blank" href="{0}">{1}</a>'.format(link, label)

def metadata_plot(df, x,y):
    """Scatter ``df[x]`` against ``df[y]`` once per metadata column.

    One panel is drawn for every column of the global ``meta_df``, coloured
    (``hue``) by that column and titled with its name. Panels are laid out
    two per row with shared axes; when the number of columns is odd the
    unused last panel is removed.
    """
    hue_columns = meta_df.columns
    n_rows = (len(hue_columns) + 1) // 2
    # squeeze=False keeps `grid` two-dimensional even for a single row, so the
    # same [row, col] indexing works for every layout.
    fig, grid = plt.subplots(n_rows, 2, sharex=True, sharey=True,
                             figsize=(20, 8 * n_rows), squeeze=False)
    for position, hue_column in enumerate(hue_columns):
        row, col = divmod(position, 2)
        panel = grid[row, col]
        sns.scatterplot(
            data=df,
            x=x, y=y,
            hue=hue_column,
            ax=panel
        )
        panel.set_title(hue_column)
    if n_rows * 2 > len(hue_columns):
        # Odd number of columns: the right-hand panel of the last row is empty.
        fig.delaxes(grid[n_rows - 1, 1])
    plt.show()

def get_tsne(df, label, figure):
    """Embed the columns of ``df`` with t-SNE and display the scatter plots.

    Reads the globals ``perplexity``, ``random_state``, ``input_meta``,
    ``meta_df`` and ``color_by``. When a metadata file was given, the
    embedding is joined with ``meta_df`` on the column labels. The main plot
    is coloured by ``color_by`` when it is set (rows with a missing value in
    that column are left out), otherwise it is drawn uncoloured. With more
    than one metadata column an extra static figure with one panel per column
    follows.

    Args:
        df: Matrix whose columns are the points to embed.
        label: Text used in the figure captions.
        figure: Current figure number.

    Returns:
        The next free figure number.
    """
    X_embedded = TSNE(n_components=2,
                      perplexity=perplexity,
                      random_state=random_state,
                     ).fit_transform(df.T)
    tsne_df = pd.DataFrame(X_embedded, columns=["t-SNE 1", "t-SNE 2"])
    tsne_df['label'] = df.columns
    if (not input_meta == ""):
        tsne_df = tsne_df.merge(right=meta_df, left_on="label", right_index=True)
    if color_by:
        display(
            px.scatter(
                tsne_df.loc[~tsne_df[color_by].isna()],
                x="t-SNE 1",
                y="t-SNE 2",
                color=color_by,
                hover_data=tsne_df.columns,
              )
        )
        display(Markdown("**Figure %d** t-SNE plot of %s colored by %s"%(figure, label, color_by)))
    else:
        # The figure must be passed to display(); a bare px.scatter(...) call
        # inside a function renders nothing.
        display(
            px.scatter(
                tsne_df,
                x="t-SNE 1",
                y="t-SNE 2",
                hover_data=tsne_df.columns,
              )
        )
        display(Markdown("**Figure %d** t-SNE plot of %s"%(figure, label)))
    if not input_meta == "" and len(meta_df.columns) > 1:
        metadata_plot(tsne_df, 't-SNE 1', 't-SNE 2')
        figure+=1
        display(Markdown("**Figure %d** t-SNE plot of %s colored by metadata"%(figure, label)))
    return figure + 1

def get_umap(df, label, figure):
    """Embed the columns of ``df`` with UMAP and display the scatter plots.

    Reads the globals ``n_neighbors``, ``random_state``, ``input_meta``,
    ``meta_df`` and ``color_by``. UMAP is fitted with the cosine metric and
    ``min_dist=0.3``. When a metadata file was given, the embedding is joined
    with ``meta_df`` on the column labels. The main plot is coloured by
    ``color_by`` when it is set (rows with a missing value in that column are
    left out), otherwise it is drawn uncoloured. With more than one metadata
    column an extra static figure with one panel per column follows.

    Args:
        df: Matrix whose columns are the points to embed.
        label: Text used in the figure captions.
        figure: Current figure number.

    Returns:
        The next free figure number.
    """
    consensus_umap = UMAP(
      random_state=random_state,
      n_neighbors=n_neighbors,
      n_components=2,
      metric='cosine',
      min_dist=0.3,
    )
    consensus_umap.fit(df.T.values)
    umap_df = pd.DataFrame(consensus_umap.transform(df.T.values),
                           columns=["UMAP 1", "UMAP 2"])
    umap_df['label'] = df.columns
    if (not input_meta == ""):
        umap_df = umap_df.merge(right=meta_df, left_on="label", right_index=True)
    if color_by:
        display(
            px.scatter(
                umap_df.loc[~umap_df[color_by].isna()],
                x="UMAP 1",
                y="UMAP 2",
                color=color_by,
                hover_data=umap_df.columns,
              )
        )
        display(Markdown("**Figure %d** UMAP plot of %s colored by %s"%(figure, label, color_by)))
    else:
        # color_by is falsy here, so it must not be passed as `color`: an
        # empty string is not a column name and plotly rejects it.
        display(
            px.scatter(
                umap_df,
                x="UMAP 1",
                y="UMAP 2",
                hover_data=umap_df.columns,
              )
        )
        display(Markdown("**Figure %d** UMAP plot of %s"%(figure, label)))
    if not input_meta == "" and len(meta_df.columns) > 1:
        metadata_plot(umap_df, 'UMAP 1', 'UMAP 2')
        figure+=1
        display(Markdown("**Figure %d** UMAP plot of %s colored by metadata"%(figure, label)))
    return figure + 1

# Optional overrides for the text drawn on each bar: term name -> display text.
annot_dict = {}
def bar_chart(enrichment, title=''):
    """Draw a horizontal bar chart of -log10(p-value) for the top terms.

    At most the first ten entries of ``enrichment`` are plotted. Bars with a
    p-value below 0.05 are coloured, the others are grey. The term name
    (or its ``annot_dict`` override) and the p-value are written on each bar,
    with a ``*`` in front of significant p-values.

    Args:
        enrichment: Sequence of dicts with a ``"name"`` and a ``"pval"`` key,
            in the order they should be drawn.
        title: Accepted for interface compatibility; currently not drawn.

    Returns:
        None. Nothing is plotted when ``enrichment`` is empty.
    """
    if len(enrichment) == 0:
        # Nothing to draw; max() over an empty score list would raise below.
        print("No enrichment results to plot.")
        return
    bar_color = 'tomato'
    bar_color_not_sig = 'lightgrey'
    if len(enrichment) > 10:
        enrichment = enrichment[0:10]
    enrichment_names = [i["name"] for i in enrichment]
    enrichment_scores = [i["pval"] for i in enrichment]
    # Computed once and reused for the bars and the tick logic.
    neg_log_pvals = np.log10(enrichment_scores)*-1
    plt.figure(figsize=(10,4))
    bar_colors = [bar_color if (x < 0.05) else bar_color_not_sig for x in enrichment_scores]
    ax = sns.barplot(x=neg_log_pvals, y=enrichment_names, palette=bar_colors, edgecolor=None, linewidth=0)
    # Term names are written on the bars instead of on the y axis.
    ax.axes.get_yaxis().set_visible(False)
    ax.set_xlabel('-Log10(p-value)',fontsize=25)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.tick_params(axis='x', which='major', labelsize=20)
    if max(neg_log_pvals)<1:
        # Integer ticks would leave the axis almost empty; use steps of 0.1.
        ax.xaxis.set_ticks(np.arange(0, max(neg_log_pvals), 0.1))
    ax.patch.set_edgecolor('black')
    ax.patch.set_linewidth(2)
    # Small left offset so the text does not touch the axis.
    title_start = max(ax.axes.get_xlim())/200
    for ii,annot in enumerate(enrichment_names):
        annot = annot_dict.get(annot, annot)
        pval_text = np.format_float_scientific(enrichment_scores[ii],precision=2)
        separator = '  *' if enrichment_scores[ii] < 0.05 else '  '
        ax.text(title_start,ii,separator.join([annot, pval_text]),ha='left',wrap = True, fontsize = 12)
    plt.show()
        

def get_drugmonizome_plot(consensus, label, figure, dataset):
    """Run a Drugmonizome overlap enrichment for the consensus index and plot it.

    The lower-cased index labels of `consensus` are looked up as Drugmonizome
    entities by `meta.Name`; the matching entity ids are posted to the
    `/enrich/overlap` endpoint for `dataset`; the metadata of the returned
    signatures is then fetched and the hits are drawn with `bar_chart`,
    ordered by ascending p-value.

    Args:
        consensus: object whose `.index` yields the names (strings) to query.
        label: caption text, also forwarded to `bar_chart`.
        figure: number printed in the figure caption.
        dataset: value sent as the "database" field of the enrichment query.

    Returns:
        The next free figure number: `figure + 1` when a chart was drawn,
        `figure` unchanged when the enrichment returned no signatures.

    Raises:
        Exception: if any of the three HTTP requests returns a non-OK status.
    """
    entity_filter = {
        "filter": {
            "where": {
                "meta.Name": {
                    "inq": [name.lower() for name in consensus.index]
                }
            }
        }
    }
    res = requests.post(drugmonizome_meta_api + "/entities/find", json=entity_filter)
    if not res.ok:
        raise Exception('Error fetching Drugmonizome entities')

    query = {
        "entities": [entity["id"] for entity in res.json()],
        "limit": 1000,
        "database": dataset
    }
    res = requests.post(drugmonizome_data_api + "/enrich/overlap", json=query)
    if not res.ok:
        raise Exception('Error fetching Drugmonizome enrichment results')

    # Index the enrichment hits by signature uuid so they can be joined to the metadata below.
    hits_by_uuid = {hit["uuid"]: hit for hit in res.json()["results"]}

    signature_filter = {
        "filter": {
            "where": {
                "id": {
                    "inq": list(hits_by_uuid.keys())
                }
            }
        }
    }
    res = requests.post(drugmonizome_meta_api + "/signatures/find", json=signature_filter)
    if not res.ok:
        raise Exception('Error fetching Drugmonizome signatures')

    scores = [
        {
            "name": sig["meta"]["Term"][0]["Name"],
            "pval": hits_by_uuid[sig["id"]]["p-value"]
        }
        for sig in res.json()
    ]

    # Without any hit there is nothing to chart; keep the figure counter untouched.
    if not scores:
        display(Markdown("*No enrichment results were returned for: %s*"%label))
        return figure

    scores.sort(key=lambda x: x['pval'])
    bar_chart(scores, label)
    display(Markdown("**Figure %d** %s"%(figure, label)))
    figure += 1
    return figure

def get_enrichr_bar(userListId, enrichr_library, figure, label):
    """Fetch the Enrichr results of one library for an uploaded list and chart them.

    Args:
        userListId: id of a gene list previously submitted to Enrichr.
        enrichr_library: library name; sent as `backgroundType` and used as
            the key of the JSON response.
        figure: number printed in the figure caption.
        label: caption text, also forwarded to `bar_chart`.

    Returns:
        `figure + 1`.

    Raises:
        Exception: if the Enrichr request returns a non-OK status.
    """
    url = enrichr_api + 'enrich' + '?userListId=%s&backgroundType=%s' % (userListId, enrichr_library)
    response = requests.get(url)
    if not response.ok:
        raise Exception('Error fetching enrichment results')

    rows = response.json()[enrichr_library]
    # Each row is positional: element 1 is used as the term name, element 2 as the p-value.
    scores = sorted(
        [{"name": row[1], "pval": row[2]} for row in rows],
        key=lambda entry: entry['pval'],
    )
    bar_chart(scores, label)
    display(Markdown("**Figure %d** %s"%(figure, label)))
    return figure + 1

def enrichment(consensus, label, figure):
    """Submit the consensus index to Enrichr and chart every selected library.

    The upper-cased index labels of `consensus` are uploaded as one gene list
    (one gene per line) with `label` as its description; a bar chart is then
    drawn for each entry of the notebook-level `enrichr_libraries`.

    Args:
        consensus: object whose `.index` yields the gene names (strings).
        label: list description and text inserted into the chart captions.
        figure: number printed in the first figure caption.

    Returns:
        The next free figure number after all charts were drawn.

    Raises:
        Exception: if the list upload returns a non-OK status.
    """
    genes_str = '\n'.join([gene.upper() for gene in consensus.index])
    # Tuples with a None filename make requests send the values as multipart form fields.
    form = {
        'list': (None, genes_str),
        'description': (None, label)
    }

    response = requests.post(enrichr_api + 'addList', files=form)
    if not response.ok:
        raise Exception('Error analyzing gene list')

    userListId = response.json()["userListId"]
    for library_name in enrichr_libraries:
        caption = "Enrichr %s hits for %s"%(library_name, label)
        figure = get_enrichr_bar(userListId, library_name, figure, caption)
    return figure

## Mimickers

In [None]:
direction = "mimickers"
for lib in datasets:
    library = dataset_map[lib]
    display(Markdown("### %s"%lib))

    # Build the count matrix: one row per perturbagen, one column per key of pert_dict.
    pert_dict = enriched[direction][lib]
    # Ordered, de-duplicated list instead of a set: pandas >= 2.0 rejects sets as
    # .loc indexers, and set ordering would make the row order vary between runs.
    perts = list(dict.fromkeys(pert for v in pert_dict.values() for pert in v.keys()))
    df = pd.DataFrame(0, index=perts, columns=pert_dict.keys())
    for k, v in pert_dict.items():
        for pert, cnt in v.items():
            df.at[pert, k] += cnt
    df = df.loc[df.sum(1).sort_values(ascending=False).index]
    filename = "pert_matrix_%s_%s.tsv"%(library.replace(" ","_"), direction)
    df.to_csv(filename, sep="\t")
    # This matrix is written to disk but not displayed, so no table number is consumed for it.

    # Per-perturbagen "mean" and "std" columns, presumably precomputed background
    # statistics for this library and direction - TODO confirm.
    empirical_stat = pd.read_csv(S3_PREFIX + "%s_%s.tsv" % (library, direction), sep="\t", index_col=0)
    # Keep perturbagens that are non-zero in more than a fraction `perc` of the columns.
    df = df[(df>0).sum(1) > len(df.columns) * perc]
    # Restrict to perturbagens that also have empirical statistics; df's row order is kept.
    filtered_df = df.loc[df.index.isin(empirical_stat.index)]

    is_gene_lib = lib in gene_page
    stat_df = pd.DataFrame(index=filtered_df.index)
    stat_df["count"] = filtered_df.sum(1)
    # Compute zstat and p value
    stat_df["z-score"] = (filtered_df.mean(1) - empirical_stat["mean"]) / empirical_stat["std"]
    # Upper-tail probability; norm.sf stays accurate where 1 - cdf(z) rounds to exactly 0.
    stat_df["p-value"] = st.norm.sf(stat_df["z-score"].to_numpy(dtype=float))

    if consensus_method == 'zscore':
        #Filter by p-value
        stat_df = stat_df[stat_df["p-value"]<alpha].sort_values(by=["z-score"], ascending=False)
    else:
        stat_df = stat_df.sort_values(by=["count"], ascending=False)

    if is_gene_lib:
        stat_df['Enrichr gene page'] = ["https://maayanlab.cloud/Enrichr/#find!gene=%s"%i for i in stat_df.index]

    # Output names are unchanged from before: gene libraries use the display name
    # `lib`, all others the mapped `library` name.
    filename = "pert_stat_%s_%s.tsv"%((lib if is_gene_lib else library).replace(" ","_"), direction)
    stat_df.to_csv(filename, sep="\t")
    if is_gene_lib:
        # Links are turned into HTML only for the rows that are rendered; the TSV keeps plain URLs.
        stat_shown = stat_df.head(25).copy()
        stat_shown['Enrichr gene page'] = stat_shown['Enrichr gene page'].apply(make_clickable)
        display(HTML(stat_shown.to_html(escape=False)))
    else:
        display(stat_df.head(25))
    display(Markdown("**Table %d** Top 25 mimicker perturbagens using %s dataset ([download](./%s))"%
                     (table, lib, filename)))
    table += 1

    # Without any selected perturbagen the matrices below are empty and the plots cannot be drawn.
    if stat_df.empty:
        display(Markdown("*No mimicker perturbagens passed the filters for %s; skipping the consensus plots.*"%lib))
        continue

    # stat_df is already ranked, so its first `top_perts` labels define the consensus set.
    consensus = df.loc[stat_df.index[0:top_perts]]
    consensus_norm = quantile_normalize(consensus)
    filename = "consensus_matrix_%s_%s.tsv"%(library.replace(" ","_"), direction)
    consensus.to_csv(filename, sep="\t")
    display(consensus.head())
    display(Markdown("**Table %d** Consensus mimicker perturbation matrix for %s ([download](./%s))"%
                     (table, lib, filename)))
    table += 1

    plot_label = "%s %s"%(lib, direction)
    figure = get_tsne(consensus_norm, plot_label, figure)
    figure = get_umap(consensus_norm, plot_label, figure)

    label = "Clustergrammer of consensus mimicker perturbation of %s (quantile normalized scores)"%lib
    name = "clustergrammer_%s_%s.tsv"%(library.replace(" ", "_"), direction)
    figure = clustergrammer(consensus_norm, name, figure, label)

    label = "Heatmap of consensus mimicker perturbation of %s (quantile normalized scores)"%lib
    name = "heatmap_%s_%s.png"%(library.replace(" ", "_"), direction)
    figure = heatmap(consensus_norm, name, figure, label)

    if lib in drug_page:
        display(Markdown("### Drugmonizome enrichment analysis for the consensus drugs"))
        for d in drugmonizome_datasets:
            label = "Drugmonizome %s hits for %s %s"%(d, lib, direction)
            figure = get_drugmonizome_plot(consensus, label, figure, d)
    elif is_gene_lib:
        display(Markdown("### Enrichr terms for the consensus genes"))
        label = "%s %s"%(lib, direction)
        figure = enrichment(consensus, label, figure)

## Reversers

In [None]:
direction = "reversers"
for lib in datasets:
    library = dataset_map[lib]
    display(Markdown("### %s"%lib))

    # Build the count matrix: one row per perturbagen, one column per key of pert_dict.
    pert_dict = enriched[direction][lib]
    # Ordered, de-duplicated list instead of a set: pandas >= 2.0 rejects sets as
    # .loc indexers, and set ordering would make the row order vary between runs.
    perts = list(dict.fromkeys(pert for v in pert_dict.values() for pert in v.keys()))
    df = pd.DataFrame(0, index=perts, columns=pert_dict.keys())
    for k, v in pert_dict.items():
        for pert, cnt in v.items():
            df.at[pert, k] += cnt
    df = df.loc[df.sum(1).sort_values(ascending=False).index]
    filename = "pert_matrix_%s_%s.tsv"%(library.replace(" ","_"), direction)
    df.to_csv(filename, sep="\t")
    # This matrix is written to disk but not displayed, so no table number is consumed for it.

    # Per-perturbagen "mean" and "std" columns, presumably precomputed background
    # statistics for this library and direction - TODO confirm.
    empirical_stat = pd.read_csv(S3_PREFIX + "%s_%s.tsv" % (library, direction), sep="\t", index_col=0)
    # Keep perturbagens that are non-zero in more than a fraction `perc` of the columns.
    df = df[(df>0).sum(1) > len(df.columns) * perc]
    # Restrict to perturbagens that also have empirical statistics; df's row order is kept.
    filtered_df = df.loc[df.index.isin(empirical_stat.index)]

    is_gene_lib = lib in gene_page
    stat_df = pd.DataFrame(index=filtered_df.index)
    stat_df["count"] = filtered_df.sum(1)
    # Compute zstat and p value
    stat_df["z-score"] = (filtered_df.mean(1) - empirical_stat["mean"]) / empirical_stat["std"]
    # Upper-tail probability; norm.sf stays accurate where 1 - cdf(z) rounds to exactly 0.
    stat_df["p-value"] = st.norm.sf(stat_df["z-score"].to_numpy(dtype=float))

    if consensus_method == 'zscore':
        #Filter by p-value
        stat_df = stat_df[stat_df["p-value"]<alpha].sort_values(by=["z-score"], ascending=False)
    else:
        stat_df = stat_df.sort_values(by=["count"], ascending=False)

    if is_gene_lib:
        stat_df['Enrichr gene page'] = ["https://maayanlab.cloud/Enrichr/#find!gene=%s"%i for i in stat_df.index]

    # Output names are unchanged from before: gene libraries use the display name
    # `lib`, all others the mapped `library` name.
    filename = "pert_stat_%s_%s.tsv"%((lib if is_gene_lib else library).replace(" ","_"), direction)
    stat_df.to_csv(filename, sep="\t")
    if is_gene_lib:
        # Links are turned into HTML only for the rows that are rendered; the TSV keeps plain URLs.
        stat_shown = stat_df.head(25).copy()
        stat_shown['Enrichr gene page'] = stat_shown['Enrichr gene page'].apply(make_clickable)
        display(HTML(stat_shown.to_html(escape=False)))
    else:
        display(stat_df.head(25))
    display(Markdown("**Table %d** Top 25 reverser perturbagens using %s dataset ([download](./%s))"%
                     (table, lib, filename)))
    table += 1

    # Without any selected perturbagen the matrices below are empty and the plots cannot be drawn.
    if stat_df.empty:
        display(Markdown("*No reverser perturbagens passed the filters for %s; skipping the consensus plots.*"%lib))
        continue

    # stat_df is already ranked, so its first `top_perts` labels define the consensus set.
    consensus = df.loc[stat_df.index[0:top_perts]]
    consensus_norm = quantile_normalize(consensus)
    filename = "consensus_matrix_%s_%s.tsv"%(library.replace(" ","_"), direction)
    consensus.to_csv(filename, sep="\t")
    display(consensus.head())
    display(Markdown("**Table %d** Consensus reverser perturbation matrix for %s ([download](./%s))"%
                     (table, lib, filename)))
    table += 1

    plot_label = "%s %s"%(lib, direction)
    figure = get_tsne(consensus_norm, plot_label, figure)
    figure = get_umap(consensus_norm, plot_label, figure)

    label = "Clustergrammer of consensus reverser perturbation of %s (quantile normalized scores)"%lib
    name = "clustergrammer_%s_%s.tsv"%(library.replace(" ", "_"), direction)
    figure = clustergrammer(consensus_norm, name, figure, label)

    label = "Heatmap of consensus reverser perturbation of %s (quantile normalized scores)"%lib
    name = "heatmap_%s_%s.png"%(library.replace(" ", "_"), direction)
    figure = heatmap(consensus_norm, name, figure, label)

    if lib in drug_page:
        display(Markdown("### Drugmonizome enrichment analysis for the consensus drugs"))
        for d in drugmonizome_datasets:
            label = "Drugmonizome %s hits for %s %s"%(d, lib, direction)
            figure = get_drugmonizome_plot(consensus, label, figure, d)
    elif is_gene_lib:
        display(Markdown("### Enrichr terms for the consensus genes"))
        label = "%s %s"%(lib, direction)
        figure = enrichment(consensus, label, figure)