In [None]:
#%%appyter init
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
import json
import time
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
from matplotlib.ticker import MaxNLocator
import seaborn as sns
from scipy.stats import fisher_exact
from IPython.display import HTML, display, Markdown, FileLink
pd.set_option('display.float_format', '{:.2e}'.format)

In [None]:
%%appyter hide_code 

# Declares the 'Gene Set(s) Upload' section of the input form. The fields declared
# later in this cell attach to it by passing section='input'.
{% do SectionField(
    name = 'input',
    title = 'Gene Set(s) Upload',
    subtitle = 'Upload a gene set you wish to analyze'
)%}


# Tabbed gene-set input. The 'Single Gene Set' tab holds one gene list plus an
# up-/down-regulator choice; the 'Paired Up/Down Genesets' tab holds an up list,
# a down list and a reversers/mimickers choice. Both tabs name their first text
# field 'gene_input'.
# NOTE(review): the TextFields pass `requried=True` (sic). If `required` was meant,
# the flag is presumably not being applied -- confirm the effect on the empty
# paired-tab defaults before renaming it.
{% set genesets_input = TabField(
    name = 'genesets_input',
    label = 'Gene Set(s) Input',
    default = 'Single Gene Set',
    description = 'Input your gene set or paired up/down gene sets',
    section='input',
    choices = {
        'Single Gene Set': [
            TextField(
            name = 'gene_input',
            hint = 'Gene set (one gene per line)',
            label = 'Input Gene Set  (up/single)',
            default= 'BAD\nTPTEP2-CSNK1E\nDKK1\nGRIN2D\nATG14\nDKK2\nGRIN2C\nATG13\nDKK4\nPSMC6\nMAPK10\nAPC\nPSMC4\nPSMC5\nPSMC2\nPSMC3\nUQCRC2\nUQCRC1\nPSMC1\nULK2\nULK1\nCALM2\nCALM3\nCALM1\nAPP\nCALML6\nLRP1\nTUBAL3\nUQCR10\nLRP6\nCALML3\nLRP5\nFZD10\nCALML4\nCALML5\nARAF\nUQCR11\nPSMD9\nFRAT1\nFRAT2\nPSMD7\nPSMD8\nPSMD6\nAKT3\nPSMD3\nPSMD4\nAKT1\nPSMD1\nNRBF2\nAPBB1\nAKT2\nPSMD2\nNCSTN\nSNCA\nFZD2\nFZD1\nINSR\nCOX8A\nWNT10B\nFZD4\nXBP1\nFZD3\nWNT10A\nCOX8C\nFZD6\nFZD5\nFZD8\nFZD7\nFZD9\nCSNK1E\nCOX7A2L\nBACE1\nBACE2\nPIK3R1\nCSNK2B\nNOX4\nATG2B\nATG2A\nATF6\nNOX1\nATF4\nPPP3R1\nPPP3R2\nGRIN2B\nGRIN2A\nCOX3\nCOX2\nAPOE\nCOX1\nPSENEN\nCSNK1A1\nTUBB\nWNT9A\nBRAF\nWNT9B\nTUBB2A\nATP5F1D\nCOX6C\nATP5F1E\nIL1A\nIL1B\nTUBB2B\nMAPT\nCOX7C\nCOX7B\nIDE\nWNT8B\nWNT8A\nPPP3CA\nPPP3CB\nPPP3CC\nCASP12\nATG101\nFADD\nATP5PF\nAPC2\nCSNK2A2\nATP5PD\nCSNK2A1\nMME\nATP5PB\nCSNK2A3\nWNT7A\nWNT7B\nND4L\nADAM10\nTUBB4B\nTUBB4A\nCOX6B1\nCOX6B2\nPSMA6\nPSMA7\nPLCB4\nPSMA4\nPSMA5\nADAM17\nPSMA2\nPSMA3\nNDUFAB1\nPSMA1\nNDUFA4L2\nATP5PO\nPLCB2\nCDK5R1\nPLCB3\nPLCB1\nRYR3\nCOX5A\nNDUFA12\nNDUFA13\nCTNNB1\nNDUFA10\nNDUFA11\nTUBA1A\nCOX5B\nPSMA8\nPTGS2\nPSMB7\nIKBKB\nATP5F1A\nTUBA1C\nATP5F1B\nPSMB5\nTUBA1B\nPSMB6\nATP5F1C\nPSMB3\nPSMB4\nPSMB1\nPSMB2\nHRAS\nBID\nWNT5A\nWNT5B\nSDHD\nSDHB\nCOX7B2\nSDHC\nGRIN1\nSDHA\nTUBA4A\nERN1\nTUBA8\nFAS\nUQCRQ\nSLC25A31\nPPIF\nRAF1\nPPID\nCHRM3\nRELA\nCHRM1\nNDUFB11\nCSF1\nCHRM5\nNDUFB10\nCHRNA7\nTUBB1\nTNF\nTUBA3C\nCOX6A2\nNDUFC2-KCTD14\nCOX6A1\nKIF5A\nNRAS\nTUBB8\nTUBA3E\nTUBA3D\nTUBB6\nTUBB3\nKIF5C\nKIF5B\nCAPN1\nCAPN2\nNDUFV3\nNDUFV2\nMAP3K5\nNDUFV1\nMAP2K2\nWNT3A\nMAP2K1\nCYBB\nTRAF2\nNFKB1\nEIF2S1\nSEM1\nGNAQ\nND2\nND1\nND4\nND3\nND6\nND5\nWNT2B\nIRS1\nATP8\nIRS2\nUQCRB\nATP6\nLPL\nATP5MC3\nCACNA1C\nPSEN1\nIRS4\nPSEN2\nCACNA1D\nCOX7A1\nAGER\nGPR83\nCOX7A2\nCACNA1F\nATP5MC1\nATP5MC2\nUQCRH\nCACNA1S\nCYC1\nMAP2K7\nAPAF1\nEIF2AK2\nAXIN2\nEIF2AK3\nAXIN1\nWIPI1\nNDUFC2\nWIPI2\nNDUFC1\nTNFRSF1A\nWNT16\nIL6\nPIK3CB\nNDUFS8\nCDK5\nNDUFS7\nPIK3CA\nNDUFS6\nNDU
FS5\nNDUFS4\nNDUFS3\nCYCS\nNDUFS2\nNDUFS1\nPIK3C3\nCYTB\nMCU\nRTN3\nPSMD11\nRTN4\nPSMD13\nGSK3B\nSLC25A4\nNDUFB9\nPSMD12\nNDUFB8\nNDUFB7\nPSMD14\nNDUFB6\nCOX4I2\nNDUFB5\nNDUFB4\nITPR2\nNDUFB3\nPIK3R4\nITPR3\nNDUFB2\nPIK3R3\nNDUFB1\nPIK3R2\nKLC2\nKLC1\nITPR1\nAPH1B\nWNT6\nAPH1A\nKLC4\nCASP8\nWNT11\nKLC3\nCASP9\nCASP7\nDVL1\nDVL2\nDVL3\nUQCRFS1\nCOX4I1\nCASP3\nNAE1\nWNT1\nWNT2\nWNT3\nWNT4\nNDUFA9\nNDUFA8\nNDUFA7\nNDUFA6\nCHUK\nNDUFA5\nNDUFA4\nADRM1\nNDUFA3\nNDUFA2\nNDUFA1\nMTOR\nUQCRHL\nCSNK1A1L\nDDIT3\nVDAC3\nVDAC2\nVDAC1\nKRAS\nSLC25A6\nSLC25A5\nGAPDH\nBECN1\nBECN2\nATP2A3\nRB1CC1\nATP2A2\nPIK3CD\nATP2A1\nAMBRA1\nINS\nHSD17B10\nMAPK9\nGRM5\nMAPK8\nMAPK3\nNOS1\nMAPK1\nNOS2',
            description = '',
            requried=True,
            rows=8,
            section = 'input',
            examples = {
                'Alzheimer\'s Disease': 'BAD\nTPTEP2-CSNK1E\nDKK1\nGRIN2D\nATG14\nDKK2\nGRIN2C\nATG13\nDKK4\nPSMC6\nMAPK10\nAPC\nPSMC4\nPSMC5\nPSMC2\nPSMC3\nUQCRC2\nUQCRC1\nPSMC1\nULK2\nULK1\nCALM2\nCALM3\nCALM1\nAPP\nCALML6\nLRP1\nTUBAL3\nUQCR10\nLRP6\nCALML3\nLRP5\nFZD10\nCALML4\nCALML5\nARAF\nUQCR11\nPSMD9\nFRAT1\nFRAT2\nPSMD7\nPSMD8\nPSMD6\nAKT3\nPSMD3\nPSMD4\nAKT1\nPSMD1\nNRBF2\nAPBB1\nAKT2\nPSMD2\nNCSTN\nSNCA\nFZD2\nFZD1\nINSR\nCOX8A\nWNT10B\nFZD4\nXBP1\nFZD3\nWNT10A\nCOX8C\nFZD6\nFZD5\nFZD8\nFZD7\nFZD9\nCSNK1E\nCOX7A2L\nBACE1\nBACE2\nPIK3R1\nCSNK2B\nNOX4\nATG2B\nATG2A\nATF6\nNOX1\nATF4\nPPP3R1\nPPP3R2\nGRIN2B\nGRIN2A\nCOX3\nCOX2\nAPOE\nCOX1\nPSENEN\nCSNK1A1\nTUBB\nWNT9A\nBRAF\nWNT9B\nTUBB2A\nATP5F1D\nCOX6C\nATP5F1E\nIL1A\nIL1B\nTUBB2B\nMAPT\nCOX7C\nCOX7B\nIDE\nWNT8B\nWNT8A\nPPP3CA\nPPP3CB\nPPP3CC\nCASP12\nATG101\nFADD\nATP5PF\nAPC2\nCSNK2A2\nATP5PD\nCSNK2A1\nMME\nATP5PB\nCSNK2A3\nWNT7A\nWNT7B\nND4L\nADAM10\nTUBB4B\nTUBB4A\nCOX6B1\nCOX6B2\nPSMA6\nPSMA7\nPLCB4\nPSMA4\nPSMA5\nADAM17\nPSMA2\nPSMA3\nNDUFAB1\nPSMA1\nNDUFA4L2\nATP5PO\nPLCB2\nCDK5R1\nPLCB3\nPLCB1\nRYR3\nCOX5A\nNDUFA12\nNDUFA13\nCTNNB1\nNDUFA10\nNDUFA11\nTUBA1A\nCOX5B\nPSMA8\nPTGS2\nPSMB7\nIKBKB\nATP5F1A\nTUBA1C\nATP5F1B\nPSMB5\nTUBA1B\nPSMB6\nATP5F1C\nPSMB3\nPSMB4\nPSMB1\nPSMB2\nHRAS\nBID\nWNT5A\nWNT5B\nSDHD\nSDHB\nCOX7B2\nSDHC\nGRIN1\nSDHA\nTUBA4A\nERN1\nTUBA8\nFAS\nUQCRQ\nSLC25A31\nPPIF\nRAF1\nPPID\nCHRM3\nRELA\nCHRM1\nNDUFB11\nCSF1\nCHRM5\nNDUFB10\nCHRNA7\nTUBB1\nTNF\nTUBA3C\nCOX6A2\nNDUFC2-KCTD14\nCOX6A1\nKIF5A\nNRAS\nTUBB8\nTUBA3E\nTUBA3D\nTUBB6\nTUBB3\nKIF5C\nKIF5B\nCAPN1\nCAPN2\nNDUFV3\nNDUFV2\nMAP3K5\nNDUFV1\nMAP2K2\nWNT3A\nMAP2K1\nCYBB\nTRAF2\nNFKB1\nEIF2S1\nSEM1\nGNAQ\nND2\nND1\nND4\nND3\nND6\nND5\nWNT2B\nIRS1\nATP8\nIRS2\nUQCRB\nATP6\nLPL\nATP5MC3\nCACNA1C\nPSEN1\nIRS4\nPSEN2\nCACNA1D\nCOX7A1\nAGER\nGPR83\nCOX7A2\nCACNA1F\nATP5MC1\nATP5MC2\nUQCRH\nCACNA1S\nCYC1\nMAP2K7\nAPAF1\nEIF2AK2\nAXIN2\nEIF2AK3\nAXIN1\nWIPI1\nNDUFC2\nWIPI2\nNDUFC1\nTNFRSF1A\nWNT16\nIL6\nPIK3CB\nNDUFS8\nCDK5\nNDUFS7\n
PIK3CA\nNDUFS6\nNDUFS5\nNDUFS4\nNDUFS3\nCYCS\nNDUFS2\nNDUFS1\nPIK3C3\nCYTB\nMCU\nRTN3\nPSMD11\nRTN4\nPSMD13\nGSK3B\nSLC25A4\nNDUFB9\nPSMD12\nNDUFB8\nNDUFB7\nPSMD14\nNDUFB6\nCOX4I2\nNDUFB5\nNDUFB4\nITPR2\nNDUFB3\nPIK3R4\nITPR3\nNDUFB2\nPIK3R3\nNDUFB1\nPIK3R2\nKLC2\nKLC1\nITPR1\nAPH1B\nWNT6\nAPH1A\nKLC4\nCASP8\nWNT11\nKLC3\nCASP9\nCASP7\nDVL1\nDVL2\nDVL3\nUQCRFS1\nCOX4I1\nCASP3\nNAE1\nWNT1\nWNT2\nWNT3\nWNT4\nNDUFA9\nNDUFA8\nNDUFA7\nNDUFA6\nCHUK\nNDUFA5\nNDUFA4\nADRM1\nNDUFA3\nNDUFA2\nNDUFA1\nMTOR\nUQCRHL\nCSNK1A1L\nDDIT3\nVDAC3\nVDAC2\nVDAC1\nKRAS\nSLC25A6\nSLC25A5\nGAPDH\nBECN1\nBECN2\nATP2A3\nRB1CC1\nATP2A2\nPIK3CD\nATP2A1\nAMBRA1\nINS\nHSD17B10\nMAPK9\nGRM5\nMAPK8\nMAPK3\nNOS1\nMAPK1\nNOS2',
                'T1D mellitus GSE10586 up':'LOC100420758\nLOC101928767\nLINC00893\nLOC100286909\nPRKXP1\nSTARD9\nLOC283357\nKIAA1586\nC20ORF197\nEID2\nGPR160\nSPINT2\nFLJ44896\nRPL28\nSPIN2B\nEMC3-AS1\nCHIC2\nPON2\nSMA5\nBACH2\nTMEM71\nCH25H\nLOC100510542\nZEB1-AS1\nLOC100507492\nTMEM68\nGNG8\nPSMD3\nTMEM69\nANO10\nPROK2\nLOC100510697\nC19ORF24\nHLA-DRB4\nZNF256\nZNF134\nANAPC15\nLINC00540\nPOP5\nZNF22\nZXDB\nPTPN20B\nRPL36\nTMEM200A\nJUNB\nSLC19A2\nZNF367\nDUSP5\nCENPV\nGPR18\nUBE2Q2L\nPCIF1\nANXA6\nLYZ\nSULT1B1\nFAM101B\nCENPK\nFXYD2\nHEATR5A\nLOC100132884\nLOC100506090\nPP7080\nC14ORF181\nRRAS2\nPITPNA-AS1\nSTAP1\nZNF227\nGPRASP2\nZNF468\nTIGD2\nHLA-DQA1\nTIGD1\nEGLN3\nSLC2A14\nLOC389765\nPET100\nDYNLL2\nSERPINB6\nADAM1A\nNUCB2\nLHX4-AS1\nSTAG3L2\nPFAS\nGCNT2\nATL1\nSTON1\nLOC100510009\nPIGW\nTMEM263\nHSPA13\nPLAGL1\nANKRD55\nNEFH\nZIK1\nBRPF1\nATHL1\nZNF561\nZBTB10\nMSH2\nHIC2\nUQCRQ\nXIST\nZNF439\nZNF559\nS100A4\nIL6ST\nSNRPE\nFAM8A1\nLINC01215\nCHRM3-AS2\nZNF430\nSLC27A2\nERV3-2\nZBTB24\nLOC100289230\nLCLAT1\nBZW2\nLRRC70\nSRBD1\nLOC100129890\nPAXIP1-AS1\nZNF304\nIL12RB2\nLZTS3\nLINC00597\nZNF662\nCERS6\nATP8B4\nFAM26F\nNBEA\nPHKB\nDMXL1\nMOB1B\nARMCX2\nARMCX3\nLSM7\nARPC3\nDRAP1\nTDRD12\nTPP2\nND3\nTRIM74\nLRRC57\nINO80E\nBZRAP1-AS1\nLOC284513\nAK5\nKRBOX4\nRAB43\nBTBD3\nC6ORF226\nBTLA\nZNF404\nPLS3\nCPA5\nLOC100505728\nSLC37A3\nC1ORF162\nITGA4\nDTX3L\nLOC283713\nSECISBP2L\nLOC100505608\nLOC374443\nTRIM59\nPECAM1\nPRKCQ-AS1\nSERINC5\nCBWD6\nASH1L-AS1\nC15ORF40\nLY96\nATP5I\nBRE-AS1\nLOC100131607\nC8ORF82\nZNF502\nMARCKSL1\nIRX3\nGZMB\nNDUFA3\nGZMA\nILF3-AS1\nLOC439911\nLYSMD4\nLEMD2\nGZMK\nNELL2\nP2RX5\nPPAP2A\nBOLA3\nSPAG1\nCDCA7L\nUHRF1\nLENG8\nLRMP\nTSPAN13\nSCRN1\nLOC100129550\nPOLR2I\nHMG20A\nPOLR2J\nKRT1\nTMEM194B\nLRRN3\nPDHX\nRSAD2\nZNF542P\nLOC100288656\nLACTB2\nCDC16\nZNF836\nMTCP1\nZNF711\nSIK1\nPFN1\nIFITM2\nSATB1\nIFITM1\nWDSUB1\nNDNF\nCCNC\nRBM26-AS1\nMAGOH\nZNF708\nZNF827\nLOC100130837\nCHAC2\nMEST\nATP6V1E2\nEGR3\nARRDC4\nSNHG9\nCOMMD1\nHSPBAP
1\nIL17RB\nVNN2\nADCK1\nSNORD104\nZNF813\nITM2C\nBEX4\nSCARNA2\nSESTD1\nGSPT2\nSNHG19\nSOX4\nCOMMD9\nC10ORF32\nSNHG20\nPDK1\nLOC100505656\nCLHC1\nOTUD6B\nNME4\nRAB33B\nNME1\nDERA\nPRPF19\nKLHL8\nID2\nKPTN\nHAR1A\nCOL18A1\nTENM1\nTMEM185B\nMRPS17\nTNFRSF11A\nGYPC\nLINC00282\nLOC100507619\nSWI5\nC10ORF88\nLIMD2\nC17ORF75\nPCSK5\nLOC100506761\nLOC100129973\nOR7E35P\nLOC100509911\nMRPS28\nDBNL\nNOG\nC1GALT1\nPRMT5\nMPP1\nUBE2E2\nLOC286052\nSH3RF3\nDPY19L4\nPER1\nCMPK2\nPNPLA4\nKCNQ5\nLOC100509703\nAPBA2\nLOC100506676\nGPALPP1\nCAPG\nCST7\nAMN1\nSCML1\nSERTAD2\nFAM3C\nSEC14L1P1\nLAIR2\nEOMES\nGEN1\nSPINK2\nCRTAM\nCACHD1\nALYREF\nTRPV1\nTHEMIS\nPUS7\nDPM3\nIFI44L\nARHGAP32\nTMEM258\nATG16L1\nUBE2M\nCRTC3\nITPRIP\nOVOS2\nGIMAP6\nNUP160\nZNF8\nTMEM243\nEMC2\nHSF5\nROMO1\nHIST1H2AC\nPDIA4\nCD93\nNR4A2\nEFNA1\nZNF91\nMXD1\nSELM\nLOC100507448\nGNB4\nMICU3\nLOC100287628\nPMCH\nLGALS3BP\nSLC2A3\nGXYLT1\nMGC70870\nRGS1\nCMTM8\nTTC14\nC4ORF48\nNAP1L5\nAHI1\nIL5\nPIKFYVE\nCARM1\nCHI3L2\nCYCS\nFBP1\nRBMS3\nAVEN\nLMNB1\nHEBP1\nSYNGR3\nNRIP1\nCLEC11A\nZNF184\nKANSL1-AS1\nERCC1\nMYO15B\nLEF1-AS1\nLRIF1\nSPRY1\nCHURC1\nPXYLP1\nERCC6L\nNDUFV2-AS1\nYBX3\nLOC100510525\nPKD2\nDDT\nPOLD2\nMAN1C1\nPDSS2\nTRAT1\nDUS4L\n',
            }
            ), 
            ChoiceField(
                name='direction',
                label='Direction of Drug Regulation', 
                default='down-regulators',
                choices=['up-regulators', 'down-regulators'],
                description='Choose a direction of gene-set regulation', 
                required=True,
                section='input'
            )
        ],
        'Paired Up/Down Genesets': [
           TextField(
                name = 'gene_input',
                hint = 'Up-regulated Gene Set',
                label = 'Input Gene Set  (up/single)',
                default='',
                description = '',
                requried=True,
                rows=8,
                examples = {
                    'Lung transplant rejection GSE3418 (mouse) up':'GBP7\nGBP8\nGBP2B\nWFDC21\nIL1R2\nRSAD2\nMYO7A\nPSMB9\nCOL1A2\nSFRP2\nSLPI\nIGTP\nSERPINE1\nC3AR1\nH2-DMA\n2210407C18RIK\nIL2RG\nMNDAL\nPIRB\nCH25H\nPSMB10\nSLAMF9\nSLAMF8\nCYP1B1\nRPL17\nGBP3\nMEST\nGBP2\nEPSTI1\nMSR1\nABCA1\nH2-EB1\nSLC11A1\nTNNC1\nNCKAP1L\nPPBP\nIRGM2\nIRGM1\nIFI47\nFAM111A\nCSF2RB2\nTNNT1\nLGMN\nGM2397\nPTPLAD2\nFERMT3\nLST1\nMS4A7\nIL1RN\nFCRLS\nSPI1\nNCAPG2\nSAMSN1\nIFIT2\nIFI30\nCFP\nSMC2\nIFIT3\nPLAC8\nHMOX1\nTSPAN4\nSOX7\nOASL2\nHOXC8\nBCL2A1A\nCCR2\nAU020206\nCCR1\nAPOBEC3\nRIPK3\nAF251705\nTFEC\nMFAP1A\nLILRB4\nIL1B\nTLR1\nBST2\nMMP12\nPTPRC\nCOL3A1\nTHEMIS2\nFXYD5\n2610528A11RIK\nTMEM106A\nIL18BP\nIGSF6\nMEFV\nEBI3\nFPR2\nADM\nFPR1\nMS4A4C\nCTSS\nMS4A4B\nSOCS3\nCLEC7A\nSPRR2A1\nSPRR2A2\nTK1\nGM7536\nSH3BGRL3\nCD38\nTIMP1\nFCER1G\nCCR5\nCTSC\nTGTP1\nIL33\nCD53\nCD52\nHELLS\nSTAT1\nARG1\nSYK\nEMR1\nADAM12\nLAPTM5\nHCK\nF7\nMS4A6C\nMS4A6B\nHCLS1\nSAA3\nCKS2\nCHIL1\nCD48\nCHIL3\nC1QC\nCLEC4A2\nC1QB\nSELPLG\nC1QA\nCCL12\nWAS\nFSTL1\nMKI67\nCENPA\nAIF1\nMS4A6D\nPSMB8\nMRC1\nSTAB1\nGPNMB\nXDH\nZBP1\nIFI205\nCD72\nCD300A\nCCDC109B\nTFPI2\nIFI204\nMPEG1\nTYROBP\nVCAM1\nCXCL10\nFCGR1\nFCGR4\nCXCL12\nCLEC4D\nFCGR3\nCLEC4E\nCOL5A2\nCOL5A1\nBUB1B\nTGFBI\nAPLNR\nKLRD1\nNCF1\nPF4\nTRIM34A\nPTPN6\nCD68\nS100A9\nMAD2L1\nS100A8\nCD84\nSLFN8\nSLFN2\nSLFN4\nGPR65\nFBLN2\nCNDP2\nFAM105A\nPTTG1\nSLFN1\nSPP1\nPOSTN\nIIGP1\nSLC15A3\nKLRA3\nNKG7\nLY86\nCYBB\nIGHG2B\nCCNA2\nCYP4F18\nCDH11\nBIRC5\nLCN2\nH2-Q10\nCSF1R\nCD274\nNCF2\nCLEC4N\nNCF4\nCORO1A\nLTBP2\nWISP2\nMMP23\nCCNB2\nC1QTNF3\nGLIPR1\nSERPINA3G\nIGHG1\nC1QTNF6\nSIRPA\nMUP10\nCTLA2B\nMMP8\nSLC37A2\nSRGN\nCTLA2A\nMMP3\nPYHIN1\nPILRA\nNPC1\nRPGRIP1\nMYH3\nFCGR2B\nIL6\nLCP1\nIRF8\nMCM5\nCDK1\nPECAM1\nMCM6\nIRF7\nDOCK2\nMYH7\nGPSM3\nCXCL9\nCXCL2\nSLC40A1\nNID2\nPLD4\nDBP\nP2RY6\nCD300LD\nLPXN\nCPXM1\nCD300LF\nCYTIP\n5430435G22RIK\nRRM2\nHIST1H2AO\nGZMB\nGZMA\nALOX5AP\nSHCBP1\nRAD51\nEVI2B\nCOL8A1\nMNDA\nPTX3\nEVL\nMYO1G\nITGB2\nTRIM30A\nNRR
OS\nPIRA1\n3930401B19RIK\nPLEK\nH2-Q7\nH2-Q6\nITGAM\nTHBS2\nSAMHD1\nFYB\nIRG1\nADAMTS5\nCCL9\nCCL8\nCCL7\nARHGDIB\nZIC1\nCCL6\nCCL5\nITGAX\nCCL3\nCCL2\nEAR2\nUPP1\nTNNI1',
                },
                section = 'input'
            ),
            TextField(
                name = 'gene_input_dn',
                label = 'Input Gene Set (down)',
                hint = 'Down-regulated Gene Set',
                default= '',
                rows=8,
                description = '',
                requried=True,
                examples = {
                    'Lung transplant rejection GSE3418 (mouse) down': 'MX1\nRPL23\nZNF160\nC20ORF24\nMYL6\nMARCO\nSCGB1A1\nRBP4\nIFI27\nCD9\nVIM\nHSPA1A\nACOT2\nGLA\nCALM2\nPFN1\nRAN\nCES1\nATP6V0D1\nPOMP\nHPGD\nGLRX\nAQP3\nRPS4Y1\nSTX12\nACTB\nRPL36AL\nUGP2\nZNF706\nCAPNS1\nS100A10\nFDFT1\nS100A11\nEGR2\nHLA-DRB5\nSTAC\nATP6AP1\nNCKAP1L\nTALDO1\nVASP\nBACE1\nVAT1\nCYP27A1\nRPS26\nVAMP8\nPLBD1\nPSME2\nTAGLN2\nHLA-DRB1\nIL1RN\nSPI1\nTXN\nMGST3\nIFI30\nMT2A\nSCPEP1\nLGALS3\nPRDX1\nPDGFD\nFTH1\nTSPAN3\nLTA4H\nDECR1\nRHOA\nANXA1\nANXA2\nDUSP1\nPTGES3\nANXA5\nTUBB\nFN1\nTRMT5\nIL1B\nSLC7A7\nANXA2P2\nHLA-DQB1\nALDOA\nTOMM6\nCOX7C\nSERPINA1\nPTPRO\nARPC1B\nRPS27L\nLITAF\nOAZ1\nCTSS\nLDHB\nSNX2\nSH3BGRL\nSH3BGRL3\nCTSH\nCD37\nFCER1G\nCTSD\nCD52\nPARP4\nDYNLL1\nBCAP31\nG0S2\nGSTO1\nAKR1C2\nGAA\nAKR1C1\nCREG1\nLAPTM5\nAKR1C3\nHCK\nLIMA1\nPSMA6\nHADHB\nFABP4\nFABP5\nPPARG\nPNPLA6\nPLIN2\nC1QB\nC1QA\nCSTB\nCD63\nGRN\nCSTA\nCTNNB1\nAMIGO2\nHEXB\nCAPG\nCRIP1\nCST3\nTMSB4X\nTUBA1B\nMRC1\nGPNMB\nGPA33\nTSPO\nMDH1\nHLA-DMA\nOLR1\nGSN\nCD74\nAPLP2\nATP1B3\nTRPV2\nTYROBP\nATP1B1\nPPIA\nRAB31\nNUPR1\nGPD1\nTGFBI\nLY6E\nZYX\nS100A6\nCAPZA1\nPPIF\nABCG1\nS100A4\nPTPN6\nGRINA\nRGS19\nGALNT12\nCD81\nPAPSS1\nCOX6A1\nLMNA\nPYCARD\nTUBB6\nLAMP2\nCFL1\nDENND5A\nHMGN1\nHMGN2\nOPN3\nATP6V0C\nINHBA\nCYBB\nCYBA\nISG15\nPGD\nSDCBP\nRGCC\nARPC2\nGNB2\nERH\nSLC27A3\nNOP10\nNCF2\nIRS2\nUQCRB\nLGALS3BP\nPLAU\nSRP14\nCOX7A2\nCORO1C\nUQCRH\nNME1-NME2\nUBB\nPI3\nEFHD2\nATP6V1F\nTMSB10\nHSP90AA1\nHSPA8\nTXNRD1\nTMBIM1\nLCP1\nCPE\nMAN2B1\nTMBIM6\nFBP1\nACO1\nCXCL8\nRTN4\nYWHAE\nBCL2A1\nLOC100133583\nDBI\nAPIP\nDTX4\nTREM1\nHSPB1\nPDLIM1\nCA2\nLASP1\nPSAP\nATP5E\nIRAK1\nPGK1\nLPXN\nYWHAH\nTCF7L2\nH2AFZ\nNDUFA4\nALOX5AP\nNDUFA1\nPLAUR\nSOD1\nPLA2G16\nPKM\nFGR\nDNAJA1\nMLPH\nVAPA\nAPOC1\nAPOC2\nPCOLCE2\nEVL\nPFDN5\nNACAP1\nFOLR3\nSLC25A5\nCD101\nCEBPB\nSEC11A\nHSP90AB1\nFHL1\nYBX1\nARRB2\nATP5G3\nSQRDL\nHSD17B11\nOASL\nSOBP\nALDH2\nARHGDIB\nUBE2NL\nVSIG4',
                },
                section = 'input',
           ),
            ChoiceField(
                name='direction',
                label='Direction of Drug Regulation', 
                default='reversers',
                choices=['reversers', 'mimickers'],
                description='Choose a direction of gene-set regulation', 
                required=True,
                section='input'
            )
        ]
    }
)%}

In [None]:
%%appyter code_exec

# Turns the submitted form values into the Python variables used by the rest of
# the notebook: `geneset`, `geneset_dn` and `direction`.
# Each gene text is split on newlines and stripped; entries of one character or
# fewer (blank lines included) are dropped by the len(...) > 1 filter.
# Single tab: value[0] is the gene text and value[1] the direction choice.
# Paired tab: value[0]/value[1] are the up/down gene texts and value[2] the direction.
{% if genesets_input.raw_value == 'Single Gene Set' %} # for if the gene list was pasted in
geneset = [x.strip() for x in {{ genesets_input.value[0] }}.split('\n') if len(x.strip()) > 1]
geneset_dn = []
direction = "{{ genesets_input.value[1] }}"
{% else %}
geneset = [x.strip() for x in {{ genesets_input.value[0] }}.split('\n') if len(x.strip()) > 1]
geneset_dn = [x.strip() for x in {{ genesets_input.value[1] }}.split('\n') if len(x.strip()) > 1]
direction = "{{ genesets_input.value[2] }}"
{% endif %}

In [None]:
# Reduce the four form choices to the 'up'/'down' token used in queries and links:
# up-regulators and mimickers become 'up', every other choice becomes 'down'.
direction_str = 'up' if direction in ('up-regulators', 'mimickers') else 'down'

# A non-empty down list switches the notebook into the paired (up/down) workflow.
use_up_down = len(geneset_dn) > 0

In [None]:
# Running counters; tab_num numbers the table captions and fig_num is presumably
# its figure counterpart (not used in the cells visible here).
fig_num, tab_num = 1, 1

In [None]:

def enrich_single_set(geneset: list, first=500, url="http://l2s2.maayanlab.cloud/graphql", fda_approved=True, timeout=(10, 300)):
    """Run the single-gene-set enrichment query against a GraphQL endpoint.

    The query is filtered by the module-level ``direction_str`` (sent as the
    ``filterTerm`` variable with a leading space) and sorted by p-value.

    Args:
        geneset: Gene symbols to enrich.
        first: Maximum number of enrichment results requested.
        url: GraphQL endpoint to post to.
        fda_approved: Value sent as the ``filterFda`` query variable.
        timeout: ``requests`` timeout (seconds, or a ``(connect, read)`` tuple) so
            an unresponsive server cannot hang the notebook indefinitely.

    Returns:
        A DataFrame with one row per returned gene set: its ``term``, ``id`` and
        ``nGeneIds``, the enrichment statistics of the result it belongs to, and
        ``approved``/``count`` taken from its first FDA-count node (``False``/``0``
        when there is none). An empty DataFrame when nothing was returned.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    query = {
    "operationName": "EnrichmentQuery",
    "variables": {
        "filterTerm": f" {direction_str}",
        "offset": 0,
        "first": first,
        "filterFda": fda_approved,
        "sortBy": "pvalue",
        "genes": geneset,
    },
    "query": """query EnrichmentQuery(
                    $genes: [String]!
                    $filterTerm: String = ""
                    $offset: Int = 0
                    $first: Int = 10
                    $filterFda: Boolean = false
                    $sortBy: String = ""
                    ) {
                    currentBackground {
                        enrich(
                        genes: $genes
                        filterTerm: $filterTerm
                        offset: $offset
                        first: $first
                        filterFda: $filterFda
                        sortby: $sortBy
                        ) {
                        nodes {
                            geneSetHash
                            pvalue
                            adjPvalue
                            oddsRatio
                            nOverlap
                            geneSets {
                            nodes {
                                term
                                id
                                nGeneIds
                                geneSetFdaCountsById {
                                nodes {
                                    approved
                                    count
                                }
                                }
                            }
                            totalCount
                            }
                        }
                        totalCount
                        }
                    }
                    }
                    """,
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    response = requests.post(url, data=json.dumps(query), headers=headers, timeout=timeout)
    response.raise_for_status()
    res = response.json()

    enrichment = res['data']['currentBackground']['enrich']['nodes']

    # One row per gene set; the statistics of the enclosing result are repeated
    # on each of its gene-set rows through `meta`.
    df_enrichment = pd.json_normalize(
        enrichment,
        record_path=['geneSets', 'nodes'],
        meta=['geneSetHash', 'pvalue', 'adjPvalue', 'oddsRatio', 'nOverlap']
    )

    if df_enrichment.empty:
        return pd.DataFrame()

    # The FDA-count list can be empty, so fall back to "not approved" / zero.
    fda_nodes = df_enrichment["geneSetFdaCountsById.nodes"]
    df_enrichment["approved"] = fda_nodes.map(lambda x: x[0]['approved'] if len(x) > 0 else False)
    df_enrichment["count"] = fda_nodes.map(lambda x: x[0]['count'] if len(x) > 0 else 0)

    return df_enrichment.drop(columns=['geneSetFdaCountsById.nodes'])

def enrich_up_down(genes_up: list[str], genes_down: list[str], first=500, url="http://l2s2.maayanlab.cloud/graphql", fda_approved=True, timeout=(10, 300)):
    """Run the paired (up/down) enrichment query against a GraphQL endpoint.

    The module-level ``direction_str`` selects both the ``filterTerm`` variable
    and the sort key (``pvalue_mimic`` for 'up', ``pvalue_reverse`` otherwise).
    The query field is ``pairedEnrich`` when ``url`` contains 'l2s2' and
    ``pairEnrich`` for any other endpoint.

    Args:
        genes_up: Up-regulated gene symbols.
        genes_down: Down-regulated gene symbols.
        first: Maximum number of results requested.
        url: GraphQL endpoint to post to.
        fda_approved: Value sent as the ``filterFda`` query variable.
        timeout: ``requests`` timeout (seconds, or a ``(connect, read)`` tuple) so
            an unresponsive server cannot hang the notebook indefinitely.

    Returns:
        A DataFrame with the mimic/reverse statistics of each result plus
        ``geneSetIdUp``/``geneSetIdDown`` (ids of the signatures whose term
        contains ' up' / ' down'), ``term`` (term of the first signature) and
        ``approved``/``count`` from the first signature's first FDA-count node
        (``False``/``0`` when that list is empty). An empty DataFrame when
        nothing was returned.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    query = {
        "operationName": "PairEnrichmentQuery",
        "variables": {
            "filterTerm": f" {direction_str}",
            "offset": 0,
            "first": first,
            "filterFda": fda_approved,
            "sortBy": "pvalue_mimic" if direction_str == "up" else "pvalue_reverse",
            "pvalueLe": 0.05,
            "genesUp": genes_up,
            "genesDown": genes_down
        },
        # Doubled braces are literal braces; the single {} receives the field name.
        "query": """query PairEnrichmentQuery($genesUp: [String]!, $genesDown: [String]!, $filterTerm: String = "", $offset: Int = 0, $first: Int = 10, $filterFda: Boolean = false, $sortBy: String = "", $pvalueLe: Float = 0.05) {{
                    currentBackground {{
                        {}(
                        filterTerm: $filterTerm
                        offset: $offset
                        first: $first
                        filterFda: $filterFda
                        sortby: $sortBy
                        pvalueLe: $pvalueLe
                        genesDown: $genesDown
                        genesUp: $genesUp
                        ) {{
                        totalCount
                        nodes {{
                            adjPvalueMimic
                            adjPvalueReverse
                            mimickerOverlap
                            oddsRatioMimic
                            oddsRatioReverse
                            pvalueMimic
                            pvalueReverse
                            reverserOverlap
                            geneSet {{
                            nodes {{
                                id
                                nGeneIds
                                term
                                geneSetFdaCountsById {{
                                nodes {{
                                    count
                                    approved
                                }}
                                }}
                            }}
                            }}
                        }}
                        }}
                    }}
                    }}""".format("pairedEnrich" if 'l2s2' in url else "pairEnrich")
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    response = requests.post(url, data=json.dumps(query), headers=headers, timeout=timeout)
    response.raise_for_status()
    res = response.json()

    background = res['data']['currentBackground']
    if 'pairEnrich' in background:
        enrichment = background['pairEnrich']['nodes']
    else:
        enrichment = background['pairedEnrich']['nodes']

    df_enrichment_pair = pd.DataFrame(enrichment)

    if df_enrichment_pair.empty:
        return pd.DataFrame()

    def _id_for(gene_set, marker):
        """Id of the first signature whose term contains `marker`, else None."""
        return next((node['id'] for node in gene_set['nodes'] if marker in node['term']), None)

    def _fda_value(gene_set, field, default):
        """`field` of the first signature's first FDA-count node, else `default`."""
        counts = gene_set['nodes'][0]['geneSetFdaCountsById']['nodes']
        # Guard the empty list (as enrich_single_set does) instead of raising IndexError.
        if not counts:
            return default
        return counts[0][field] or default

    gene_sets = df_enrichment_pair["geneSet"]
    df_enrichment_pair["geneSetIdUp"] = gene_sets.map(lambda t: _id_for(t, ' up'))
    df_enrichment_pair["geneSetIdDown"] = gene_sets.map(lambda t: _id_for(t, ' down'))
    df_enrichment_pair["term"] = gene_sets.map(lambda t: t['nodes'][0]['term'])
    df_enrichment_pair["approved"] = gene_sets.map(lambda t: _fda_value(t, 'approved', False))
    df_enrichment_pair["count"] = gene_sets.map(lambda t: _fda_value(t, 'count', 0))

    return df_enrichment_pair.drop(columns=['geneSet']).reset_index(drop=True)

def get_overlap(genes, id, url="http://l2s2.maayanlab.cloud/graphql", timeout=(10, 300)):
    """Return the symbols of the genes that overlap a stored gene set.

    Args:
        genes: Gene symbols to compare against the stored gene set.
        id: Identifier of the stored gene set (declared as ``UUID!`` in the query).
        url: GraphQL endpoint to post to.
        timeout: ``requests`` timeout (seconds, or a ``(connect, read)`` tuple) so
            an unresponsive server cannot hang the notebook indefinitely.

    Returns:
        The ``symbol`` of every node in the overlap, in the order returned.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    query = {
    "operationName": "OverlapQuery",
    "variables": {
        "id": id,
        "genes": genes
    },
    "query": """query OverlapQuery($id: UUID!, $genes: [String]!) {geneSet(id: $id) {
    overlap(genes: $genes) {
      nodes {
        symbol
        ncbiGeneId
        description
        summary
      }   }}}"""
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    response = requests.post(url, data=json.dumps(query), headers=headers, timeout=timeout)
    response.raise_for_status()
    overlap_nodes = response.json()['data']['geneSet']['overlap']['nodes']
    return [item['symbol'] for item in overlap_nodes]

def get_up_dn_overlap(genes_up: list[str], genes_down: list[str], id_up: str, id_down: str, overlap_type: str, url="http://l2s2.maayanlab.cloud/graphql"):
    """Collect the genes shared between a user's up/down lists and a signature pair.

    For 'mimickers' the up list is compared with the up signature and the down
    list with the down signature; for 'reversers' the pairing is crossed.

    Returns:
        The de-duplicated union of both overlaps as a list (order unspecified),
        or ``None`` when ``overlap_type`` is neither 'mimickers' nor 'reversers'.
    """
    # Choose which signature each user list is matched against.
    if overlap_type == 'mimickers':
        target_for_up, target_for_down = id_up, id_down
    elif overlap_type == 'reversers':
        target_for_up, target_for_down = id_down, id_up
    else:
        return None

    hits_for_up = get_overlap(genes_up, target_for_up, url)
    hits_for_down = get_overlap(genes_down, target_for_down, url)
    return list(set(hits_for_up) | set(hits_for_down))
    
def add_user_geneset(geneset, geneset_dn = None, url="http://l2s2.maayanlab.cloud/graphql", timeout=(10, 300)):
    """Register the user's gene set(s) with the service and return their ids.

    Args:
        geneset: Gene symbols of the single (or up-regulated) gene set.
        geneset_dn: Gene symbols of the down-regulated gene set; ``None`` for the
            single-gene-set workflow.
        url: GraphQL endpoint to post to.
        timeout: ``requests`` timeout (seconds, or a ``(connect, read)`` tuple) so
            an unresponsive server cannot hang the notebook indefinitely.

    Returns:
        The id of the stored gene set when ``geneset_dn`` is ``None``; otherwise
        a ``(up_id, down_id)`` tuple.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    mutation = "mutation AddUserGeneSet($genes: [String] = [\"AKT1\"], $description: String = \"\") {\n  addUserGeneSet(input: {genes: $genes, description: $description}) {\n    userGeneSet {\n      id\n    }\n  }\n}"

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    def _register(genes, description):
        """Submit one gene set and return the id assigned to it."""
        payload = {
            "query": mutation,
            "variables": {
                "genes": genes,
                "description": description
            },
            "operationName": "AddUserGeneSet"
        }
        response = requests.post(url, data=json.dumps(payload), headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()['data']['addUserGeneSet']['userGeneSet']['id']

    if geneset_dn is None:
        return _register(geneset, "User gene set")

    # Paired workflow: label the two sets so they can be told apart. The original
    # condition was inverted and put the "(up)" label on the single-set case.
    up_id = _register(geneset, "User gene set (up)")
    down_id = _register(geneset_dn, "User gene set (down)")
    return up_id, down_id

def get_l2s2_valid_genes(genes: list[str], url="http://l2s2.maayanlab.cloud/graphql", timeout=(10, 300)):
    """Map the given genes through the service and keep those it resolves.

    Args:
        genes: Gene symbols supplied by the user.
        url: GraphQL endpoint to post to.
        timeout: ``requests`` timeout (seconds, or a ``(connect, read)`` tuple) so
            an unresponsive server cannot hang the notebook indefinitely.

    Returns:
        The ``geneInfo.symbol`` of every returned node whose ``geneInfo`` is not
        null, in the order returned.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    query = {
    "query": """query GenesQuery($genes: [String]!) {
        geneMap2(genes: $genes) {
            nodes {
                gene
                geneInfo {
                    symbol
                    }
                }
            }
        }""",
    "variables": {"genes": genes},
    "operationName": "GenesQuery"
    }

    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json"
    }

    response = requests.post(url, data=json.dumps(query), headers=headers, timeout=timeout)
    response.raise_for_status()
    mapped = response.json()['data']['geneMap2']['nodes']
    # Nodes without gene info are dropped.
    return [node['geneInfo']['symbol'] for node in mapped if node['geneInfo'] is not None]


# Drug Consensus Appyter
The provided gene set, or pair of up/down gene sets, is submitted to L2S2 [1] and DRUG-seqr [2] to identify FDA-approved drugs that mimic or reverse its expression pattern. Drugs that appear in both the significantly overlapping FDA-approved L2S2 signatures and the FDA-approved DRUG-seqr signatures are extracted, producing a list of final candidate drugs. To rank these consensus drugs, the top-ranking signature for each drug is used to compute a mean p-value across L2S2 and DRUG-seqr. Additionally, the targets and mechanisms of action (MOAs) of the consensus drugs are assessed using DrugEnrichr [3]. The overlapping genes of the top-ranked drugs from L2S2 and DRUG-seqr are also submitted to Enrichr [4] to identify affected pathways and biological processes.

In [None]:
# Keep only the genes the L2S2 gene map resolves. The down list is looked up
# only in the paired workflow; otherwise it is reset to an empty list.
geneset = get_l2s2_valid_genes(geneset)
if use_up_down:
    geneset_dn = get_l2s2_valid_genes(geneset_dn)
else:
    geneset_dn = []

In [None]:
# Stop early when validation left nothing to query with. The paired workflow
# needs both lists to be non-empty; the single workflow only the one list.
if use_up_down and (len(geneset) == 0 or len(geneset_dn) == 0):
    raise ValueError("Insufficient genes in the input gene sets that overlap with the L2S2 database.")
if not use_up_down and len(geneset) == 0:
    raise ValueError("Insufficient genes in the input gene set that overlap with the L2S2 database.")

In [None]:
# Register the validated gene set(s) with both services (the returned ids feed the
# "View in ..." links) and run each enrichment with and without the FDA filter.
_DRUGSEQR_GRAPHQL_URL = "http://drugseqr.maayanlab.cloud/graphql"

if not use_up_down:
    l2s2_geneset_id = add_user_geneset(geneset)
    drugseqr_geneset_id = add_user_geneset(geneset, url=_DRUGSEQR_GRAPHQL_URL)

    l2s2_df = enrich_single_set(geneset, first=500)
    drugseqr_df = enrich_single_set(geneset, url=_DRUGSEQR_GRAPHQL_URL, first=500)

    l2s2_df_nofda = enrich_single_set(geneset, first=500, fda_approved=False)
    drugseqr_df_nofda = enrich_single_set(geneset, url=_DRUGSEQR_GRAPHQL_URL, first=500, fda_approved=False)
else:
    l2s2_geneset_up_id, l2s2_geneset_dn_id = add_user_geneset(geneset, geneset_dn=geneset_dn)
    drugseqr_geneset_up_id, drugseqr_geneset_dn_id = add_user_geneset(
        geneset, geneset_dn=geneset_dn, url=_DRUGSEQR_GRAPHQL_URL
    )

    l2s2_df = enrich_up_down(geneset, geneset_dn, first=500)
    drugseqr_df = enrich_up_down(geneset, geneset_dn, url=_DRUGSEQR_GRAPHQL_URL, first=500)

    l2s2_df_nofda = enrich_up_down(geneset, geneset_dn, first=500, fda_approved=False)
    drugseqr_df_nofda = enrich_up_down(
        geneset, geneset_dn, url=_DRUGSEQR_GRAPHQL_URL, first=500, fda_approved=False
    )

In [None]:
# Both FDA-filtered result tables are needed downstream; abort if either is empty.
if any(frame.empty for frame in (l2s2_df, drugseqr_df)):
    raise ValueError("No results found for the provided gene set(s).")

In [None]:
def _perturbation_from_term(term, position):
    """Return the lower-cased `position`-th '_'-separated token of `term`, or None if absent."""
    tokens = term.split('_')
    return tokens[position].lower() if len(tokens) > position else None


# The perturbation name is read from token 4 of an L2S2 term and token 0 of a
# DRUG-seqr term. All four frames are lower-cased the same way; previously
# l2s2_df_nofda kept the original casing, unlike the other three.
l2s2_df['perturbation'] = l2s2_df['term'].apply(_perturbation_from_term, args=(4,))
drugseqr_df['perturbation'] = drugseqr_df['term'].apply(_perturbation_from_term, args=(0,))
l2s2_df_nofda['perturbation'] = l2s2_df_nofda['term'].apply(_perturbation_from_term, args=(4,))
drugseqr_df_nofda['perturbation'] = drugseqr_df_nofda['term'].apply(_perturbation_from_term, args=(0,))

## L2S2 and DRUG-seqr Enriched Signatures

In [None]:
# Verb form of the selected direction, so the caption reads "predicted to reverse ..."
# rather than "predicted to reversers ...". Unknown values are shown unchanged.
direction_verb = {
    'up-regulators': 'up-regulate',
    'down-regulators': 'down-regulate',
    'mimickers': 'mimic',
    'reversers': 'reverse',
}.get(direction, direction)

# Pick the columns and the L2S2 deep link that match the active workflow.
if use_up_down:
    l2s2_fda_columns = ['perturbation', 'term', 'pvalueMimic', 'adjPvalueMimic', 'oddsRatioMimic', 'mimickerOverlap', 'pvalueReverse', 'adjPvalueReverse', 'oddsRatioReverse', 'reverserOverlap', 'approved', 'count']
    l2s2_fda_link = f"https://l2s2.maayanlab.cloud/enrichpair?dataset={l2s2_geneset_up_id}&dataset={l2s2_geneset_dn_id}&fda=true&dir={direction_str.strip()}&sort={'pvalue_reverse' if direction_str == 'down' else 'pvalue_mimic'}"
else:
    l2s2_fda_columns = ['perturbation', 'term', 'pvalue', 'adjPvalue', 'oddsRatio', 'nOverlap', 'approved', 'count']
    l2s2_fda_link = f"https://l2s2.maayanlab.cloud/enrich?dataset={l2s2_geneset_id}&fda=true&dir={direction_str.strip()}"

display(l2s2_df[l2s2_fda_columns])
display(Markdown(f"**Table {tab_num}.** Ranked FDA-approved LINCS L1000 signatures predicted to {direction_verb} the uploaded geneset."))
display(HTML(f"<a href=\"{l2s2_fda_link}\" target=\"_blank\">View in L2S2</a>"))
tab_num += 1

# Save the full table next to the notebook and offer it for download.
l2s2_df.to_csv('l2s2_enrichment_fda-approved.tsv', sep='\t')
display(FileLink('l2s2_enrichment_fda-approved.tsv', result_html_prefix="Download: "))

In [None]:
# Verb form of the selected direction, so the caption reads "predicted to reverse ..."
# rather than "predicted to reversers ...". Unknown values are shown unchanged.
direction_verb = {
    'up-regulators': 'up-regulate',
    'down-regulators': 'down-regulate',
    'mimickers': 'mimic',
    'reversers': 'reverse',
}.get(direction, direction)

# Pick the columns and the L2S2 deep link that match the active workflow.
if use_up_down:
    l2s2_all_columns = ['perturbation', 'term', 'pvalueMimic', 'adjPvalueMimic', 'oddsRatioMimic', 'mimickerOverlap', 'pvalueReverse', 'adjPvalueReverse', 'oddsRatioReverse', 'reverserOverlap', 'approved', 'count']
    l2s2_all_link = f"https://l2s2.maayanlab.cloud/enrichpair?dataset={l2s2_geneset_up_id}&dataset={l2s2_geneset_dn_id}&fda=false&dir={direction_str.strip()}&sort={'pvalue_reverse' if direction_str == 'down' else 'pvalue_mimic'}"
else:
    l2s2_all_columns = ['perturbation', 'term', 'pvalue', 'adjPvalue', 'oddsRatio', 'nOverlap', 'approved', 'count']
    l2s2_all_link = f"https://l2s2.maayanlab.cloud/enrich?dataset={l2s2_geneset_id}&dir={direction_str.strip()}"

display(l2s2_df_nofda[l2s2_all_columns])
display(Markdown(f"**Table {tab_num}.** Ranked LINCS L1000 signatures predicted to {direction_verb} the uploaded geneset."))
display(HTML(f"<a href=\"{l2s2_all_link}\" target=\"_blank\">View in L2S2</a>"))
tab_num += 1

# Save the full table next to the notebook and offer it for download.
l2s2_df_nofda.to_csv('l2s2_enrichment_all.tsv', sep='\t')
display(FileLink('l2s2_enrichment_all.tsv', result_html_prefix="Download: "))

In [None]:

# DRUG-seqr results table (the link queries with fda=true). The column set and
# the link target depend on whether paired up/down gene sets were supplied;
# the caption, table counter and TSV export are the same in both cases.
display(drugseqr_df[
    ['perturbation', 'term', 'pvalueMimic', 'adjPvalueMimic', 'oddsRatioMimic', 'mimickerOverlap', 'pvalueReverse', 'adjPvalueReverse', 'oddsRatioReverse','reverserOverlap',  'approved', 'count']
    if use_up_down else
    ['perturbation', 'term', 'pvalue', 'adjPvalue', 'oddsRatio', 'nOverlap', 'approved', 'count']
])
display(Markdown(f"**Table {tab_num}.** Ranked FDA-approved DRUG-seq signatures predicted to {direction} the uploaded geneset."))
display(HTML(
    f"<a href=\"https://drugseqr.maayanlab.cloud/enrichpair?dataset={drugseqr_geneset_up_id}&dataset={drugseqr_geneset_dn_id}&fda=true&dir={direction_str.strip()}&sort={'pvalue_reverse' if direction_str == 'down' else 'pvalue_mimic'}\" target=\"_blank\">View in DRUG-seqr</a>"
    if use_up_down else
    f"<a href=\"https://drugseqr.maayanlab.cloud/enrich?dataset={drugseqr_geneset_id}&fda=true&dir={direction_str.strip()}\" target=\"_blank\">View in DRUG-seqr</a>"
))
tab_num += 1

# Export the complete (not column-restricted) frame.
drugseqr_df.to_csv('drugseqr_enrichment_fda-approved.tsv', sep='\t')
display(FileLink('drugseqr_enrichment_fda-approved.tsv', result_html_prefix="Download: "))

In [None]:

# DRUG-seqr results table for the fda=false query. The column set and the link
# target depend on whether paired up/down gene sets were supplied; the caption,
# table counter and TSV export are the same in both cases.
display(drugseqr_df_nofda[
    ['perturbation', 'term', 'pvalueMimic', 'adjPvalueMimic', 'oddsRatioMimic', 'mimickerOverlap', 'pvalueReverse', 'adjPvalueReverse', 'oddsRatioReverse','reverserOverlap',  'approved', 'count']
    if use_up_down else
    ['perturbation', 'term', 'pvalue', 'adjPvalue', 'oddsRatio', 'nOverlap', 'approved', 'count']
])
display(Markdown(f"**Table {tab_num}.** Ranked DRUG-seq signatures predicted to {direction} the uploaded geneset."))
display(HTML(
    f"<a href=\"https://drugseqr.maayanlab.cloud/enrichpair?dataset={drugseqr_geneset_up_id}&dataset={drugseqr_geneset_dn_id}&fda=false&dir={direction_str.strip()}&sort={'pvalue_reverse' if direction_str == 'down' else 'pvalue_mimic'}\" target=\"_blank\">View in DRUG-seqr</a>"
    if use_up_down else
    f"<a href=\"https://drugseqr.maayanlab.cloud/enrich?dataset={drugseqr_geneset_id}&fda=false&dir={direction_str.strip()}\" target=\"_blank\">View in DRUG-seqr</a>"
))
tab_num += 1

# Export the complete (not column-restricted) frame.
drugseqr_df_nofda.to_csv('drugseqr_enrichment_all.tsv', sep='\t')
display(FileLink('drugseqr_enrichment_all.tsv', result_html_prefix="Download: "))

## L2S2 & DRUG-seqr Overlap

In [None]:
# One-sentence summary: row counts of the two result tables compared below.
display(HTML(
    "<p>{} significant drug signatures were identified with L2S2 and {} significant drug signatures were identified with DRUG-seqr.</p>".format(
        len(l2s2_df), len(drugseqr_df)
    )
))

In [None]:
# Unique perturbation names reported by each resource, built once and reused
# for the overlap/union lists, the contingency table and the Venn diagram.
l2s2_drugs = set(l2s2_df['perturbation'].values)
drugseqr_drugs = set(drugseqr_df['perturbation'].values)

overlap = list(l2s2_drugs.intersection(drugseqr_drugs))
union = list(l2s2_drugs.union(drugseqr_drugs))

# 2x2 contingency table for Fisher's exact test:
#   a = drugs in both, b = DRUG-seqr only, c = L2S2 only, d = remaining background.
DRUGSEQR_TOTAL_DRUGS = 3312  # total number of drugs in drugseqr
a = len(overlap)
b = len(drugseqr_drugs) - a
c = len(l2s2_drugs) - a
# NOTE(review): d goes negative if the union exceeds the background size,
# which fisher_exact is expected to reject - confirm the background covers both resources.
d = DRUGSEQR_TOTAL_DRUGS - a - b - c
oddsratio, p_value = fisher_exact([[a, b], [c, d]], alternative='two-sided')

venn2([l2s2_drugs, drugseqr_drugs], ['LINCS L1000', 'DRUG-seq'])
plt.savefig('l2s2_drugseqr_overlap.png', dpi=300, bbox_inches='tight')
plt.savefig('l2s2_drugseqr_overlap.svg', dpi=300, bbox_inches='tight')
plt.show()
# Scientific notation keeps very small p-values readable (rounding to four
# decimals would print 0.0) and works for numpy scalars and plain floats alike.
display(Markdown(f"**Figure {fig_num}.** Overlapping drugs identified from querying L2S2 and DRUG-seqr (Fisher's exact test, p-value: {p_value:.2e})."))
fig_num +=1
display(FileLink('l2s2_drugseqr_overlap.png', result_html_prefix="Download PNG: "))
display(FileLink('l2s2_drugseqr_overlap.svg', result_html_prefix="Download SVG: "))

In [None]:
# Index both result tables by drug name so later cells can look rows up by label.
# set_index consumes the 'perturbation' column, so each call is guarded: running
# this cell a second time is a no-op instead of a KeyError.
if 'perturbation' in drugseqr_df.columns:
    drugseqr_df.set_index('perturbation', inplace=True)
if 'perturbation' in l2s2_df.columns:
    l2s2_df.set_index('perturbation', inplace=True)

In [None]:

# Pick the adjusted p-value column: paired (up/down) input has separate
# mimicker and reverser columns, single-set input has only 'adjPvalue'.
if use_up_down:
    pvalue_name = {'mimickers': 'adjPvalueMimic', 'reversers': 'adjPvalueReverse'}.get(direction, 'adjPvalue')
else:
    pvalue_name = 'adjPvalue'

# For every overlapping drug keep the first row per drug in each table, then
# average the two adjusted p-values.
drugseqr_overlap = drugseqr_df.loc[drugseqr_df.index.isin(overlap), pvalue_name].groupby('perturbation').first()
l2s2_overlap = l2s2_df.loc[l2s2_df.index.isin(overlap), pvalue_name].groupby('perturbation').first()
combined_overlap = (drugseqr_overlap + l2s2_overlap) / 2

combined_overlap_df = combined_overlap.to_frame(name='adj. p-value')
combined_overlap_df['-log10(mean adj. p-value)'] = combined_overlap_df['adj. p-value'].map(lambda p: -np.log10(p))

# Ascending sort puts the largest -log10 value last, i.e. at the top of the barh plot.
combined_overlap_df = combined_overlap_df.sort_values(by="-log10(mean adj. p-value)", ascending=True)
plt.barh(combined_overlap_df.index.values, combined_overlap_df['-log10(mean adj. p-value)'], color='black')
plt.xlabel('-log10(mean adj. p-value)')
plt.savefig('l2s2_drugseqr_overlap_barchart.png', bbox_inches='tight', dpi=300)
plt.savefig('l2s2_drugseqr_overlap_barchart.svg', bbox_inches='tight', dpi=300)
plt.show()
display(
    Markdown(f"**Figure {fig_num}.** Overlapping drugs identified from querying L2S2 and DRUG-seqr mean significance of the top ranked signatures."),
    FileLink('l2s2_drugseqr_overlap_barchart.png', result_html_prefix="Download PNG: "),
    FileLink('l2s2_drugseqr_overlap_barchart.svg', result_html_prefix="Download SVG: "),
)
fig_num += 1

In [None]:
def add_list(entities: list, description = 'overlap drug list', source='DrugEnrichr'):
    """Upload a list of entities to an Enrichr-style service and return its JSON reply.

    Args:
        entities: Items to upload (drug names for DrugEnrichr, gene symbols for
            Enrichr). They are sent newline-separated, one item per line.
        description: Free-text label stored with the uploaded list.
        source: Application name used as the URL path segment
            (e.g. 'DrugEnrichr' or 'Enrichr').

    Returns:
        The decoded JSON response. Callers in this notebook read its
        'userListId' and 'shortId' keys.

    Raises:
        Exception: If the service answers with a non-success HTTP status.
    """
    # Same HTTPS host as the "View in ..." links shown elsewhere in this notebook.
    ENRICHR_URL = f'https://maayanlab.cloud/{source}/addList'
    # str() keeps the join from failing on a non-string entry (e.g. a NaN name).
    entities_str = '\n'.join(str(entity) for entity in entities)
    # (None, value) tuples make requests send plain multipart form fields.
    payload = {
        'list': (None, entities_str),
        'description': (None, description)
    }

    response = requests.post(ENRICHR_URL, files=payload)
    if not response.ok:
        raise Exception(f'Error analyzing drug list (HTTP {response.status_code})')

    return response.json()

def enrichr_figure(res_list: list, color='pink'):
    """Draw one horizontal bar chart of -log10(p-value) per enrichment library.

    Args:
        res_list: Four parallel lists ``[terms, pvalues, adjusted_pvalues,
            libraries]``; entry ``i`` of the first three lists holds the values
            plotted for ``libraries[i]``.
        color: Bar color for terms with p-value < 0.05; other bars are light grey.

    Returns:
        The matplotlib Figure, already rendered with ``plt.show()``. When
        ``libraries`` is empty, a figure containing only a short notice is
        returned so callers can still call ``fig.savefig``.
    """
    all_terms, all_pvalues, all_adjusted_pvalues, all_libraries = res_list
    # Bar colors
    bar_color_not_sig = 'lightgrey'
    bar_color = color
    edgecolor = None

    if len(all_libraries) == 0:
        # plt.subplots rejects nrows=0, so produce a placeholder figure instead.
        fig = plt.figure()
        fig.text(0.5, 0.5, 'No enrichment results to display', ha='center', va='center')
        plt.show()
        return fig

    # squeeze=False keeps the axes in a 2-D array even when there is only one
    # library; without it a single Axes comes back and indexing it fails.
    fig, axes_grid = plt.subplots(nrows=len(all_libraries), ncols=1, squeeze=False)
    axes = axes_grid[:, 0]

    for i, library_name in enumerate(all_libraries):
        ax = axes[i]
        terms = all_terms[i]
        pvalues = all_pvalues[i]
        neg_log_pvalues = np.log10(np.asarray(pvalues, dtype=float)) * -1
        # A library can come back with no terms; then only the title is drawn.
        has_terms = len(terms) > 0

        if has_terms:
            bar_colors = [bar_color if (p < 0.05) else bar_color_not_sig for p in pvalues]
            sns.barplot(x=neg_log_pvalues, y=terms, ax=ax, palette=bar_colors, edgecolor=edgecolor, linewidth=1)
        # Term names are written inside the plot below, so the y axis is hidden.
        ax.get_yaxis().set_visible(False)
        ax.set_title(library_name.replace('_', ' '), fontsize=30)
        if i == len(all_libraries) - 1:
            ax.set_xlabel('-Log10(p-value)', fontsize=30)
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.tick_params(axis='x', which='major', labelsize=20)
        # Integer ticks would leave a sub-1 range without tick marks; use 0.1 steps.
        if has_terms and neg_log_pvalues.max() < 1:
            ax.xaxis.set_ticks(np.arange(0, neg_log_pvalues.max(), 0.1))

        # Labels start slightly right of the axis origin.
        label_x = max(ax.get_xlim()) / 200
        for ii, term in enumerate(terms):
            # An asterisk marks terms whose adjusted p-value is below 0.05.
            separator = '  *' if all_adjusted_pvalues[i][ii] < 0.05 else '  '
            annot = separator.join([term, np.format_float_scientific(pvalues[ii], precision=2)])
            ax.text(label_x, ii, annot, ha='left', wrap=True, fontsize=30)
    plt.subplots_adjust(bottom=-4.8, right=4.7, wspace=0.03, hspace=0.2)
    plt.show()
    return fig


def enrich_libraries(user_list_id: str, all_libraries: list = ['DrugCentral_Target', 'Drug_Repurposing_Hub_Target', 'Drug_Repurposing_Hub_Mechanism_of_Action'], source='DrugEnrichr'):
    """Fetch enrichment results for an uploaded list from several libraries.

    Args:
        user_list_id: Identifier of the uploaded list (the 'userListId' value
            from the ``add_list`` response).
        all_libraries: Library names to query, one request each. The default
            list is only read, never mutated.
        source: Application name used as the URL path segment
            (e.g. 'DrugEnrichr' or 'Enrichr').

    Returns:
        A pair ``(res_list, all_sig_results)``:
        ``res_list`` is ``[terms, pvalues, adjusted_pvalues, libraries]``, four
        parallel lists holding, per successfully parsed library, up to five
        terms with the lowest P-value among those with P-value < 0.05;
        ``all_sig_results`` is a list of DataFrames with every row with
        P-value < 0.05 for each of those libraries.

    Raises:
        Exception: If a request returns a non-success HTTP status. A response
            that cannot be parsed is reported with ``print`` and that library
            is skipped.
    """
    all_terms = []
    all_pvalues = []
    all_adjusted_pvalues = []
    library_success = []
    all_sig_results = []

    # Same HTTPS host as the "View in ..." links shown elsewhere in this notebook.
    ENRICHR_URL = f'https://maayanlab.cloud/{source}/enrich'
    result_columns = ['Rank in Lib', 'Term', 'P-value', 'Odds Ratio', 'Combined Score', 'Overlapping genes', 'Adjusted p-value', 'Old p-value', 'Old adjusted p-value']
    kept_columns = ['Rank in Lib', 'Term', 'P-value', 'Adjusted p-value', 'Combined Score', 'Overlapping genes']

    for library_name in all_libraries:
        # params= lets requests build and URL-encode the query string.
        response = requests.get(
            ENRICHR_URL,
            params={'userListId': user_list_id, 'backgroundType': library_name}
        )
        if not response.ok:
            raise Exception('Error fetching enrichment results')
        try:
            data = response.json()
            results_df_full = pd.DataFrame(data[library_name])
            results_df_full.columns = result_columns
            results_df_full = results_df_full[results_df_full['P-value'] < 0.05][kept_columns]
            # The significant rows are reported in the order the service returned them.
            all_sig_results.append(results_df_full)
            results_df = results_df_full.sort_values('P-value', ascending=True).head(5)
            all_terms.append(list(results_df['Term']))
            all_pvalues.append(list(results_df['P-value']))
            all_adjusted_pvalues.append(list(results_df['Adjusted p-value']))
            library_success.append(library_name)
        except Exception as e:
            # Best effort: one unparsable library must not abort the others.
            print('Error for ' + library_name + ' library:', e)
        # Pause between consecutive requests to the service.
        time.sleep(1)

    return [all_terms, all_pvalues, all_adjusted_pvalues, library_success], all_sig_results

## Drug Analysis with DrugEnrichr

In [None]:
# Upload the drugs found by both resources to DrugEnrichr, query the default
# libraries, plot the top terms, and save the figure as PNG and SVG.
overlap_drugenrichr = add_list(entities=overlap)
res_list, all_results = enrich_libraries(user_list_id=overlap_drugenrichr['userListId'])
fig = enrichr_figure(res_list=res_list, color='hotpink')
fig.savefig('DrugEnrichr_overlap_barplots.png', bbox_inches='tight', dpi=300)
fig.savefig('DrugEnrichr_overlap_barplots.svg', bbox_inches='tight', dpi=300)

# Caption, link to the uploaded list, and download links, shown in this order.
display(
    Markdown(f"**Figure {fig_num}.** Drug target and mechanism of action enrichments for the (n={len(overlap)}) overlapping drugs."),
    HTML(f"<a href=\"https://maayanlab.cloud/DrugEnrichr/enrich?dataset={overlap_drugenrichr['shortId']}\" target=\"_blank\">View in DrugEnrichr</a>"),
    FileLink('DrugEnrichr_overlap_barplots.png', result_html_prefix="Download PNG: "),
    FileLink('DrugEnrichr_overlap_barplots.svg', result_html_prefix="Download SVG: "),
)
fig_num += 1

In [None]:
# Upload every drug found by either resource to DrugEnrichr, query the default
# libraries, plot the top terms, and save the figure as PNG and SVG.
union_drugenrichr = add_list(entities=union, description='union drugs')
res_list, all_results = enrich_libraries(user_list_id=union_drugenrichr['userListId'])
fig = enrichr_figure(res_list=res_list, color='hotpink')
fig.savefig('DrugEnrichr_union_barplots.png', bbox_inches='tight', dpi=300)
fig.savefig('DrugEnrichr_union_barplots.svg', bbox_inches='tight', dpi=300)

# Caption, link to the uploaded list, and download links, shown in this order.
display(
    Markdown(f"**Figure {fig_num}.** Drug target and mechanism of action enrichments for the (n={len(union)}) total unique drugs."),
    HTML(f"<a href=\"https://maayanlab.cloud/DrugEnrichr/enrich?dataset={union_drugenrichr['shortId']}\" target=\"_blank\">View in DrugEnrichr</a>"),
    FileLink('DrugEnrichr_union_barplots.png', result_html_prefix="Download PNG: "),
    FileLink('DrugEnrichr_union_barplots.svg', result_html_prefix="Download SVG: "),
)
fig_num += 1

In [None]:
# The last index entry of combined_overlap_df is taken as the top drug.
top_drug = combined_overlap_df.index[-1]

# For that drug, take the first matching row in each table and fetch the genes
# shared between the uploaded gene set(s) and that signature.
if not use_up_down:
    # Single gene set: one signature id per resource.
    l2s2_id = l2s2_df.loc[[top_drug]].iloc[0]['id']
    drugseqr_id = drugseqr_df.loc[[top_drug]].iloc[0]['id']
    l2s2_overlap = get_overlap(geneset, l2s2_id)
    drugseqr_overlap = get_overlap(geneset, drugseqr_id, url="http://drugseqr.maayanlab.cloud/graphql")
else:
    # Paired input: each signature has separate up and down gene set ids.
    l2s2_id_up = l2s2_df.loc[[top_drug]].iloc[0]['geneSetIdUp']
    l2s2_id_dn = l2s2_df.loc[[top_drug]].iloc[0]['geneSetIdDown']
    drugseqr_id_up = drugseqr_df.loc[[top_drug]].iloc[0]['geneSetIdUp']
    drugseqr_id_dn = drugseqr_df.loc[[top_drug]].iloc[0]['geneSetIdDown']

    l2s2_overlap = get_up_dn_overlap(
        genes_up=geneset,
        genes_down=geneset_dn,
        id_up=l2s2_id_up,
        id_down=l2s2_id_dn,
        overlap_type=direction,
    )
    drugseqr_overlap = get_up_dn_overlap(
        genes_up=geneset,
        genes_down=geneset_dn,
        id_up=drugseqr_id_up,
        id_down=drugseqr_id_dn,
        overlap_type=direction,
        url="http://drugseqr.maayanlab.cloud/graphql",
    )

## Genes Analysis with Enrichr

In [None]:
# Genes shared by, and genes present in either of, the two top-signature overlaps.
overlap_genes = list(set(drugseqr_overlap).intersection(l2s2_overlap))
union_genes = list(set(drugseqr_overlap).union(l2s2_overlap))

# List the shared genes when there are between 1 and 50 of them; otherwise
# (none shared, or more than 50) list the union instead.
display(HTML(
    (f"<p>Genes identified in both top overlaps: " + '\n'.join(overlap_genes)+ "</p>")
    if 1 <= len(overlap_genes) <= 50 else
    (f"<p>Union of genes identified in the top overlaps for {top_drug}: " + '\n'.join(union_genes)+ "</p>")
))

In [None]:
# Enrichment of the genes shared by both top signatures. This runs only when
# at least three genes are shared.
if len(overlap_genes) >= 3:  
    overlap_enrichr = add_list(overlap_genes, source="Enrichr", description=f"overlapping genes top {top_drug} signature")
    res_list, all_results = enrich_libraries(overlap_enrichr['userListId'], source='Enrichr', all_libraries=['WikiPathway_2023_Human', 'GO_Biological_Process_2023', 'MGI_Mammalian_Phenotype_Level_4_2021'])
    fig = enrichr_figure(res_list, color='tomato')
    fig.savefig('Enrichr_overlap_barplots.png', dpi=300, bbox_inches='tight')
    fig.savefig('Enrichr_overlap_barplots.svg', dpi=300, bbox_inches='tight')
    display(Markdown(f"**Figure {fig_num}.** Overlapping genes (n={len(overlap_genes)}) from the top enriched {top_drug} signature from L2S2 and DRUG-seqr. https://maayanlab.cloud/Enrichr/enrich?dataset={overlap_enrichr['shortId']}"))
    # target="_blank" (previously misspelled "_blan") opens the link in a new
    # tab, matching every other "View in ..." link in this notebook.
    display(HTML(f"<a href=\"https://maayanlab.cloud/Enrichr/enrich?dataset={overlap_enrichr['shortId']}\" target=\"_blank\">View in Enrichr</a>"))
    display(FileLink('Enrichr_overlap_barplots.png', result_html_prefix="Download PNG: "))
    display(FileLink('Enrichr_overlap_barplots.svg', result_html_prefix="Download SVG: "))
    fig_num += 1

In [None]:
# Enrichment of the union of genes from both top-signature overlaps: upload to
# Enrichr, query three libraries, plot, and save the figure as PNG and SVG.
union_enrichr = add_list(
    entities=union_genes,
    description=f"union overlapping genes from top {top_drug} signatures",
    source="Enrichr",
)
res_list, all_results = enrich_libraries(
    user_list_id=union_enrichr['userListId'],
    all_libraries=['WikiPathway_2023_Human', 'GO_Biological_Process_2023', 'MGI_Mammalian_Phenotype_Level_4_2021'],
    source='Enrichr',
)
fig = enrichr_figure(res_list=res_list, color='tomato')
fig.savefig('Enrichr_union_barplots.png', bbox_inches='tight', dpi=300)
fig.savefig('Enrichr_union_barplots.svg', bbox_inches='tight', dpi=300)

# Caption, link to the uploaded list, and download links, shown in this order.
display(
    Markdown(f"**Figure {fig_num}.** Union of overlapping genes (n={len(union_genes)}) from the top enriched {top_drug} signature from L2S2 and DRUG-seqr."),
    HTML(f"<a href=\"https://maayanlab.cloud/Enrichr/enrich?dataset={union_enrichr['shortId']}\" target=\"_blank\">View in Enrichr</a>"),
    FileLink('Enrichr_union_barplots.png', result_html_prefix="Download PNG: "),
    FileLink('Enrichr_union_barplots.svg', result_html_prefix="Download SVG: "),
)
fig_num += 1

## References

[1]  Marino GB, Evangelista JE, Clarke DJB, Ma’ayan A. L2S2: chemical perturbation and CRISPR KO LINCS L1000 signature search engine. Nucleic Acids Res. 2025; gkaf373.

[2]  Li J, Ho DJ, Henault M, Yang C, Neri M, Ge R, et al. DRUG-seq provides unbiased biological activity readouts for neuroscience drug discovery. ACS Chem Biol. 2022;17: 1401–1414.

[3]  Kropiwnicki E, Evangelista JE, Stein DJ, Clarke DJB, Lachmann A, Kuleshov MV, et al. Drugmonizome and Drugmonizome-ML: integration and abstraction of small molecule attributes for drug enrichment analysis and machine learning. Database (Oxford). 2021;2021.

[4]  Chen EY, Tan CM, Kou Y, Duan Q, Wang Z, Meirelles GV, Clark NR, Ma'ayan A. Enrichr: interactive and collaborative HTML5 gene list enrichment analysis tool. BMC Bioinformatics. 2013 Apr 15;14:128.