In [3]:
#%%appyter init
from appyter import magic
magic.init(lambda _=globals: _())

# L1000 Knock-Down Display (L1000KD2)

This appyter uses 24,187 gene knockdown induced differential gene expression signatures that were created using L1000 microarrays. Each signature has one gene knocked down, and roughly 2,991 different genes were knocked down in this data. The raw data can be found here: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE92742.


Signatures vary in three ways:
1. The gene that was knocked down (2,991 different types)
2. The cell line used (15 different cell lines)
3. The time after gene knockdown that the L1000 microarray was performed (5 different times)

In [4]:
import pandas as pd
import IPython
from IPython.display import HTML, display, Markdown, IFrame
import numpy as np
import json
import requests
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from react_scatter_board.jupyter_compat import ScatterBoard
from scipy import stats
from fisher import pvalue

In [5]:
%%appyter hide_code

{% do SectionField(name='section1', title = 'Submit a Signature to L1000KD2', subtitle = 'Project your signature onto the L1000KD2 Map', img = 'geneicon.png')%}

{% do DescriptionField( 
    name='description',
    text='The examples below allow you to test out the appyter. To get the full experience, search for a signature or add your own signature. <br><br><b>Note: You can only input one signature, if you would like to use your own signature, leave the search input blank.</b>',
    section='section1'
) %}

In [6]:
%%appyter code_exec


{% set search_input =  StringField(name = 'search_input', label = 'Fetch a Signature', default = 'Breast Cancer', description = 'Type in the name of a Disease, Small Molecule, or gene. You can identify this name using the following link: http://amp.pharm.mssm.edu/CREEDS/#query/7dc48de025979255c03cb42c1ddc3dca', section = 'section1') %}

{% do DescriptionField( 
    name='desc',
    text='<center><b>Cut and paste your up and down gene sets here</b></center>',
    section='section1'
) %}

{% set up_gene_input =  TextField(name='up_gene_input', label='Upregulated Genes', default = 'PCGF5\nMX1\nDNAAF1\nMCTS1\nMX2\nLOC643802\nMARCKS\nCD4\nSCGB1A1\nRBP4\nFAR2\nHNRNPC\nTNFSF8\nCDA\nPI4K2B\nIDO1\nCKLF\nSF3B4\nSAMD9\nC1ORF194\nSERPINE2\nC3AR1\nCLDN2\nTMEM71\nCLECL1\nAPOL6\nGNG2\nMILR1\nLINC00639\nGNG5\nGBP1\nGBP3\nPROK2\nGBP5\nS100A12\nEPSTI1\nS100A11\nGBP4\nF8A1\nJUND\nHLA-DRB5\nC9ORF66\nBIN2\nTREML4\nPLBD1\nST20\nSNORA48\nPSME3\nSNORA74B\nHAMP\nGAPT\nCRYAB\nFREM2\nPADI2\nMS4A7\nGABARAP\nSPI1\nGMFG\nHPR\nSAMSN1\nGATA1\nGLDN\nPLAC8\nZC3H12A\nBLOC1S1\nFLJ45513\nAGMO\nHESX1\nLYN\nSCARNA21\nPHF11\nTET2\nTIMM23\nAPBB1IP\nSULT1B1\nLINC00605\nC4ORF3\nSIGLEC5\nHCST\nSIGLEC7\nSERPINA1\nZC3HAV1\nFPR1\nIQGAP1\nLITAF\nVSIG10L\nTRIM5\nSTAP1\nSH3BGRL3\nP2RY13\nP2RY14\nSF3A2\nSTAT1\nSTAT2\nSTAT4\nWNT7B\nRNASE2\nSERPINB9\nF5\nTMEM154\nGPR141\nPSMA4\nDHRS9\nTAOK3\nPLP2\nPELI1\nCCL11\nCTF1\nPIGR\nSNX10\nIFIH1\nGNLY\nNAMPT\nGCNT3\nMPV17\nMETTL7B\nJAK3\nCCL19\nPGLYRP1\nCCL18\nOLR1\nTLE4\nCD164\nGK\nLOC388813\nTOR1B\nTPM4\nPOU2AF1\nNEAT1\nPNRC1\nSLC9B2\nCXCL10\nCXCL11\nCLEC4D\nCLEC4E\nCLEC4A\nCLC\nLY6E\nANKRD22\nS100A9\nFAM8A1\nS100A8\nVKORC1L1\nCSF3R\nGTF2B\nHP\nUCP2\nGPR65\nTNF\nCXCL16\nCXCL17\nRNU11\nBCL2L14\nRNU12\nLOC399715\nCDH2\nRNF19B\nLAMP3\nCFL1\nNBN\nSLC39A8\nCERS6\nLY86\nADRA2A\nSTK24\nLCN2\nTPP1\nXAF1\nSP140L\nNOP10\nMAP3K13\nKCNE1\nNCF4\nGPR84\nSAT1\nCYP19A1\nFAM216B\nMMP25\nAMDHD1\nPIM2\nMAP2K6\nRPH3A\nMGC12916\nGPX2\nEIF2AK2\nTRPA1\nDBNDD1\nFCGR2A\nLCP1\nBCL6\nBCL3\nSLC26A8\nLCP2\nFCGR2C\nSFT2D1\nLRRK2\nLY96\nTREM1\nTARP\nC15ORF48\nFCGR3A\nFCGR3B\nPSTPIP2\nLBP\nSERF2\nMROH8\nCR1\nPATL1\nKCNH7\nCHIT1\nRP2\nEVI2B\nMNDA\nTRIM38\nSFTPA1\nBPI\nSFTPA2\nPIK3AP1\nSMAP2\nCCDC71L\nWIPF1\nC1S\nFHL3\nDDX60L\nARRB2\nTREML3P\nFCAR\nTNFSF13B\nCLEC1A\nFCGR1A\nCCL8\nCCL7\nCCL4\nCCL3\nCCL2\nIGF2BP3\nATP7B\nFCGR1B\nTRIM21\nARL5B\nKCNJ2\nTRIM22\nTMEM52B\nSP110\nIL1R2\nACSL1\nRSAD2\nITGB2-AS1\nPML\nSNTN\nGBP1P1\nLPAR6\nXRN1\nCOL9A2\nEIF1B\nPFN1\nALPK1\nIFITM2\nIFITM3\nIFITM1\nFGL2\nAQP9\nGLRX\nIL2RG\nUBE2L6\nSTX11\nHERC6\nACTB\nHERC5\nSLAMF7\nB2M\nCDR1\nNTNG2\nRABGAP1L\nHLA-J\nBCL11A\nCMC2\nTNFSF14\nHLA-C\nHLA-A\nHLA-B\nIFI44\nVASP\nVNN2\nVNN3\nRGL4\nCYSTM1\nAPOBEC3A\nLST1\nIL1RN\nANP32A\nIFIT5\nDCUN1D3\nKLHDC7B\nIFIT2\nIFIT1\nIFI30\nIFIT3\nFTH1\nMIER1\nTNFSF10\nFFAR2\nLGALS9\nARSE\nTNS4\nCCR1\nPSENEN\nSIGLEC14\nNCOA7\nCCDC114\nNMI\nRHOH\nHBA1\nTFEC\nLILRB2\nHBA2\nCYP7B1\nLILRB3\nKLK10\nNME1\nSYN1\nIL1A\nCEACAM3\nTLR1\nIL1B\nPTPRC\nKCNJ2-AS1\nCEACAM1\nNME8\nCEACAM6\nRNU6ATAC\nTLR4\nTENM1\nIGSF6\nDAZAP2\nHBB\nTOP2A\nBRCA2\nCTSS\nCHRAC1\nCLEC7A\nPDZD11\nCD38\nCD37\nFCER1G\nCTSE\nSPRR2E\nCAMP\nCTSC\nCD53\nGCH1\nHCK\nPARP9\nFKBP1A\nCD48\nSAA1\nOR7E14P\nPOPDC3\nC1QC\nC1QB\nCSTA\nWAS\nLILRA1\nPTMS\nCST7\nLILRA5\nLILRA6\nZDHHC19\nSPTLC2\nDMBT1\nRAB8A\nRAC2\nENPP6\nRAB8B\nCD55\nCORIN\nLMO2\nCA12\nZBP1\nCD74\nGABRP\nG6PD\nGCA\nHRASLS2\nTYROBP\nPDGFRL\nIFI44L\nDOK3\nCAPZA1\nMYADM\nCD69\nYPEL3\nFCN1\nTFAP2A\nCNTN1\nRGS18\nTPSB2\nNAPA\nNEXN\nIFI6\nSAMD9L\nSTRN4\nBASP1\nREPS2\nMGAM\nATP6V0C\nDDX58\nCYBB\nISG15\nUSB1\nHIST2H2BE\nDLGAP1-AS1\nMXD1\nALDH3A1\nSELL\nCDHR5\nCARD16\nCARD17\nC5AR1\nCACNA1A\nPSEN1\nSEZ6L2\nCSF2RB\nCPLX2\nDAPP1\nDDX60\nHCAR3\nIL18RAP\nHCAR2\nFDCSP\nFLOT1\nZNF385A\nABCC2\nSRGN\nMMP8\nCBX4\nPILRA\nOLIG1\nANXA2R\nOAS1\nOAS2\nOAS3\nTNIP3\nIRF2\nALPL\nCHI3L2\nIRF9\nMNS1\nIRF7\nZMYND12\nNFKBID\nCHI3L1\nHS3ST3A1\nRTN3\nGPSM3\nRNU4ATAC\nYWHAE\nMCTP2\nHSPB6\nBCL2A1\nTNFAIP6\nDNAH7\nLMNB1\nSIRPB1\nCA1\nC20ORF203\nCASP4\nCASP5\nSP140\nPDE4B\nCASP1\nCYTIP\nRBM11\nALOX5AP\nVTRNA1-1\nYWHAZ\nSOD2\nFGR\nAIM2\nHSH2D\nC19ORF66\nPPDPF\nS100P\nDYNLT1\nHIST1H2BC\nLTF\nHIST1H2BD\nUSP15\nSTEAP4\nRTP4\nPLEK\nCLEC12A\nCXORF21\nSIX1\nMSMO1\nOASL\nARHGDIB\nVSIG1\nPTK2B\nMDH1B', description='Paste the Upregulated Genes from your signature (One gene per row).', section = 'section1') %}
    {% set down_gene_input = TextField(name='down_gene_input', label='Downregulated Genes', default= 'STARD3\nRBFOX2\nUNC13B\nPRELID1\nSTARD9\nZNF160\nWDR74\nPRKAB1\nDKK3\nCHFR\nNISCH\nDDX39A\nPSMC5\nPSMC3\nC19ORF48\nCYP1A1\nPSMC1\nDCHS1\nASB1\nVPS28\nSF3B5\nPDXK\nAHNAK\nCOL12A1\nADCY4\nSPG7\nDLL1\nDCAF5\nTPRG1L\nADCY6\nDLL4\nPSMD6\nMYC\nPSMD2\nPXDC1\nS100A13\nZSCAN18\nFZD4\nKLF13\nTSPYL2\nRPL41\nZNF263\nPHC2\nNEBL\nPMM2\nNCAPH2\nMCAM\nSTARD10\nVPS13D\nVPS37B\nNR1D2\nTFIP11\nFIBP\nBCR\nUCK2\nQARS\nITPA\nSIVA1\nSNAI1\nHBP1\nMDC1\nRPL3\nMEPCE\nRRP1\nENO2\nXPC\nGATA2\nELK3\nANAPC11\nABR\nC12ORF57\nPPP3R1\nGUCD1\nGJA4\nGRPEL2\nHMOX1\nIER2\nSFN\nWDR37\nBLCAP\nJUN\nDUSP1\nSPHK1\nNFATC4\nNFATC2\nGNL1\nGNL2\nITPKC\nLETM1\nMMP14\nPXDN\nPQLC1\nITPKB\nPLEKHM2\nRAPGEF3\nMYZAP\nFXYD6\nRAPGEF1\nPLEKHM1\nGAS6\nMGLL\nARPC1B\nAKAP17A\nTIMM13\nCTCF\nLRRC32\nFAM160B2\nMYCT1\nHMGXB3\nMEF2D\nUTP6\nFOS\nTTC7A\nTUBB4B\nSERPINB6\nFARP1\nXAB2\nDDB1\nSPSB3\nEBNA1BP2\nPLEKHO1\nADAM15\nFOSB\nPLEKHO2\nTSR1\nENG\nZNF330\nZNF451\nHEXA\nRPLP0\nTCF25\nHEXB\nNDUFA11\nTCF21\nSNX11\nNOC2L\nTUBA1A\nSMPD4\nNDST1\nTUBA1C\nARFRP1\nCKB\nRRP12\nZSWIM8\nBRD9\nFBXW5\nSEC24C\nUSP7\nSEMA4C\nPPP1R16B\nMUL1\nFAM118A\nHIPK2\nRPSA\nPPP1CA\nNRBP1\nTUBA4A\nMADD\nMFNG\nMAPKAPK2\nCSPG4\nSREK1\nLZTR1\nGOLGA8A\nSLC25A36\nKANK2\nPHLDB1\nRNMT\nPLXND1\nODC1\nDLST\nGLT8D1\nFAM214B\nSLC9A1\nRNF31\nCTGF\nGGA2\nABLIM1\nTUBB6\nABLIM3\nMFN2\nSLC39A7\nRPS5\nFLNC\nSLC25A44\nGRB10\nEIF2B5\nEIF2B4\nLRRC47\nOSBPL5\nATP8B2\nAIP\nFBXO31\nPGF\nPYGO2\nMAFF\nMOB2\nMAFG\nBCL2L1\nDNMT1\nST6GALNAC6\nBCL2L2\nTUBGCP2\nNAB2\nFAM13B\nCUL1\nEDC3\nEDC4\nBAG3\nC6ORF106\nUROD\nPLXNA2\nMAF1\nMOAP1\nSLC38A5\nPPP1R12A\nTMEM255B\nWIPI1\nMICAL3\nFAM219B\nCDC42BPB\nTMBIM1\nCDK9\nDNAJC7\nMFAP1\nNES\nMCM7\nEIF4E2\nBCAR1\nSLC25A4\nSLC25A3\nSRCAP\nLMBR1L\nITPR3\nFOXK2\nITPR1\nLBH\nDNAJB2\nAPH1A\nRRP7A\nPPP6R2\nCCDC85B\nVPS54\nDNAJB4\nRALGDS\nACP2\nMBIP\nPPP1R14B\nDST\nATRX\nCIRBP\nRANGAP1\nSELENBP1\nMYO1D\nKCTD10\nCTDSP2\nCDC37\nEIF3G\nEIF3B\nNOP56\nPPP1R13B\nCANT1\nTSSC4\nHDAC3\nLUZP1\nZBTB44\nSEMA3F\nTTL\nPBXIP1\nHDAC7\nSLC25A29\nADAMTS2\nSIN3B\nKCTD20\nUPP1\nPCDH1\nSLC25A25\nMLXIP\nSLC35A4\nENTPD6\nRSAD1\nBSDC1\nAMFR\nAFG3L2\nCD2BP2\nULK3\nCCM2\nBCOR\nAKAP1\nBAIAP2L1\nEXOSC7\nSEPHS1\nE2F4\nCCNL2\nEGR1\nARRDC2\nXRCC6\nGADD45B\nGADD45G\nHLA-H\nSNHG15\nRPS28\nSMTN\nDHX29\nBAP1\nTCERG1\nPCDH12\nPIEZO1\nMPHOSPH10\nTXNDC11\nCTR9\nUBE2J2\nGOLGA3\nTNS1\nRASSF1\nPELO\nINPP5A\nNUDCD3\nKAT5\nKIAA0930\nMICAL1\nEPC1\nINPP5K\nPDE2A\nASL\nTSPAN4\nLCN10\nATP6V0A1\nATP6V0A2\nSTAT5A\nSFTPC\nHARS2\nFAM32A\nMRPS2\nID3\nCC2D1B\nSGK1\nARHGEF7\nUBXN1\nACVRL1\nRRAD\nC2CD2\nZFAND2B\nSTC1\nMEDAG\nZMYM2\nACTR1B\nMAP4\nSH3PXD2B\nSH3PXD2A\nSTK38\nC1QBP\nQSOX1\nSPRYD3\nMLH1\nMETTL3\nPRMT1\nTHOC6\nMYO9B\nSGSM2\nGAK\nTXLNA\nCTTN\nPER1\nSGSM3\nF2RL3\nERRFI1\nROBO3\nROBO4\nFHOD1\nZDHHC7\nCRIP1\nTRAK2\nTRAK1\nHERPUD1\nCXORF40B\nGUK1\nALS2CL\nSERTAD3\nSERTAD1\nCCT8\nKLHDC3\nPDGFRB\nRALBP1\nPTCD3\nRARRES2\nARHGAP29\nTRPC4AP\nTANC1\nEEF1A1\nILVBL\nWBP1L\nPPIG\nAMOTL1\nMPHOSPH8\nRAF1\nINPP1\nKDM1A\nCD82\nGALNT18\nNOL8\nPRKY\nPRDM2\nCIB1\nNDRG1\nRXRB\nCCND1\nEFNB1\nPES1\nHCFC1R1\nBAALC\nS1PR2\nSASH1\nFDPS\nPCID2\nARHGEF15\nDDX56\nTBCD\nNASP\nCTPS1\nSMAD7\nDEXI\nSSH1\nSSH2\nPAN2\nARHGAP10\nSKIV2L\nCCDC130\nSAE1\nDOCK6\nDDX5\nLRRFIP1\nNR4A1\nRGS3\nPODXL\nKIF3B\nCHMP1A\nTMEM109\nCAMTA2\nCYC1\nANKZF1\nSUN2\nURGCP\nSUN1\nEGFL7\nUSP22\nHSPA8\nARAP3\nNAP1L4\nPYCR2\nPARVB\nRPS3A\nURB1\nTGFBR2\nIFT43\nMEIS1\nPEX5\nNDUFS6\nRASA3\nATG4B\nGRSF1\nLUC7L3\nDOCK1\nB4GALT3\nAGAP3\nTNFAIP1\nDTX4\nDDX42\nDPP7\nZC3H7B\nH2AFJ\nGMPPA\nTMEM208\nZC3H7A\nRHBDF1\nSGTA\nDAPK3\nTBC1D9\nTHAP2\nUSF1\nTAF1\nERCC1\nCLDN5\nPIK3IP1\nCRY2\nPOLDIP3\nLDB1\nEHMT1\nUSP19\nRND1\nSPRED1\nVSIG2\nMAT2A\nGNA12\nRPS6KA2\nLRIG1\nSCYL1\nGET4\nRBM6\nCYTH1', description='Paste the Down Regulated Genes from your signature (One gene per row).', section = 'section1') %}

In [7]:
%%appyter code_exec

upGenes = []
dnGenes = []
if ({{search_input}} != ""):
    CREEDS_URL = 'http://amp.pharm.mssm.edu/CREEDS/'
    response = requests.get(CREEDS_URL + 'search', params={'q':{{search_input}}})
    resp = []
    if response.status_code == 200:
        resp = response.json()
    search_sig = {}
    if resp != []:
        response = requests.get(CREEDS_URL + 'api', params={'id':resp[0]['id']})
        if response.status_code == 200:
            search_sig = response.json()
    if search_sig != {}:
        for gene in search_sig['up_genes']:
            upGenes.append(gene[0])
        for gene in search_sig['down_genes']:
            dnGenes.append(gene[0])

```python
upGenes = []
dnGenes = []
if ('Breast Cancer' != ""):
    CREEDS_URL = 'http://amp.pharm.mssm.edu/CREEDS/'
    response = requests.get(CREEDS_URL + 'search', params={'q':'Breast Cancer'})
    resp = []
    if response.status_code == 200:
        resp = response.json()
    search_sig = {}
    if resp != []:
        response = requests.get(CREEDS_URL + 'api', params={'id':resp[0]['id']})
        if response.status_code == 200:
            search_sig = response.json()
    if search_sig != {}:
        for gene in search_sig['up_genes']:
            upGenes.append(gene[0])
        for gene in search_sig['down_genes']:
            dnGenes.append(gene[0])
```

In [8]:
%%appyter code_exec

if upGenes == []:
    upGenes= {{up_gene_input}}.split('\n')
    upGenes = [x.strip() for x in upGenes]
    dnGenes= {{down_gene_input}}.split('\n')
    dnGenes = [x.strip() for x in dnGenes]

```python
if upGenes == []:
    upGenes= '''PCGF5
MX1
DNAAF1
MCTS1
MX2
LOC643802
MARCKS
CD4
SCGB1A1
RBP4
FAR2
HNRNPC
TNFSF8
CDA
PI4K2B
IDO1
CKLF
SF3B4
SAMD9
C1ORF194
SERPINE2
C3AR1
CLDN2
TMEM71
CLECL1
APOL6
GNG2
MILR1
LINC00639
GNG5
GBP1
GBP3
PROK2
GBP5
S100A12
EPSTI1
S100A11
GBP4
F8A1
JUND
HLA-DRB5
C9ORF66
BIN2
TREML4
PLBD1
ST20
SNORA48
PSME3
SNORA74B
HAMP
GAPT
CRYAB
FREM2
PADI2
MS4A7
GABARAP
SPI1
GMFG
HPR
SAMSN1
GATA1
GLDN
PLAC8
ZC3H12A
BLOC1S1
FLJ45513
AGMO
HESX1
LYN
SCARNA21
PHF11
TET2
TIMM23
APBB1IP
SULT1B1
LINC00605
C4ORF3
SIGLEC5
HCST
SIGLEC7
SERPINA1
ZC3HAV1
FPR1
IQGAP1
LITAF
VSIG10L
TRIM5
STAP1
SH3BGRL3
P2RY13
P2RY14
SF3A2
STAT1
STAT2
STAT4
WNT7B
RNASE2
SERPINB9
F5
TMEM154
GPR141
PSMA4
DHRS9
TAOK3
PLP2
PELI1
CCL11
CTF1
PIGR
SNX10
IFIH1
GNLY
NAMPT
GCNT3
MPV17
METTL7B
JAK3
CCL19
PGLYRP1
CCL18
OLR1
TLE4
CD164
GK
LOC388813
TOR1B
TPM4
POU2AF1
NEAT1
PNRC1
SLC9B2
CXCL10
CXCL11
CLEC4D
CLEC4E
CLEC4A
CLC
LY6E
ANKRD22
S100A9
FAM8A1
S100A8
VKORC1L1
CSF3R
GTF2B
HP
UCP2
GPR65
TNF
CXCL16
CXCL17
RNU11
BCL2L14
RNU12
LOC399715
CDH2
RNF19B
LAMP3
CFL1
NBN
SLC39A8
CERS6
LY86
ADRA2A
STK24
LCN2
TPP1
XAF1
SP140L
NOP10
MAP3K13
KCNE1
NCF4
GPR84
SAT1
CYP19A1
FAM216B
MMP25
AMDHD1
PIM2
MAP2K6
RPH3A
MGC12916
GPX2
EIF2AK2
TRPA1
DBNDD1
FCGR2A
LCP1
BCL6
BCL3
SLC26A8
LCP2
FCGR2C
SFT2D1
LRRK2
LY96
TREM1
TARP
C15ORF48
FCGR3A
FCGR3B
PSTPIP2
LBP
SERF2
MROH8
CR1
PATL1
KCNH7
CHIT1
RP2
EVI2B
MNDA
TRIM38
SFTPA1
BPI
SFTPA2
PIK3AP1
SMAP2
CCDC71L
WIPF1
C1S
FHL3
DDX60L
ARRB2
TREML3P
FCAR
TNFSF13B
CLEC1A
FCGR1A
CCL8
CCL7
CCL4
CCL3
CCL2
IGF2BP3
ATP7B
FCGR1B
TRIM21
ARL5B
KCNJ2
TRIM22
TMEM52B
SP110
IL1R2
ACSL1
RSAD2
ITGB2-AS1
PML
SNTN
GBP1P1
LPAR6
XRN1
COL9A2
EIF1B
PFN1
ALPK1
IFITM2
IFITM3
IFITM1
FGL2
AQP9
GLRX
IL2RG
UBE2L6
STX11
HERC6
ACTB
HERC5
SLAMF7
B2M
CDR1
NTNG2
RABGAP1L
HLA-J
BCL11A
CMC2
TNFSF14
HLA-C
HLA-A
HLA-B
IFI44
VASP
VNN2
VNN3
RGL4
CYSTM1
APOBEC3A
LST1
IL1RN
ANP32A
IFIT5
DCUN1D3
KLHDC7B
IFIT2
IFIT1
IFI30
IFIT3
FTH1
MIER1
TNFSF10
FFAR2
LGALS9
ARSE
TNS4
CCR1
PSENEN
SIGLEC14
NCOA7
CCDC114
NMI
RHOH
HBA1
TFEC
LILRB2
HBA2
CYP7B1
LILRB3
KLK10
NME1
SYN1
IL1A
CEACAM3
TLR1
IL1B
PTPRC
KCNJ2-AS1
CEACAM1
NME8
CEACAM6
RNU6ATAC
TLR4
TENM1
IGSF6
DAZAP2
HBB
TOP2A
BRCA2
CTSS
CHRAC1
CLEC7A
PDZD11
CD38
CD37
FCER1G
CTSE
SPRR2E
CAMP
CTSC
CD53
GCH1
HCK
PARP9
FKBP1A
CD48
SAA1
OR7E14P
POPDC3
C1QC
C1QB
CSTA
WAS
LILRA1
PTMS
CST7
LILRA5
LILRA6
ZDHHC19
SPTLC2
DMBT1
RAB8A
RAC2
ENPP6
RAB8B
CD55
CORIN
LMO2
CA12
ZBP1
CD74
GABRP
G6PD
GCA
HRASLS2
TYROBP
PDGFRL
IFI44L
DOK3
CAPZA1
MYADM
CD69
YPEL3
FCN1
TFAP2A
CNTN1
RGS18
TPSB2
NAPA
NEXN
IFI6
SAMD9L
STRN4
BASP1
REPS2
MGAM
ATP6V0C
DDX58
CYBB
ISG15
USB1
HIST2H2BE
DLGAP1-AS1
MXD1
ALDH3A1
SELL
CDHR5
CARD16
CARD17
C5AR1
CACNA1A
PSEN1
SEZ6L2
CSF2RB
CPLX2
DAPP1
DDX60
HCAR3
IL18RAP
HCAR2
FDCSP
FLOT1
ZNF385A
ABCC2
SRGN
MMP8
CBX4
PILRA
OLIG1
ANXA2R
OAS1
OAS2
OAS3
TNIP3
IRF2
ALPL
CHI3L2
IRF9
MNS1
IRF7
ZMYND12
NFKBID
CHI3L1
HS3ST3A1
RTN3
GPSM3
RNU4ATAC
YWHAE
MCTP2
HSPB6
BCL2A1
TNFAIP6
DNAH7
LMNB1
SIRPB1
CA1
C20ORF203
CASP4
CASP5
SP140
PDE4B
CASP1
CYTIP
RBM11
ALOX5AP
VTRNA1-1
YWHAZ
SOD2
FGR
AIM2
HSH2D
C19ORF66
PPDPF
S100P
DYNLT1
HIST1H2BC
LTF
HIST1H2BD
USP15
STEAP4
RTP4
PLEK
CLEC12A
CXORF21
SIX1
MSMO1
OASL
ARHGDIB
VSIG1
PTK2B
MDH1B'''.split('\n')
    upGenes = [x.strip() for x in upGenes]
    dnGenes= '''STARD3
RBFOX2
UNC13B
PRELID1
STARD9
ZNF160
WDR74
PRKAB1
DKK3
CHFR
NISCH
DDX39A
PSMC5
PSMC3
C19ORF48
CYP1A1
PSMC1
DCHS1
ASB1
VPS28
SF3B5
PDXK
AHNAK
COL12A1
ADCY4
SPG7
DLL1
DCAF5
TPRG1L
ADCY6
DLL4
PSMD6
MYC
PSMD2
PXDC1
S100A13
ZSCAN18
FZD4
KLF13
TSPYL2
RPL41
ZNF263
PHC2
NEBL
PMM2
NCAPH2
MCAM
STARD10
VPS13D
VPS37B
NR1D2
TFIP11
FIBP
BCR
UCK2
QARS
ITPA
SIVA1
SNAI1
HBP1
MDC1
RPL3
MEPCE
RRP1
ENO2
XPC
GATA2
ELK3
ANAPC11
ABR
C12ORF57
PPP3R1
GUCD1
GJA4
GRPEL2
HMOX1
IER2
SFN
WDR37
BLCAP
JUN
DUSP1
SPHK1
NFATC4
NFATC2
GNL1
GNL2
ITPKC
LETM1
MMP14
PXDN
PQLC1
ITPKB
PLEKHM2
RAPGEF3
MYZAP
FXYD6
RAPGEF1
PLEKHM1
GAS6
MGLL
ARPC1B
AKAP17A
TIMM13
CTCF
LRRC32
FAM160B2
MYCT1
HMGXB3
MEF2D
UTP6
FOS
TTC7A
TUBB4B
SERPINB6
FARP1
XAB2
DDB1
SPSB3
EBNA1BP2
PLEKHO1
ADAM15
FOSB
PLEKHO2
TSR1
ENG
ZNF330
ZNF451
HEXA
RPLP0
TCF25
HEXB
NDUFA11
TCF21
SNX11
NOC2L
TUBA1A
SMPD4
NDST1
TUBA1C
ARFRP1
CKB
RRP12
ZSWIM8
BRD9
FBXW5
SEC24C
USP7
SEMA4C
PPP1R16B
MUL1
FAM118A
HIPK2
RPSA
PPP1CA
NRBP1
TUBA4A
MADD
MFNG
MAPKAPK2
CSPG4
SREK1
LZTR1
GOLGA8A
SLC25A36
KANK2
PHLDB1
RNMT
PLXND1
ODC1
DLST
GLT8D1
FAM214B
SLC9A1
RNF31
CTGF
GGA2
ABLIM1
TUBB6
ABLIM3
MFN2
SLC39A7
RPS5
FLNC
SLC25A44
GRB10
EIF2B5
EIF2B4
LRRC47
OSBPL5
ATP8B2
AIP
FBXO31
PGF
PYGO2
MAFF
MOB2
MAFG
BCL2L1
DNMT1
ST6GALNAC6
BCL2L2
TUBGCP2
NAB2
FAM13B
CUL1
EDC3
EDC4
BAG3
C6ORF106
UROD
PLXNA2
MAF1
MOAP1
SLC38A5
PPP1R12A
TMEM255B
WIPI1
MICAL3
FAM219B
CDC42BPB
TMBIM1
CDK9
DNAJC7
MFAP1
NES
MCM7
EIF4E2
BCAR1
SLC25A4
SLC25A3
SRCAP
LMBR1L
ITPR3
FOXK2
ITPR1
LBH
DNAJB2
APH1A
RRP7A
PPP6R2
CCDC85B
VPS54
DNAJB4
RALGDS
ACP2
MBIP
PPP1R14B
DST
ATRX
CIRBP
RANGAP1
SELENBP1
MYO1D
KCTD10
CTDSP2
CDC37
EIF3G
EIF3B
NOP56
PPP1R13B
CANT1
TSSC4
HDAC3
LUZP1
ZBTB44
SEMA3F
TTL
PBXIP1
HDAC7
SLC25A29
ADAMTS2
SIN3B
KCTD20
UPP1
PCDH1
SLC25A25
MLXIP
SLC35A4
ENTPD6
RSAD1
BSDC1
AMFR
AFG3L2
CD2BP2
ULK3
CCM2
BCOR
AKAP1
BAIAP2L1
EXOSC7
SEPHS1
E2F4
CCNL2
EGR1
ARRDC2
XRCC6
GADD45B
GADD45G
HLA-H
SNHG15
RPS28
SMTN
DHX29
BAP1
TCERG1
PCDH12
PIEZO1
MPHOSPH10
TXNDC11
CTR9
UBE2J2
GOLGA3
TNS1
RASSF1
PELO
INPP5A
NUDCD3
KAT5
KIAA0930
MICAL1
EPC1
INPP5K
PDE2A
ASL
TSPAN4
LCN10
ATP6V0A1
ATP6V0A2
STAT5A
SFTPC
HARS2
FAM32A
MRPS2
ID3
CC2D1B
SGK1
ARHGEF7
UBXN1
ACVRL1
RRAD
C2CD2
ZFAND2B
STC1
MEDAG
ZMYM2
ACTR1B
MAP4
SH3PXD2B
SH3PXD2A
STK38
C1QBP
QSOX1
SPRYD3
MLH1
METTL3
PRMT1
THOC6
MYO9B
SGSM2
GAK
TXLNA
CTTN
PER1
SGSM3
F2RL3
ERRFI1
ROBO3
ROBO4
FHOD1
ZDHHC7
CRIP1
TRAK2
TRAK1
HERPUD1
CXORF40B
GUK1
ALS2CL
SERTAD3
SERTAD1
CCT8
KLHDC3
PDGFRB
RALBP1
PTCD3
RARRES2
ARHGAP29
TRPC4AP
TANC1
EEF1A1
ILVBL
WBP1L
PPIG
AMOTL1
MPHOSPH8
RAF1
INPP1
KDM1A
CD82
GALNT18
NOL8
PRKY
PRDM2
CIB1
NDRG1
RXRB
CCND1
EFNB1
PES1
HCFC1R1
BAALC
S1PR2
SASH1
FDPS
PCID2
ARHGEF15
DDX56
TBCD
NASP
CTPS1
SMAD7
DEXI
SSH1
SSH2
PAN2
ARHGAP10
SKIV2L
CCDC130
SAE1
DOCK6
DDX5
LRRFIP1
NR4A1
RGS3
PODXL
KIF3B
CHMP1A
TMEM109
CAMTA2
CYC1
ANKZF1
SUN2
URGCP
SUN1
EGFL7
USP22
HSPA8
ARAP3
NAP1L4
PYCR2
PARVB
RPS3A
URB1
TGFBR2
IFT43
MEIS1
PEX5
NDUFS6
RASA3
ATG4B
GRSF1
LUC7L3
DOCK1
B4GALT3
AGAP3
TNFAIP1
DTX4
DDX42
DPP7
ZC3H7B
H2AFJ
GMPPA
TMEM208
ZC3H7A
RHBDF1
SGTA
DAPK3
TBC1D9
THAP2
USF1
TAF1
ERCC1
CLDN5
PIK3IP1
CRY2
POLDIP3
LDB1
EHMT1
USP19
RND1
SPRED1
VSIG2
MAT2A
GNA12
RPS6KA2
LRIG1
SCYL1
GET4
RBM6
CYTH1'''.split('\n')
    dnGenes = [x.strip() for x in dnGenes]
```

In [9]:
def computeSimilarity(inputUp, inputDn, up, dn, geneList, inputCard):
    intersection_card_up = len(set.intersection(*[set(inputUp), set(up)]))
    intersection_card_dn = len(set.intersection(*[set(inputDn), set(dn)]))
    reverse_intersection_card_1 = len(set.intersection(*[set(inputUp), set(dn)]))
    reverse_intersection_card_2 = len(set.intersection(*[set(inputDn), set(up)]))
    tot_intersection = intersection_card_up + intersection_card_dn
    tot_reverse_intersection = reverse_intersection_card_1 + reverse_intersection_card_2
    return float(tot_intersection - tot_reverse_intersection)/inputCard


def enrich(full_sigs, upGenes, dnGenes, geneList, inputCard):
    sim = []
    for i, sig in enumerate(full_sigs):
         sim.append(round(computeSimilarity(upGenes,dnGenes, sig['upGenes'], sig['dnGenes'], geneList, inputCard),3))
    return sim

def computePvals(inputUp, inputDn, up, dn):
    intersection_card_up = len(set.intersection(*[set(inputUp), set(up)]))
    intersection_card_dn = len(set.intersection(*[set(inputDn), set(dn)]))
    reverse_intersection_card_1 = len(set.intersection(*[set(inputUp), set(dn)]))
    reverse_intersection_card_2 = len(set.intersection(*[set(inputDn), set(up)]))
    tot_intersection = intersection_card_up + intersection_card_dn
    tot_reverse_intersection = reverse_intersection_card_1 + reverse_intersection_card_2
    intersection_diff = tot_intersection-tot_reverse_intersection
    tot_not_input = len(up) + len(dn) - tot_intersection
    tot_not_sig  = len(inputUp) + len(inputDn) - tot_intersection
    tot_neither = 12328 - tot_intersection - tot_not_input - tot_not_sig
    pval = pvalue(intersection_diff, tot_not_sig, tot_not_input, tot_neither)
    return pval

def useEnrichr(genes, title):
    ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/addList'
    genes_str = '\n'.join(genes)
    payload = {
        'list': (None, genes_str),
        'description': (None, title)
        }
    response = requests.post(ENRICHR_URL, files=payload)
    if not response.ok:
       raise Exception('Error analyzing gene list')
    data = json.loads(response.text)
    url = "https://amp.pharm.mssm.edu/Enrichr/enrich?dataset=" + data['shortId']
    return url

In [10]:
file = 'https://appyters.maayanlab.cloud/storage/L1000FWD/L1000FWD-KD_final_full_signatures.csv '
full_sigs_df = pd.read_csv(file, index_col=0)
full_sigs = full_sigs_df.to_dict('records')

for sig in full_sigs:
    sig['upGenes'] = json.loads(sig['upGenes'])
    sig['dnGenes'] = json.loads(sig['dnGenes'])
    
file = 'https://appyters.maayanlab.cloud/storage/L1000FWD/geneinfo.csv'
genes = pd.read_csv(file, index_col=0)
geneList = genes.set_index('pr_gene_symbol').index.tolist()

inputUp = sig['upGenes']
inputDn = sig['dnGenes']
input_card_up = len(set.intersection(*[set(inputUp), set(geneList)]))
input_card_dn = len(set.intersection(*[set(inputDn), set(geneList)]))
inputCard = input_card_up + input_card_dn
sim = enrich(full_sigs, upGenes, dnGenes, geneList, inputCard)
pvals = []
for sig in full_sigs:
    pvals.append("{:.2e}".format(computePvals(upGenes,dnGenes, sig['upGenes'], sig['dnGenes']).two_tail, 4))
full_sigs_df['p-value'] = pvals
full_sigs_df['Similarity'] = sim
full_sigs_df.sort_values(by='Similarity', ascending=False, inplace=True)
full_sigs_df['z-score'] = stats.zscore(full_sigs_df['Similarity']).round(3)

#### Signatures Similar to the Input Signature

In [11]:
full_sigs_df.reset_index(drop = True, inplace = True)
full_sigs_df.rename(columns={'pert_iname': 'Knockdown Gene'}, inplace=True)
cols = ['sig_id', 'Knockdown Gene','Similarity', 'z-score', 'p-value']
full_sigs_df[cols].head(10)

Unnamed: 0,sig_id,Knockdown Gene,Similarity,z-score,p-value
0,KDA001_PC3_96H:TRCN0000005606:-666,CREBL2,0.077,4.756,0.0807
1,KDA007_A375_96H:TRCN0000019360:-666,KAT6B,0.063,3.895,0.153
2,KDC010_HEPG2_96H:TRCN0000022104:-666,MSL3,0.063,3.895,0.337
3,KDC005_HA1E_96H:TRCN0000233114:-666,HMGCR,0.063,3.895,0.343
4,KDC008_HEPG2_96H:TRCN0000037695:-666,ITPK1,0.063,3.895,0.699
5,KDB004_HEPG2_96H:TRCN0000036225:-666,GLRX,0.063,3.895,0.335
6,KDB005_HA1E_96H:TRCN0000052476:-666,TGDS,0.059,3.648,0.317
7,KDA003_VCAP_120H:TRCN0000000595:-666,CDK7,0.059,3.648,0.496
8,KDC009_VCAP_120H:TRCN0000003882:-666,PSMA3,0.056,3.464,0.476
9,KDA009_A375_96H:TRCN0000194821:-666,NTRK3,0.056,3.464,0.787


#### Signatures Opposite the Input Signature

In [12]:
full_sigs_df.sort_values(by='Similarity', ascending=True, inplace=True)
full_sigs_df[cols].head(10)

Unnamed: 0,sig_id,Knockdown Gene,Similarity,z-score,p-value
24186,KDA005_A549_96H:TRCN0000039794:-666,AKT1,-0.063,-3.861,1.25e-06
24181,KDA003_PC3_96H:TRCN0000050675:-666,FAH,-0.059,-3.615,1.55e-06
24182,KDD001_A375_96H:TRCN0000015725:-666,SNF8,-0.059,-3.615,1.63e-06
24185,KDD006_HA1E_96H:TRCN0000011539:-666,GPR114,-0.059,-3.615,1.22e-06
24184,KDC006_A375_96H:TRCN0000045013:-666,SLC25A6,-0.059,-3.615,8.27e-07
24183,KDB002_A375_96H:TRCN0000281218:-666,TXLNA,-0.059,-3.615,1.13e-06
24180,ERGK017_VCAP_168H:TRCN0000195358:-666,ETNK2,-0.056,-3.43,7.22e-07
24179,DER001_PC3_96H:TRCN0000195247:-666,PGK1,-0.056,-3.43,7.28e-07
24178,KDC007_HA1E_96H:TRCN0000020728:-666,KLF2,-0.056,-3.43,1.33e-06
24177,KDD006_PC3_96H:TRCN0000073744:-666,PSEN2,-0.056,-3.43,1.46e-06


In [13]:
import plotly.figure_factory as ff

fig = ff.create_distplot([full_sigs_df['Similarity']], ['Similarity'], bin_size = .005, show_rug=False)
fig.update_layout(title_text='Density Plot of Similarity Scores')
fig.show()

## Calculations

The metrics seen above are calculated in the following ways:
* Similarity = (Up and down overlap - Opposite overlap)/ (Size of inputed gene set)
    * Up and down overlap is the number of up and down genes shared between the KD signature and the input
    * Opposite overlap is the number of genes up in the KD signature and down in the input or vice versa.
* The z-scores are calculated using the distribution of similarity scores over the 24,191 gene signatures
* The p-values are calculated using the fisher exact test modeled on the following contingency table
<h4><center>Contingency Table</center></h4>

|        Differentially Expressed in input and KD signature        	|   Differentially Expressed in input and not in KD signature  	|
|:----------------------------------------------------------------:	|:------------------------------------------------------------:	|
| <b>Differentially Expressed in KD signature and not in input</b> 	| <b>Not Differentially Expressed in input or KD signature</b> 	|

## Enrichr
A gene set of the genes that were knocked down in the 50 most similar and opposite signatures to the input is now created.

This gene set is then submitted to Enrichr. **Click on the link below to learn more about this gene set!** 

This information includes the following relevant items to this gene set: 
* Transcription factors
* Pathways
* Ontologies
* Diseases and drugs
* Cell types

In [14]:
most_sim_df = full_sigs_df.tail(50)
most_sim_genes = most_sim_df.set_index('Knockdown Gene').index.tolist()
title = 'Similar genes'
url = useEnrichr(most_sim_genes, title)

raw_html = '<a href=' + url + ' target="_blank">' + url + '</a>'
print("Click the link below for enrichment analysis of the 50 most similar signatures!")
display(HTML(raw_html))

most_opp_df = full_sigs_df.head(50)
most_opp_genes = most_opp_df.set_index('Knockdown Gene').index.tolist()
title = 'Opposite genes'
url = useEnrichr(most_opp_genes, title)

raw_html = '<a href=' + url + ' target="_blank">' + url + '</a>'
print("\nClick the link below for enrichment analysis of the 50 most opposite signatures!")
display(HTML(raw_html))

Click the link below for enrichment analysis of the 50 most similar signatures!



Click the link below for enrichment analysis of the 50 most opposite signatures!


***
# UMAP Dimensional Reduction of the Signatures

The coordinates of the UMAP of the knockdown signatures have already been computed and are being downloaded here (Parameters: n_neighbors = 15, metric = euclidean, min_dist = 0.1).

In [15]:
file = 'https://appyters.maayanlab.cloud/storage/L1000FWD/L1000FWD-KD_final_UMAP_coords.csv '
umap_df = pd.read_csv(file, index_col=0)
umap_df['Similarity'] = sim

#### UMAP coloring by similarity score. 

Zoom into clusters of blue and red to learn more about the signatures that are the most similar and opposite to your input.

In [16]:
import plotly.express as px
maxVal = full_sigs_df.loc[0]['Similarity']
minVal = full_sigs_df.loc[24186]['Similarity']
theRange = maxVal - minVal
highTen = full_sigs_df.loc[19]['Similarity'] - .00001
lowTen = full_sigs_df.loc[24166]['Similarity']+.000001
highCutoff = 1 - (maxVal - highTen)/ theRange
lowcutoff = (lowTen- minVal)/ theRange

fig = px.scatter(umap_df, x='x', y='y', color= 'Similarity', hover_data=['pert_iname'], color_continuous_scale=[(0,'red'),(lowcutoff,'red'),(lowcutoff,'rgba(0, 0, 0, 0.01)'),(highCutoff,'rgba(0, 0, 0, 0.01)'), (highCutoff,'blue'), (1,'blue')])
fig.show()

Uploading gene set library labels found on Enrichr (https://amp.pharm.mssm.edu/Enrichr/#stats).

Libraries Include: Chromosome Location (http://hgdownload.cse.ucsc.edu/downloads.html), KEGG pathway (https://www.kegg.jp/), GO Cellular Component and Biological Processes (http://www.geneontology.org/).

In [17]:
file = 'https://appyters.maayanlab.cloud/storage/L1000FWD/chromosomes.csv '
chromosomes_df = pd.read_csv(file, index_col=0)
chromosomes = chromosomes_df.set_index('Chromosome').index.tolist()
umap_df['Chromosome'] = chromosomes

file = 'https://appyters.maayanlab.cloud/storage/L1000FWD/kegg.csv '
kegg_df = pd.read_csv(file, index_col=0)
kegg = kegg_df.set_index('KEGG').index.tolist()
umap_df['KEGG'] = kegg

file = 'https://appyters.maayanlab.cloud/storage/L1000FWD/GO_cellular_component.csv'
go_cell_comp_df = pd.read_csv(file, index_col=0)
go_cell_comp = go_cell_comp_df.set_index('GO-Cellular-Component').index.tolist()
umap_df['GO-Cellular-Component'] = go_cell_comp

file = 'https://appyters.maayanlab.cloud/storage/L1000FWD/GO_biological_processes.csv '
go_bp_df = pd.read_csv(file, index_col=0)
go_bp = go_bp_df.set_index('GO-Biological-Processes').index.tolist()
umap_df['GO-Biological-Processes'] = go_bp

#### Plotting UMAP using the react-scatter-board developed by the Ma'ayan lab.

* Try coloring the visualization by Knockdown Gene (color), Chromosome, KEGG, GO-Biological-Processes, time, GO-Cellular-Component.

* Search by Knockdown Gene (pert_iname), Chromosome, or by pathway.

* Zoom into clusters to explore which genes might be related!

In [18]:
umap_df.reset_index(drop = True, inplace = True)
umap_df.drop(columns = ['upGenes', 'dnGenes'], inplace = True)
umap_json = json.loads(umap_df.to_json(orient='records'))
ScatterBoard(
    id='scatterboard-2d',
    is3d=False,
    data= umap_json,
    shapeKey = 'pert_itime',
    colorKey='color',
    labelKeys=['Chromosome', 'pert_iname', 'cell_id', 'KEGG', 'GO-Biological-Processes', 'GO-Cellular-Component'],
    searchKeys=['pert_iname', 'Chromosome'],
)

## References

[1] Subramanian, Aravind et al. “A Next Generation Connectivity Map: L1000 Platform and the First 1,000,000 Profiles.” Cell vol. 171,6 (2017): 1437-1452.e17. doi:10.1016/j.cell.2017.10.049

[2] Clark, Neil R., et al. "The characteristic direction: a geometrical approach to identify differentially expressed genes." BMC bioinformatics 15.1 (2014): 79. 

[3] Wang, Z., Monteiro, C. D., Jagodnik, K. M., Fernandez, N. F., Gundersen, G. W., ... & Ma'ayan, A. (2016) Extraction and Analysis of Signatures from the Gene Expression Omnibus by the Crowd. Nature Communications doi: 10.1038/ncomms12846

[4] Kuleshov, M.V., Jones, M.R., Rouillard, A.D., Fernandez, N.F., Duan, Q., Wang, Z., Koplev, S., Jenkins, S.L., Jagodnik, K.M. and Lachmann, A. (2016) Enrichr: a comprehensive gene set enrichment analysis web server 2016 update. Nucleic acids research, 44, W90-W97. 

[5] Edgar R, Domrachev M, Lash AE. Gene Expression Omnibus: NCBI gene expression and hybridization array data repository Nucleic Acids Res. 2002 Jan 1;30(1):207-10

[6] Wang, Zichen, et al. "L1000FWD: fireworks visualization of drug-induced transcriptomic signatures." Bioinformatics 34.12 (2018): 2150-2152.

[7] Hafner, Marc, et al. "L1000CDS2: LINCS L1000 characteristic direction signatures search engine.” 

[8] Ashburner, M., Ball, C.A., Blake, J.A., Botstein, D., Butler, H., Cherry, J.M., Davis, A.P., Dolinski, K., Dwight, S.S. and Eppig, J.T. (2000) Gene Ontology: tool for the unification of biology. Nature genetics, 25, 25. 

[9] Kanehisa, M. and Goto, S. (2000) KEGG: kyoto encyclopedia of genes and genomes. Nucleic acids research, 28, 27-30. 