In [1]:
## Full backend analysis for the CREEDS and L1000 dataset 
import os
import pandas as pd
from pandas.compat import StringIO
import numpy as np
from numpy import loadtxt
import sys
import json
from pprint import pprint
import requests
import objectpath
import csv
import re
import matplotlib.pyplot as plt
import json, requests
from pprint import pprint
import itertools
import scipy
from scipy.spatial import distance
from sklearn.metrics.pairwise import pairwise_distances
from clustergrammer_widget import *
def get_geneset(df, indexer):
    """Return the index labels of the entries equal to 1 in ``df.loc[indexer, :]``.

    Parameters:
        df: object supporting ``.loc[indexer, :]`` (a pandas DataFrame in practice).
        indexer: row selector forwarded unchanged to ``df.loc``.

    Returns:
        A list with the ``.index`` labels of the selection after it has been
        filtered by the ``== 1`` mask.
    """
    selected = df.loc[indexer, :]
    is_member = selected.eq(1)
    members = selected[is_member]
    return list(members.index)

In [2]:

import json
import urllib.request, urllib.parse

def fetch(endpoint, method='GET', data={}):
  ''' Helper method for fetching from json APIs with urllib

  Parameters:
    endpoint: URL of the API endpoint.
    method: 'GET' or 'POST'. For GET, `data` is url-encoded and appended to
      `endpoint` as a query string; for POST, `data` is JSON-encoded and sent
      as the request body.
    data: mapping of query parameters (GET) or JSON payload (POST).

  Returns:
    The response body decoded with `json.load`.

  Raises:
    ValueError: if `method` is neither 'GET' nor 'POST'.
    Any exception raised by `urllib.request.urlopen` or `json.load` propagates.
  '''
  # `data` is only read, never mutated, so the shared default dict is safe.
  headers = {
    'Content-Type': 'application/json',
  }
  if method == 'GET':
    req = urllib.request.Request(
      endpoint + '?' + urllib.parse.urlencode(data),
      method='GET',
      headers=headers,
    )
  elif method == 'POST':
    req = urllib.request.Request(
      endpoint,
      method='POST',
      headers=headers,
      data=json.dumps(data).encode(),
    )
  else:
    # Fail with a clear error instead of hitting an unbound `req` below.
    raise ValueError('Unsupported method: {!r}'.format(method))
  # Close the HTTP response deterministically once the body has been parsed.
  with urllib.request.urlopen(req) as res:
    return json.load(res)

def fetch_sigcom_all(
  where,
  data_api='https://amp.pharm.mssm.edu/enrichmentapi',
  meta_api='https://amp.pharm.mssm.edu/signature-commons-metadata-api',
):
  ''' Collect signatures, their libraries, their data and their entities for a query.

  Parameters:
    where: filter object sent as `filter.where` to the metadata API's
      `/signatures/find` endpoint.
    data_api: base URL of the data API.
    meta_api: base URL of the metadata API.

  Returns:
    A dict with four keys:
      'datasets':   list of data API responses, one per distinct
                    (dataset, dataset_type) pair among the libraries,
      'libraries':  {library id: library record},
      'signatures': {signature id: signature record},
      'entities':   {entity id: entity record}.
  '''
  def find(resource, condition):
    # POST a `where` filter to the metadata API's `/<resource>/find` endpoint.
    return fetch(
      meta_api + '/' + resource + '/find',
      method='POST',
      data={'filter': {'where': condition}},
    )

  # Step 1: signature records matching the caller's filter.
  signatures = find('signatures', where)
  signature_lookup = {}
  for sig in signatures:
    signature_lookup[sig['id']] = sig

  # Step 2: records of every library referenced by those signatures.
  library_ids = list({sig['library'] for sig in signatures})
  libraries = find('libraries', {'id': {'inq': library_ids}})
  library_lookup = {}
  for lib in libraries:
    library_lookup[lib['id']] = lib

  # Step 3: one data API request per distinct (dataset, dataset_type) pair.
  dataset_keys = list({(lib['dataset'], lib['dataset_type']) for lib in libraries})
  datasets = []
  for dataset_id, dataset_type in dataset_keys:
    # Dataset types starting with 'geneset' use the set endpoint, others the rank endpoint.
    if dataset_type.startswith('geneset'):
      route = '/api/v1/fetch/set'
    else:
      route = '/api/v1/fetch/rank'
    # Only request the signatures whose library lives in this dataset.
    member_ids = [
      sig['id']
      for sig in signatures
      if library_lookup[sig['library']]['dataset'] == dataset_id
    ]
    datasets.append(fetch(
      data_api + route,
      method='POST',
      data={
        'entities': [],
        'signatures': member_ids,
        'database': dataset_id,
      },
    ))

  # Step 4: records for every entity id mentioned by any dataset response.
  unique_entity_ids = set()
  for dataset in datasets:
    unique_entity_ids.update(dataset.get('entities', []))
  entities = find('entities', {'id': {'inq': list(unique_entity_ids)}})
  entity_lookup = {}
  for ent in entities:
    entity_lookup[ent['id']] = ent

  # Hand everything back in a single bundle.
  bundle = {}
  bundle['datasets'] = datasets
  bundle['libraries'] = library_lookup
  bundle['signatures'] = signature_lookup
  bundle['entities'] = entity_lookup
  return bundle

def convert_to_genesets(sigcom_all):
  ''' Turn a { signatures, entities, libraries, datasets } bundle into metadata genesets.

  For every signature listed in every dataset, produces a tuple of:
    - a copy of the signature's record (looked up by the dataset entry's 'uid')
      whose 'library' key is replaced by the full library record, and
    - the list of entity records for the entry's 'entities' ids, with None for
      any id that is absent from the bundle's 'entities' lookup.

  Returns the tuples as a list, in dataset order then signature order.
  '''
  genesets = []
  for dataset in sigcom_all['datasets']:
    for sig in dataset['signatures']:
      sig_record = sigcom_all['signatures'][sig['uid']]
      library_record = sigcom_all['libraries'][sigcom_all['signatures'][sig['uid']]['library']]
      # dict(...) builds a shallow copy, so the bundle's own record is left untouched.
      described = dict(sig_record, library=library_record)
      members = []
      for ent in sig['entities']:
        members.append(sigcom_all['entities'].get(ent))
      genesets.append((described, members))
  return genesets

def simplify_genesets(meta_genesets, sigfmt, entfmt):
  ''' Map ({sig:meta}, [{ent:meta1}, {ent:meta2}, ...]) pairs to (sig, [ent1, ent2, ...]).

  Parameters:
    meta_genesets: iterable of (signature, entities) pairs.
    sigfmt: callable applied to each signature.
    entfmt: callable applied to each element of each entities sequence.

  Returns:
    A list of (sigfmt(signature), [entfmt(entity), ...]) tuples in input order.
  '''
  simplified = []
  for signature, entities in meta_genesets:
    label = sigfmt(signature)
    members = [entfmt(entity) for entity in entities]
    simplified.append((label, members))
  return simplified

def simplified_to_gmt(simplified):
  ''' Render (sig, [ent1, ent2, ...]) pairs as GMT text.

  Each kept pair becomes one line: the signature, two tab characters, then the
  entities joined by single tabs. Pairs whose signature is falsy are skipped and
  falsy entities (e.g. None) are dropped. Lines are joined with newlines; no
  trailing newline is appended.
  '''
  lines = []
  for signature, entities in simplified:
    if not signature:
      continue
    kept = [entity for entity in entities if entity]
    lines.append(signature + '\t\t' + '\t'.join(kept))
  return '\n'.join(lines)

In [6]:
# Full-text search term; other cells read it through the `user_input` name.
user_input = "WARFARIN"

In [7]:
### DRUG MATRIX
# Query the library with the UUID below for signatures whose metadata matches
# the full-text term in `user_input`, then pair each signature's metadata with
# the metadata of its entities.
# NOTE(review): original author's remark — "returning none right now. Will need
# to change later." TODO confirm whether this still applies.

drugmatrix = convert_to_genesets(fetch_sigcom_all({
  'library': '30b22907-d3e2-11e8-92a7-787b8ad942f3',
  'meta': {
    'fullTextSearch': str(user_input),
  },
}))

In [13]:

# Display the fetched list of (signature metadata, entity metadata list) pairs.
drugmatrix

[({'$validator': '/dcic/signature-commons-schema/v4/core/signature.json',
   'id': '106779ae-c1a1-11e8-98e8-0242ac170004',
   'library': {'$validator': '/dcic/signature-commons-schema/v4/core/library.json',
    'id': '30b22907-d3e2-11e8-92a7-787b8ad942f3',
    'resource': '762649f1-48dc-43b2-9122-605dd2825f03',
    'dataset': 'enrichr_geneset',
    'dataset_type': 'geneset_library',
    'meta': {'DOI': '10.1038/ncomms12846',
     'Icon': 'static/images/DrugMatrix.ico',
     'PMID': '16005536',
     'Spec': '(?P<Original_String>((?P<UUID>.*):SC=)?(\\(+\\)-)?(?P<Small_Molecule>.+)-(?P<Concentration>.+?)-(?P<Organism>.+?)-(?P<Tissue>.+?)-(?P<Time_Point>.+?)-(?P<Direction>.+?))\\t\\t(?P<Genes>.+)',
     'Assay': 'microarray',
     'Readout': 'mRNA',
     'Version': '2016',
     'Filename': 'DrugMatrix',
     'Organism': 'Rattus rattus',
     'Weighted': 'False',
     '$validator': '/@dcic/signature-commons-schema/v4/meta/library/sigcom.json',
     'Access_URL': 'http://amp.pharm.mssm.edu/E

In [50]:
# Echo the current search term as the cell output.
# NOTE(review): the recorded output is 'digoxin' while the visible assignment is
# "WARFARIN" — the cells appear to have been run out of order or with edited
# state; confirm before relying on the saved outputs.
str(user_input)

'digoxin'

In [None]:


### DrugMatrix query: albuterol
# NOTE(review): this cell was originally headed "OMIM EXPANDED Disease", but the
# library UUID below is the one whose recorded metadata in this notebook reads
# Filename 'DrugMatrix' — confirm which library was intended.
# Fetches the signatures in that library matching the full-text term
# 'albuterol' and pairs each signature's metadata with its entities' metadata.


albuterol_in_drugmatrix = convert_to_genesets(fetch_sigcom_all({
  'library': '30b22907-d3e2-11e8-92a7-787b8ad942f3',
  'meta': {
    'fullTextSearch': 'albuterol',
  },
}))

In [None]:
# Stray UUID kept for reference (a bare UUID is not valid Python, so it is commented out):
# 310a24e6-d3e2-11e8-bf78-787b8ad942f3

In [11]:
# Reduce each (signature metadata, entity metadata list) pair to plain values:
# the signature's meta 'Original_String' and each entity's meta 'Name'; falsy
# signatures/entities map to None.
# NOTE(review): `converted` is not assigned in any cell visible above this one —
# confirm which cell defines it (possibly leftover kernel state) so the notebook
# survives Restart & Run All.
simplified = simplify_genesets(
    converted,
    lambda sig: sig['meta']['Original_String'] if sig else None,
    lambda ent: ent['meta']['Name'] if ent else None
)


In [3]:

# Submit an example gene list to the Enrichr addList endpoint and print the
# identifiers returned for the stored list.
ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/addList'
# One gene symbol per line, as a single newline-separated string.
genes_str = '\n'.join([
    'PHF14', 'RBM3', 'MSL1', 'PHF21A', 'ARL10', 'INSR', 'JADE2', 'P2RX7',
    'LINC00662', 'CCDC101', 'PPM1B', 'KANSL1L', 'CRYZL1', 'ANAPC16', 'TMCC1',
    'CDH8', 'RBM11', 'CNPY2', 'HSPA1L', 'CUL2', 'PLBD2', 'LARP7', 'TECPR2', 
    'ZNF302', 'CUX1', 'MOB2', 'CYTH2', 'SEC22C', 'EIF4E3', 'ROBO2',
    'ADAMTS9-AS2', 'CXXC1', 'LINC01314', 'ATF7', 'ATP5F1'
])
description = 'Example gene list'
# (None, value) tuples passed through `files=` are sent by requests as
# multipart form fields without a filename.
payload = {
    'list': (None, genes_str),
    'description': (None, description)
}

# requests applies no timeout by default; bound the wait (seconds) so an
# unresponsive server cannot hang the kernel indefinitely.
response = requests.post(ENRICHR_URL, files=payload, timeout=60)
if not response.ok:
    raise Exception('Error analyzing gene list')

# The response body is JSON; the recorded run shows 'shortId' and 'userListId' keys.
data = json.loads(response.text)
print(data)

{'shortId': '227256a622d1f094b28ba410b89509ee', 'userListId': 18101728}
