swagger api specification: http://biggim.ncats.io/api

# A few helper functions for posting and getting API requests


In [1]:
import json
import requests
import pandas
import time

# Root URL of the BigGIM REST API; the helper functions below take it as
# the default value of their `base_url` parameter.
base_url = 'http://biggim.ncats.io/api'

#a couple of simple helper functions
def post(endpoint, data=None, base_url=base_url):
    """POST ``data`` to ``<base_url>/<endpoint>`` and return the decoded JSON reply.

    Parameters
    ----------
    endpoint : str
        Path relative to ``base_url``. A leading slash is tolerated.
    data : dict, optional
        Payload passed to ``requests.post`` as its ``data`` argument.
    base_url : str
        API root; defaults to the module-level ``base_url``.

    Returns
    -------
    The response body parsed as JSON.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    """
    # Normalise the join so that '/foo' does not turn into '<base>//foo'.
    url = '%s/%s' % (base_url.rstrip('/'), endpoint.lstrip('/'))
    req = requests.post(url, data=data)
    req.raise_for_status()
    return req.json()

def get(endpoint, data=None, base_url=base_url):
    """GET ``<base_url>/<endpoint>`` and return the decoded JSON reply.

    Parameters
    ----------
    endpoint : str
        Path relative to ``base_url``. A leading slash is tolerated.
    data : dict, optional
        Query arguments, passed to ``requests.get`` as its ``data`` argument.
    base_url : str
        API root; defaults to the module-level ``base_url``.

    Returns
    -------
    The response body parsed as JSON.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (via ``raise_for_status``).
    """
    # Normalise the join so that '/foo' does not turn into '<base>//foo'.
    url = '%s/%s' % (base_url.rstrip('/'), endpoint.lstrip('/'))
    # NOTE(review): the arguments travel in the request *body* (data=), not in
    # the query string, even though the log line below renders them after a
    # '?'. Presumably the server accepts both; confirm before switching to
    # `params=`.
    req = requests.get(url, data=data)
    req.raise_for_status()
    sent = req.request.url
    if req.request.body:
        # Only append the arguments when there are some, so that requests
        # without arguments are not logged with a trailing '?None'.
        sent = '%s?%s' % (sent, req.request.body)
    print("Sent: GET %s" % sent)
    return req.json()
    

def jprint(dct):
    """Pretty-print ``dct`` to stdout as JSON indented by two spaces."""
    rendered = json.dumps(dct, indent=2)
    print(rendered)
    
def wrapper(endpoint, data=None, base_url=base_url):
    """Submit a query, poll until it stops running, and load the results.

    The function GETs ``endpoint`` with ``data``, reads ``request_id`` from
    the reply, then polls ``<resource>/status/<request_id>`` (where
    ``<resource>`` is the first path segment of ``endpoint``) until the
    reported ``status`` is no longer ``'running'``. Every reply is printed.

    Parameters
    ----------
    endpoint : str
        Query endpoint relative to ``base_url``, e.g. ``'biggim/query'``.
    data : dict, optional
        Query arguments forwarded to ``get``.
    base_url : str
        API root used for both the submission and the status polls.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the CSV files listed under ``request_uri`` in the
        final status reply.

    Raises
    ------
    requests.HTTPError
        Re-raised after printing when the submission or a status poll fails.
    """
    try:
        response = get(endpoint, data, base_url)
        jprint(response)
    except requests.HTTPError as e:
        print(e)
        if e.response.status_code == 400:
            jprint(e.response.json())
        raise

    # Strip slashes first so that '/biggim/query' still resolves to 'biggim'
    # instead of an empty first segment.
    resource = endpoint.strip('/').split('/')[0]
    status_endpoint = '%s/status/%s' % (resource, response['request_id'])
    try:
        ctr = 1
        while True:
            # Poll the same server the query was submitted to.
            query_status = get(status_endpoint, base_url=base_url)
            jprint(query_status)
            if query_status['status'] != 'running':
                # query has finished
                break
            # linear backoff: wait 1s, 2s, 3s, ... between polls
            time.sleep(ctr)
            ctr += 1
            print("Checking again")
    except requests.HTTPError as e:
        print(e)
        if e.response.status_code == 400:
            jprint(e.response.json())
        raise
    return pandas.concat([pandas.read_csv(uri) for uri in query_status['request_uri']])

## Big GIM query for MODY genes in healthy pancreas

In [2]:
#prep
import time
import pandas
# Fetch the study and table metadata advertised by the API.
studies = get('metadata/study')
study_names = [s['name'] for s in studies]
# No leading slash here: get() joins with '/', so '/metadata/table' would be
# requested as '<base_url>//metadata/table'.
tables = get('metadata/table')

# Use the table the service flags as its default for the queries below.
default_tables = [t['name'] for t in tables if t['default']]
if not default_tables:
    raise LookupError("metadata/table did not report a default table")
default_table = default_tables[0]
print(default_table)

Sent: GET http://biggim.ncats.io/api/metadata/study?None
Sent: GET http://biggim.ncats.io/api//metadata/table?None
BigGIM_70_v1


### Identify genes with high correlation to MODY genes (ids1)

In [9]:
# Gene pairs involving any of the ids1 genes whose GTEx pancreas correlation
# exceeds 0.5.
example_query = {
  "restriction_gt": "GTEx_Pancreas_Correlation,0.5",
  "table": default_table,
  "columns": "GTEx_Pancreas_Correlation",
  "ids1": "3630,2645,5078,6927,6928,1056,8462,4760,3172,3651,6833,640,3767,26060",
  "limit": 100000
}

# Submit the query, then poll its status once per second until it is no
# longer 'running'. This is the same flow that wrapper() implements; it is
# spelled out here so the intermediate replies stay available as variables.
try:
    query_submit = get('biggim/query', data=example_query)
    jprint(query_submit)

    while True:
        query_status = get('biggim/status/%s' % (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] != 'running':
            # query has finished
            break
        time.sleep(1)
        print("Checking again")
except requests.HTTPError as e:
    print(e)
    try:
        jprint(e.response.json())
    except ValueError:
        # Error body was not JSON (e.g. an HTML error page); show it raw.
        print(e.response.text)
    # Re-raise so the cell stops here instead of carrying on with an undefined
    # (or stale, from an earlier run) query_submit / query_status.
    raise

# The final status reply lists one or more CSV result files under 'request_uri'.
result = pandas.concat([pandas.read_csv(uri) for uri in query_status['request_uri']])
result

Sent: GET http://biggim.ncats.io/api/biggim/query?restriction_gt=GTEx_Pancreas_Correlation%2C0.5&table=BigGIM_70_v1&limit=100000&ids1=3630%2C2645%2C5078%2C6927%2C6928%2C1056%2C8462%2C4760%2C3172%2C3651%2C6833%2C640%2C3767%2C26060&columns=GTEx_Pancreas_Correlation
{
  "request_id": "d4442ba7-bf05-4172-9c1a-26f3fe86af09",
  "status": "submitted"
}
Sent: GET http://biggim.ncats.io/api/biggim/status/d4442ba7-bf05-4172-9c1a-26f3fe86af09?None
{
  "message": "Query job is running.",
  "status": "running",
  "request_id": "d4442ba7-bf05-4172-9c1a-26f3fe86af09"
}
Checking again
Sent: GET http://biggim.ncats.io/api/biggim/status/d4442ba7-bf05-4172-9c1a-26f3fe86af09?None
{
  "message": "Query job is running.",
  "status": "running",
  "request_id": "d4442ba7-bf05-4172-9c1a-26f3fe86af09"
}
Checking again
Sent: GET http://biggim.ncats.io/api/biggim/status/d4442ba7-bf05-4172-9c1a-26f3fe86af09?None
{
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/d4442ba7-bf05-4172-9c1a

Unnamed: 0,GPID,Gene1,Gene2,GTEx_Pancreas_Correlation
0,7730000003630,3630,773,0.5713
1,7740000003630,3630,774,0.5155
2,25720000003630,3630,2572,0.8111
3,84620000116151,116151,8462,0.5988
4,84620000029915,29915,8462,0.5297
5,84620000008493,8493,8462,0.5406
6,84620000116064,116064,8462,0.5440
7,84620000219899,219899,8462,0.5231
8,84620000084248,84248,8462,0.5054
9,84620000057590,57590,8462,0.5029


## Take 500 genes with highest similarity to the MODY genes

In [10]:
# Maximum number of distinct genes to keep.
n_top_genes = 500

# Rank the gene pairs from most to least correlated.
df = result.sort_values(by='GTEx_Pancreas_Correlation', ascending=False)
df = df.reset_index(drop=True)

# Walk the pairs in rank order (row 0 Gene1, row 0 Gene2, row 1 Gene1, ...)
# and keep the first `n_top_genes` distinct ids. Counting gene by gene caps
# the list at exactly `n_top_genes`; an equality check made once per row can
# jump from 499 straight to 501 and never stop.
ranked_ids = df[['Gene1', 'Gene2']].values.ravel()
top_ids = pandas.unique(ranked_ids)[:n_top_genes]

# Sort numerically, then render as the comma-separated string the API expects.
gene_list = [str(gene_id) for gene_id in sorted(int(x) for x in top_ids)]
str1 = ','.join(gene_list)
print(str1)

22,51,52,91,116,316,405,573,653,694,773,816,900,976,988,1045,1113,1114,1131,1141,1207,1272,1363,1457,1615,1662,1729,1951,1956,1965,1974,1992,1996,1997,2309,2339,2572,2641,2642,2645,2775,2781,2786,2864,2891,2892,2926,2961,2965,3185,3297,3326,3382,3431,3454,3588,3630,3642,3651,3652,3670,3735,3741,3752,3757,3763,3785,3800,3837,3840,3842,3843,3895,4013,4076,4135,4179,4217,4651,4661,4729,4733,4760,4763,4782,4821,4928,5001,5036,5045,5062,5080,5094,5108,5122,5126,5236,5283,5291,5432,5464,5501,5502,5522,5525,5537,5711,5718,5771,5798,5865,5887,5910,5917,5923,5981,5982,6009,6095,6198,6252,6258,6418,6456,6599,6616,6672,6732,6749,6750,6780,6804,6833,6855,6860,6861,6924,6927,7014,7049,7071,7084,7111,7251,7266,7270,7276,7320,7322,7323,7329,7345,7398,7444,7458,7469,7494,7525,7555,7844,7857,7913,8027,8030,8086,8204,8310,8315,8439,8443,8451,8452,8462,8533,8661,8662,8715,8725,8763,8780,8833,8881,8882,8883,8938,8941,9053,9066,9118,9188,9218,9254,9255,9312,9318,9342,9527,9541,9589,9616,9665,9699,9705,9729

## Big GIM query for MODY genes in pancreas - NDEx input

In [13]:
#ids1 and ids2 now contain the 500 genes most associated with the 14 Mody genes (including the 14 Mody genes)
# ids1 and ids2 are both set to str1 (the gene list built above from the genes
# most correlated with the MODY genes), so the query returns the pairwise
# correlations within that gene set. A threshold of -2 is below any
# correlation value, so no pair is filtered out.
example_query = {
  "restriction_gt": "GTEx_Pancreas_Correlation,-2",
  "table": default_table,
  "columns": "GTEx_Pancreas_Correlation",
  "ids1": str1,
  "ids2": str1,
  "limit": 100000000000
}

# Submit the query, then poll its status once per second until it is no
# longer 'running'. This is the same flow that wrapper() implements; it is
# spelled out here so the intermediate replies stay available as variables.
try:
    query_submit = get('biggim/query', data=example_query)
    jprint(query_submit)

    while True:
        query_status = get('biggim/status/%s' % (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] != 'running':
            # query has finished
            break
        time.sleep(1)
        print("Checking again")
except requests.HTTPError as e:
    print(e)
    try:
        jprint(e.response.json())
    except ValueError:
        # Error body was not JSON (e.g. an HTML error page); show it raw.
        print(e.response.text)
    # Re-raise so the cell stops here instead of carrying on with an undefined
    # (or stale, from an earlier run) query_submit / query_status.
    raise

# The final status reply lists one or more CSV result files under 'request_uri'.
result = pandas.concat([pandas.read_csv(uri) for uri in query_status['request_uri']])
result

Sent: GET http://biggim.ncats.io/api/biggim/query?restriction_gt=GTEx_Pancreas_Correlation%2C-2&table=BigGIM_70_v1&ids2=22%2C51%2C52%2C91%2C116%2C316%2C405%2C573%2C653%2C694%2C773%2C816%2C900%2C976%2C988%2C1045%2C1113%2C1114%2C1131%2C1141%2C1207%2C1272%2C1363%2C1457%2C1615%2C1662%2C1729%2C1951%2C1956%2C1965%2C1974%2C1992%2C1996%2C1997%2C2309%2C2339%2C2572%2C2641%2C2642%2C2645%2C2775%2C2781%2C2786%2C2864%2C2891%2C2892%2C2926%2C2961%2C2965%2C3185%2C3297%2C3326%2C3382%2C3431%2C3454%2C3588%2C3630%2C3642%2C3651%2C3652%2C3670%2C3735%2C3741%2C3752%2C3757%2C3763%2C3785%2C3800%2C3837%2C3840%2C3842%2C3843%2C3895%2C4013%2C4076%2C4135%2C4179%2C4217%2C4651%2C4661%2C4729%2C4733%2C4760%2C4763%2C4782%2C4821%2C4928%2C5001%2C5036%2C5045%2C5062%2C5080%2C5094%2C5108%2C5122%2C5126%2C5236%2C5283%2C5291%2C5432%2C5464%2C5501%2C5502%2C5522%2C5525%2C5537%2C5711%2C5718%2C5771%2C5798%2C5865%2C5887%2C5910%2C5917%2C5923%2C5981%2C5982%2C6009%2C6095%2C6198%2C6252%2C6258%2C6418%2C6456%2C6599%2C6616%2C6672%2C6732%2C674

Unnamed: 0,GPID,Gene1,Gene2,GTEx_Pancreas_Correlation
0,37350000003840,3840,3735,0.7380
1,9880000003840,3840,988,0.7827
2,27810000003840,3840,2781,0.2982
3,19740000003840,3840,1974,0.6924
4,23390000003840,3840,2339,0.7637
5,10450000003840,3840,1045,0.2566
6,19920000003840,3840,1992,0.5478
7,32970000003840,3840,3297,0.5592
8,14570000003840,3840,1457,0.8412
9,31850000003840,3840,3185,0.7951
