# lil' GIM and Big GIM Mody genes example

swagger api specification: http://biggim.ncats.io/api

# A few helper functions for posting and getting api requests


In [1]:
import json
import requests
import pandas
import time

base_url = 'http://biggim.ncats.io/api'

#a couple of simple helper functions
def post(endpoint, data=None, base_url=base_url):
    """POST form data to ``<base_url>/<endpoint>`` and return the decoded JSON.

    endpoint -- path appended to ``base_url`` (joined with a single '/').
    data     -- form fields handed to ``requests.post`` via ``data=``;
                ``None`` (the default) sends no fields.
    base_url -- API root; defaults to the module-level ``base_url``.

    Raises ``requests.HTTPError`` (via ``raise_for_status``) when the server
    answers with an error status.
    """
    # ``None`` instead of ``{}`` as the default avoids sharing one mutable
    # dict object between every call of this helper.
    req = requests.post('%s/%s' % (base_url, endpoint), data=data)
    req.raise_for_status()
    return req.json()

def get(endpoint, data=None, base_url=base_url):
    """GET ``<base_url>/<endpoint>``, log the request, and return the JSON.

    endpoint -- path appended to ``base_url`` (joined with a single '/').
    data     -- request fields handed to ``requests.get`` via ``data=``;
                ``None`` (the default) sends no fields.
    base_url -- API root; defaults to the module-level ``base_url``.

    Raises ``requests.HTTPError`` (via ``raise_for_status``) when the server
    answers with an error status; in that case nothing is printed here.
    """
    # ``None`` instead of ``{}`` as the default avoids sharing one mutable
    # dict object between every call of this helper.
    # NOTE(review): the fields are passed with ``data=`` (request body), not
    # ``params=`` (query string). The "Sent" line below prints "<url>?<body>",
    # which is why a request without fields is logged as "...?None".
    req = requests.get('%s/%s' % (base_url, endpoint), data=data)
    req.raise_for_status()
    print("Sent: GET %s?%s" % (req.request.url, req.request.body))
    return req.json()
    

def jprint(dct):
    """Pretty-print *dct* to stdout as JSON with a two-space indent."""
    pretty = json.dumps(dct, indent=2)
    print(pretty)
    
def _print_http_error(e):
    """Print an HTTPError and, for a 400 response, its JSON body as well."""
    print(e)
    if e.response.status_code == 400:
        jprint(e.response.json())


def wrapper(endpoint, data=None, base_url=base_url):
    """Submit a query, poll until it stops running, and load the result CSVs.

    endpoint -- query endpoint such as "lilgim/query"; the part before the
                first '/' is reused to build the "<prefix>/status/<id>" URL.
    data     -- request fields forwarded to ``get``.
    base_url -- API root used for BOTH the submit and the status requests.

    Returns a single DataFrame: the concatenation of every CSV listed under
    ``request_uri`` in the final status response.

    Raises ``requests.HTTPError`` if either request fails (the error is
    printed first), and ``KeyError`` if the job stopped running without
    providing a ``request_uri``.
    """
    try:
        response = get(endpoint, data, base_url)
        jprint(response)
    except requests.HTTPError as e:
        _print_http_error(e)
        raise

    status_endpoint = '%s/status/%s' % (endpoint.split('/')[0],
                                        response['request_id'])
    try:
        ctr = 1
        while True:
            # Pass base_url through so polling hits the same server the
            # query was submitted to.
            query_status = get(status_endpoint, base_url=base_url)
            jprint(query_status)
            if query_status['status'] != 'running':
                # query has finished
                break
            # linear backoff: wait 1s, 2s, 3s, ... between polls
            time.sleep(ctr)
            ctr += 1
            print("Checking again")
    except requests.HTTPError as e:
        _print_http_error(e)
        raise

    if 'request_uri' not in query_status:
        # The job left the 'running' state without producing result files;
        # say so instead of failing with a bare KeyError('request_uri').
        raise KeyError("no 'request_uri' in status response (status=%r)"
                       % (query_status['status'],))
    return pandas.concat(
        [pandas.read_csv(uri) for uri in query_status['request_uri']])

## lilGIM
Lil' GIM is a simplified interface to BigGIM

### Initial mody query.

We provide a list of 14 MODY genes. A query request is triggered, which returns a request_id. You then query a status endpoint to find out whether the request has finished processing and where the result is. Provided there are no errors, you can download the CSV and interact with the values.

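This query passes only the gene ids, so no tissue or row limit is specified: it will look at all tissues (whole_body in the BRENDA tissue ontology) and use the default row limit (the run shown below returned 5380 rows).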

In [2]:
# Submit the 14 MODY gene ids to lilGIM with no other fields, wait for the
# job to finish, and preview the first rows of the result.
endpoint = "lilgim/query"
data = dict(ids='3630,2645,5078,6927,6928,1056,8462,4760,3172,3651,6833,640,3767,26060')
results = wrapper(endpoint, data)
results.head()

Sent: GET http://biggim.ncats.io/api/lilgim/query?ids=3630%2C2645%2C5078%2C6927%2C6928%2C1056%2C8462%2C4760%2C3172%2C3651%2C6833%2C640%2C3767%2C26060
{
  "status": "submitted",
  "request_id": "bef763c2-24f9-4b3f-ad47-a295deda5350"
}
Sent: GET http://biggim.ncats.io/api/lilgim/status/bef763c2-24f9-4b3f-ad47-a295deda5350?None
{
  "request_id": "bef763c2-24f9-4b3f-ad47-a295deda5350",
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/bef763c2-24f9-4b3f-ad47-a295deda5350000000000000.csv"
  ],
  "status": "complete",
  "rows": 5380,
  "processed_data": "0B",
  "size": "305.9 KB"
}


Unnamed: 0,Gene1,Symbol1,Gene2,Symbol2,maxCorr,minCorr,aveCorr
0,931,MS4A1,640,BLK,0.9377,-0.0033,0.631467
1,973,CD79A,640,BLK,0.9356,0.0254,0.604439
2,23495,TNFRSF13B,640,BLK,0.8791,0.0019,0.565091
3,930,CD19,640,BLK,0.928,-0.1733,0.544906
4,79368,FCRL2,640,BLK,0.9078,-0.0791,0.534111


### View available tissues via the metadata/tissue endpoint

In [3]:
# List the tissue names the API knows about.
# NOTE(review): the leading '/' in the endpoint yields a double slash in the
# request URL ("api//metadata/tissue"); the recorded run shows the server
# accepting it.
try:
    response = get('/metadata/tissue')
    print("Available Tissues")
    jprint(response)
# The exception class lives on the requests module; `response` is only the
# decoded JSON and is not even bound when get() raises.
except requests.HTTPError as e:
    print(e)

Sent: GET http://biggim.ncats.io/api//metadata/tissue?None
Available Tissues
{
  "tissues": [
    "B_lymphocyte",
    "B_lymphoma_cell_line",
    "T_lymphocyte",
    "acute_myeloid_leukemia_cell",
    "adenocarcinoma_cell",
    "adipose_tissue",
    "adrenal_cortex",
    "adrenal_gland",
    "adrenal_gland_cancer_cell",
    "adrenocortical_carcinoma_cell",
    "adult_stem_cell",
    "alimentary_canal",
    "amygdala",
    "animal",
    "aorta",
    "artery",
    "astroblast",
    "astrocyte",
    "astrocytoma_cell",
    "astroglia",
    "astroglial_cell",
    "avian_pallium",
    "basal_ganglion",
    "basophil",
    "bile_duct",
    "bile_duct_epithelium",
    "biliary_epithelium",
    "bladder",
    "blast_cell",
    "blastomere",
    "blastula",
    "blood",
    "blood_cancer_cell",
    "blood_plasma",
    "blood_platelet",
    "blood_vessel",
    "blood_vessel_endothelium",
    "bone",
    "bone_cancer_cell",
    "bone_marrow",
    "bone_marrow_cell",
    "brain",
    "brain_stem",

### Mody query for pancreas

We provide a list of 14 MODY genes. A query request is triggered, which returns a request_id. You then query a status endpoint to find out whether the request has finished processing and where the result is. Provided there are no errors, you can download the CSV and interact with the values.

This query will look only at the pancreas tissue and limit the number of rows to 10000.

In [4]:
# Same 14 MODY gene ids, restricted to pancreas and capped at 10000 rows.
endpoint = "lilgim/query"
data = dict(
    ids='3630,2645,5078,6927,6928,1056,8462,4760,3172,3651,6833,640,3767,26060',
    tissue='pancreas',
    limit=10000,
)
results = wrapper(endpoint, data)
# NOTE(review): despite its name, this copy holds the pancreas results.
brain_lgresults = results.copy()
results.head()

Sent: GET http://biggim.ncats.io/api/lilgim/query?ids=3630%2C2645%2C5078%2C6927%2C6928%2C1056%2C8462%2C4760%2C3172%2C3651%2C6833%2C640%2C3767%2C26060&limit=10000&tissue=pancreas
{
  "status": "submitted",
  "request_id": "004717e6-5c11-44ad-9238-64cf57642101"
}
Sent: GET http://biggim.ncats.io/api/lilgim/status/004717e6-5c11-44ad-9238-64cf57642101?None
{
  "status": "running",
  "message": "Extraction job is running.",
  "request_id": "004717e6-5c11-44ad-9238-64cf57642101"
}
Checking again
Sent: GET http://biggim.ncats.io/api/lilgim/status/004717e6-5c11-44ad-9238-64cf57642101?None
{
  "request_id": "004717e6-5c11-44ad-9238-64cf57642101",
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/004717e6-5c11-44ad-9238-64cf57642101000000000000.csv"
  ],
  "status": "complete",
  "rows": 10000,
  "processed_data": "0B",
  "size": "461.97 KB"
}


Unnamed: 0,Gene1,Symbol1,Gene2,Symbol2,maxCorr,minCorr,aveCorr
0,169026,SLC30A8,4760,NEUROD1,0.9739,0.9017,0.9378
1,169026,SLC30A8,6833,ABCC8,0.9587,0.8869,0.9228
2,11189,CELF3,6833,ABCC8,0.964,0.8737,0.91885
3,29106,SCG3,4760,NEUROD1,0.9649,0.8593,0.9121
4,222546,RFX6,4760,NEUROD1,0.9676,0.831,0.8993


   ## Big GIM query for Mody genes in healthy pancreas

In [5]:
#prep
import time
import pandas

# Collect the name of every study reported by the metadata endpoint.
studies = get('metadata/study')
study_names = [study['name'] for study in studies]

# Pick the first table flagged as the default and remember its name.
tables = get('/metadata/table')
default_tables = [table for table in tables if table['default'] == True]
default_table = default_tables[0]['name']
print(default_table)

Sent: GET http://biggim.ncats.io/api/metadata/study?None
Sent: GET http://biggim.ncats.io/api//metadata/table?None
BigGIM_70_v1


In [6]:
# Gene pairs involving any of the 14 MODY genes whose GTEx pancreas
# correlation satisfies the restriction_gt threshold (.2), up to 100000 rows.
example_query = {
  "restriction_gt": "GTEx_Pancreas_Correlation,.2",
  "table": default_table,
  "columns": "GTEx_Pancreas_Correlation",
  "ids1": "3630,2645,5078,6927,6928,1056,8462,4760,3172,3651,6833,640,3767,26060",
  "limit": 100000
}

# Step 1: submit the query; the response carries the request_id to poll.
try:
    query_submit = get('biggim/query', data=example_query)
    jprint(query_submit)
except requests.HTTPError as e:
    print(e)
    if e.response.status_code == 400:
        jprint(e.response.json())
    # Re-raise: without a fresh request_id the polling below would either
    # fail with a NameError or silently poll a request from an earlier cell.
    raise

# Step 2: poll the status endpoint once a second until the job stops running.
try:
    while True:
        query_status = get('biggim/status/%s' % (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] != 'running':
            # query has finished
            break
        time.sleep(1)
        print("Checking again")
except requests.HTTPError as e:
    print(e)
    if e.response.status_code == 400:
        jprint(e.response.json())
    # Re-raise so a stale query_status is never loaded as this query's result.
    raise

# Step 3: download every result CSV and stack them into one DataFrame.
result = pandas.concat(map(pandas.read_csv, query_status['request_uri']))
result

Sent: GET http://biggim.ncats.io/api/biggim/query?table=BigGIM_70_v1&columns=GTEx_Pancreas_Correlation&restriction_gt=GTEx_Pancreas_Correlation%2C.2&ids1=3630%2C2645%2C5078%2C6927%2C6928%2C1056%2C8462%2C4760%2C3172%2C3651%2C6833%2C640%2C3767%2C26060&limit=100000
{
  "status": "submitted",
  "request_id": "d02dc021-27dc-49e4-a661-b1c6a9c9e111"
}
Sent: GET http://biggim.ncats.io/api/biggim/status/d02dc021-27dc-49e4-a661-b1c6a9c9e111?None
{
  "request_id": "d02dc021-27dc-49e4-a661-b1c6a9c9e111",
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/d02dc021-27dc-49e4-a661-b1c6a9c9e111000000000000.csv"
  ],
  "status": "complete",
  "rows": 27919,
  "processed_data": "0B",
  "size": "898.09 KB"
}


Unnamed: 0,GPID,Gene1,Gene2,GTEx_Pancreas_Correlation
0,84620000064225,64225,8462,0.2500
1,20640000003172,3172,2064,0.2500
2,26450000009839,9839,2645,0.2500
3,6400000023151,23151,640,0.2500
4,6400000055623,55623,640,0.2500
5,6400000004221,4221,640,0.2500
6,47600100134229,100134229,4760,0.2500
7,68330000085004,85004,6833,0.2500
8,68330000130940,130940,6833,0.2500
9,37670000010979,10979,3767,0.2500


   ## Take 500 genes with highest similarity to the Mody genes 

In [7]:
# Rank the gene pairs by pancreas correlation, strongest first.
df = result.sort_values(by='GTEx_Pancreas_Correlation', ascending=False)
df = df.reset_index(drop=True)

# Walk down the ranking collecting distinct gene ids until we have 500.
# Genes are added one at a time so the cap is hit exactly; adding both genes
# of a row and testing `== 500` could jump from 499 to 501 and never stop.
max_genes = 500
selected = set()
for gene1, gene2 in zip(df['Gene1'], df['Gene2']):
    if len(selected) >= max_genes:
        break
    selected.add(int(gene1))
    if len(selected) < max_genes:
        selected.add(int(gene2))

# Numerically sorted ids, rendered as the comma-separated string the API takes.
gene_list = [str(gene) for gene in sorted(selected)]
str1 = ','.join(gene_list)
print(str1)

22,51,52,91,116,316,405,573,653,694,773,816,900,976,988,1045,1113,1114,1131,1141,1207,1272,1363,1457,1615,1662,1729,1951,1956,1965,1974,1992,1996,1997,2309,2339,2572,2641,2642,2645,2775,2781,2786,2864,2891,2892,2926,2961,2965,3185,3297,3326,3382,3431,3454,3588,3630,3642,3651,3652,3670,3735,3741,3752,3757,3763,3785,3800,3837,3840,3842,3843,3895,4013,4076,4135,4179,4217,4651,4661,4729,4733,4760,4763,4782,4821,4928,5001,5036,5045,5062,5080,5094,5108,5122,5126,5236,5283,5291,5432,5464,5501,5502,5522,5525,5537,5711,5718,5771,5798,5865,5887,5910,5917,5923,5981,5982,6009,6095,6198,6252,6258,6418,6456,6599,6616,6672,6732,6749,6750,6780,6804,6833,6855,6860,6861,6924,6927,7014,7049,7071,7084,7111,7251,7266,7270,7276,7320,7322,7323,7329,7345,7398,7444,7458,7469,7494,7525,7555,7844,7857,7913,8027,8030,8086,8204,8310,8315,8439,8443,8451,8452,8462,8533,8661,8662,8715,8725,8763,8780,8833,8881,8882,8883,8938,8941,9053,9066,9118,9188,9218,9254,9255,9312,9318,9342,9527,9541,9589,9616,9665,9699,9705,9729

   ## Big GIM query for Mody genes in pancreas - NDEx input   

In [8]:
#ids1 and ids2 now contain the 500 genes most associated with the 14 Mody genes (including the 14 Mody genes)
# The limit 124750 equals 500*499/2, i.e. the number of unordered pairs among
# 500 genes -- presumably chosen so every pair can be returned.
example_query = {
  "restriction_gt": "GTEx_Pancreas_Correlation,-2",
  "table": default_table,
  "columns": "GTEx_Pancreas_Correlation",
  "ids1": str1,
  "ids2": str1,
  "limit": 124750
}

# Step 1: submit the query; the response carries the request_id to poll.
try:
    query_submit = get('biggim/query', data=example_query)
    jprint(query_submit)
except requests.HTTPError as e:
    print(e)
    if e.response.status_code == 400:
        jprint(e.response.json())
    # Re-raise: otherwise query_submit from the previous query would be
    # polled below and its old results returned as if they were new.
    raise

# Step 2: poll the status endpoint once a second until the job stops running.
try:
    while True:
        query_status = get('biggim/status/%s' % (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] != 'running':
            # query has finished
            break
        time.sleep(1)
        print("Checking again")
except requests.HTTPError as e:
    print(e)
    if e.response.status_code == 400:
        jprint(e.response.json())
    # Re-raise so a stale query_status is never loaded as this query's result.
    raise

# Step 3: download every result CSV and stack them into one DataFrame.
result = pandas.concat(map(pandas.read_csv, query_status['request_uri']))
result

Sent: GET http://biggim.ncats.io/api/biggim/query?table=BigGIM_70_v1&columns=GTEx_Pancreas_Correlation&limit=124750&ids2=22%2C51%2C52%2C91%2C116%2C316%2C405%2C573%2C653%2C694%2C773%2C816%2C900%2C976%2C988%2C1045%2C1113%2C1114%2C1131%2C1141%2C1207%2C1272%2C1363%2C1457%2C1615%2C1662%2C1729%2C1951%2C1956%2C1965%2C1974%2C1992%2C1996%2C1997%2C2309%2C2339%2C2572%2C2641%2C2642%2C2645%2C2775%2C2781%2C2786%2C2864%2C2891%2C2892%2C2926%2C2961%2C2965%2C3185%2C3297%2C3326%2C3382%2C3431%2C3454%2C3588%2C3630%2C3642%2C3651%2C3652%2C3670%2C3735%2C3741%2C3752%2C3757%2C3763%2C3785%2C3800%2C3837%2C3840%2C3842%2C3843%2C3895%2C4013%2C4076%2C4135%2C4179%2C4217%2C4651%2C4661%2C4729%2C4733%2C4760%2C4763%2C4782%2C4821%2C4928%2C5001%2C5036%2C5045%2C5062%2C5080%2C5094%2C5108%2C5122%2C5126%2C5236%2C5283%2C5291%2C5432%2C5464%2C5501%2C5502%2C5522%2C5525%2C5537%2C5711%2C5718%2C5771%2C5798%2C5865%2C5887%2C5910%2C5917%2C5923%2C5981%2C5982%2C6009%2C6095%2C6198%2C6252%2C6258%2C6418%2C6456%2C6599%2C6616%2C6672%2C6732%2C67

Unnamed: 0,GPID,Gene1,Gene2,GTEx_Pancreas_Correlation
0,35880000003840,3840,3588,0.7143
1,10450000003840,3840,1045,0.2566
2,220000003840,3840,22,0.7148
3,23390000003840,3840,2339,0.7637
4,510000003840,3840,51,0.7199
5,520000003840,3840,52,0.6715
6,33820000003840,3840,3382,0.5434
7,36520000003840,3840,3652,0.6980
8,16150000003840,3840,1615,0.8107
9,910000003840,3840,91,0.6800
