# lilGIM and BigCLAM example

swagger api specification: http://biggim.ncats.io/api

In [1]:
# A few helper functions for posting and getting api requests
import json
import requests
import pandas
import time

base_url = 'http://biggim.ncats.io/api'

#a couple of simple helper functions
def post(endpoint, data={}, base_url=base_url):
    req = requests.post('%s/%s' % (base_url,endpoint), data=data)
    req.raise_for_status()
    return req.json()

def get(endpoint, data={}, base_url=base_url):
    req = requests.get('%s/%s' % (base_url,endpoint), data=data)
    req.raise_for_status()
    print("Sent: GET %s?%s" % (req.request.url,req.request.body))
    return req.json()
    

def jprint(dct):
    print(json.dumps(dct, indent=2))
    
def wrapper(endpoint, data={}, base_url=base_url):
    try:
        response = get(endpoint, data, base_url)
        jprint(response)
    except requests.HTTPError as e:

        print(e)
        if e.response.status_code == 400:
            jprint(e.response.json())
        raise
    try:
        ctr = 1
        while True:
            query_status = get('%s/status/%s'% (endpoint.split('/')[0],response['request_id'],))
            jprint(query_status)
            if query_status['status'] !='running':
                # query has finished
                break
            else:
                time.sleep(ctr)
                ctr += 1
                #linear backoff
                print("Checking again")
    except requests.HTTPError as e:
        print(e)
        if e.response.status_code == 400:
            jprint(e.response.json())
        raise
    return pandas.concat(map(pandas.read_csv, query_status['request_uri']))

    

## lilGIM

Lil' GIM is a simplified interface to BigGIM

### Run the simplest query possible.

You provide a list of entrez gene ids. A query request is triggered, which returns a `request_id`
You then query a `results` endpoint to find out if the request is finished processing and where the result is.
Provided their are no errors you can download the CSV and interact with the values.

This query will look at all tissues (`whole_body` in the Brenda tissue ontology) and limit the number of rows to 1000.

In [2]:

endpoint = "lilgim/query"
data = {'ids':'5111,6996,57697,6815,889,7112,2176,1019,5888,5706'}
try:
    response = get(endpoint, data)
    jprint(response)
except requests.HTTPError as e:
    print(e)
    jprint(e.response.json())

Sent: GET http://biggim.ncats.io/api/lilgim/query?ids=5111%2C6996%2C57697%2C6815%2C889%2C7112%2C2176%2C1019%2C5888%2C5706
{
  "status": "submitted",
  "request_id": "ba1c5782-5120-4324-8ca1-c658bbf42c18"
}


## Check query status

These can take a while to run as they are querying gigs of data. So we keep pinging the server to find out when the query is done and where the results can be found.

In [3]:
import time
try:
    ctr = 1
    while True:
        query_status = get('lilgim/status/%s'% (response['request_id'],))
        jprint(query_status)
        if query_status['status'] !='running':
            # query has finished
            break
        else:
            time.sleep(ctr)
            ctr += 1
            #linear backoff
            print("Checking again")
except requests.HTTPError as e:
    print(e)
    jprint(e.response.json())

Sent: GET http://biggim.ncats.io/api/lilgim/status/ba1c5782-5120-4324-8ca1-c658bbf42c18?None
{
  "status": "running",
  "request_id": "ba1c5782-5120-4324-8ca1-c658bbf42c18",
  "message": "Extraction job is running."
}
Checking again
Sent: GET http://biggim.ncats.io/api/lilgim/status/ba1c5782-5120-4324-8ca1-c658bbf42c18?None
{
  "status": "complete",
  "processed_data": "0B",
  "request_id": "ba1c5782-5120-4324-8ca1-c658bbf42c18",
  "size": "320.3 KB",
  "rows": 5541,
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/ba1c5782-5120-4324-8ca1-c658bbf42c18000000000000.csv"
  ]
}


## Get the results from google cloud

This grabs the results, which could be multiple files and creates a dataframe from them.

In [4]:
results = pandas.concat(map(pandas.read_csv, query_status['request_uri']))
results.head()


Unnamed: 0,Gene1,Symbol1,Gene2,Symbol2,maxCorr,minCorr,aveCorr
0,113130,CDCA5,5888,RAD51,0.9695,0.2743,0.722643
1,5888,RAD51,701,BUB1B,0.9555,0.2071,0.697161
2,9133,CCNB2,5888,RAD51,0.9674,0.0481,0.693683
3,8318,CDC45,5888,RAD51,0.9794,0.0142,0.691496
4,51203,NUSAP1,5888,RAD51,0.9532,0.0814,0.671044


## Simplified w/ wrapper

All query resources follow the same pattern. Query the endpoint, wait for the query to finish, get the result.

This simplified helper function performs all of the above and will be used for the rest of the tutorial.

In [5]:
#same query as before, it just waits for the results automatically
endpoint = "lilgim/query"
data = {'ids':'5111,6996,57697,6815,889,7112,2176,1019,5888,5706'}
results = wrapper(endpoint, data)
results.head()

Sent: GET http://biggim.ncats.io/api/lilgim/query?ids=5111%2C6996%2C57697%2C6815%2C889%2C7112%2C2176%2C1019%2C5888%2C5706
{
  "status": "submitted",
  "request_id": "7e03d6d9-e98a-4bfe-bcc2-1b3cf31d6c6c"
}
Sent: GET http://biggim.ncats.io/api/lilgim/status/7e03d6d9-e98a-4bfe-bcc2-1b3cf31d6c6c?None
{
  "status": "running",
  "request_id": "7e03d6d9-e98a-4bfe-bcc2-1b3cf31d6c6c",
  "message": "Extraction job is running."
}
Checking again
Sent: GET http://biggim.ncats.io/api/lilgim/status/7e03d6d9-e98a-4bfe-bcc2-1b3cf31d6c6c?None
{
  "status": "complete",
  "processed_data": "0B",
  "request_id": "7e03d6d9-e98a-4bfe-bcc2-1b3cf31d6c6c",
  "size": "320.3 KB",
  "rows": 5541,
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/7e03d6d9-e98a-4bfe-bcc2-1b3cf31d6c6c000000000000.csv"
  ]
}


Unnamed: 0,Gene1,Symbol1,Gene2,Symbol2,maxCorr,minCorr,aveCorr
0,113130,CDCA5,5888,RAD51,0.9695,0.2743,0.722643
1,5888,RAD51,701,BUB1B,0.9555,0.2071,0.697161
2,9133,CCNB2,5888,RAD51,0.9674,0.0481,0.693683
3,8318,CDC45,5888,RAD51,0.9794,0.0142,0.691496
4,51203,NUSAP1,5888,RAD51,0.9532,0.0814,0.671044


## Optional parameters

- `limit` - the maximum number of relationships to return (default:1000)
-  `tissue` - restrict query to specific tissue from the Brenda tissue ontology (default:whole_body)

### View available tissues via the `metadata/tissue` endpoint

In [6]:
try:
    response = get('/metadata/tissue')
    print("Available Tissues")
    jprint(response)
except response.HTTPError as e:
    print(e)

Sent: GET http://biggim.ncats.io/api//metadata/tissue?None
Available Tissues
{
  "tissues": [
    "B_lymphocyte",
    "B_lymphoma_cell_line",
    "T_lymphocyte",
    "acute_myeloid_leukemia_cell",
    "adenocarcinoma_cell",
    "adipose_tissue",
    "adrenal_cortex",
    "adrenal_gland",
    "adrenal_gland_cancer_cell",
    "adrenocortical_carcinoma_cell",
    "adult_stem_cell",
    "alimentary_canal",
    "amygdala",
    "animal",
    "aorta",
    "artery",
    "astroblast",
    "astrocyte",
    "astrocytoma_cell",
    "astroglia",
    "astroglial_cell",
    "avian_pallium",
    "basal_ganglion",
    "basophil",
    "bile_duct",
    "bile_duct_epithelium",
    "biliary_epithelium",
    "bladder",
    "blast_cell",
    "blastomere",
    "blastula",
    "blood",
    "blood_cancer_cell",
    "blood_plasma",
    "blood_platelet",
    "blood_vessel",
    "blood_vessel_endothelium",
    "bone",
    "bone_cancer_cell",
    "bone_marrow",
    "bone_marrow_cell",
    "brain",
    "brain_stem",

### Getting brain only correlations

In [7]:
endpoint = "lilgim/query"
data = {'ids':'5111,6996,57697,6815,889,7112,2176,1019,5888,5706',
        'tissue': 'bone',
        'limit':100
       }
results = wrapper(endpoint, data)
brain_lgresults = results.copy()
results.head()

Sent: GET http://biggim.ncats.io/api/lilgim/query?tissue=bone&ids=5111%2C6996%2C57697%2C6815%2C889%2C7112%2C2176%2C1019%2C5888%2C5706&limit=100
{
  "status": "submitted",
  "request_id": "2cd22f8b-cf03-49a7-82c5-7dac44d03cb5"
}
Sent: GET http://biggim.ncats.io/api/lilgim/status/2cd22f8b-cf03-49a7-82c5-7dac44d03cb5?None
{
  "status": "running",
  "request_id": "2cd22f8b-cf03-49a7-82c5-7dac44d03cb5",
  "message": "Extraction job is running."
}
Checking again
Sent: GET http://biggim.ncats.io/api/lilgim/status/2cd22f8b-cf03-49a7-82c5-7dac44d03cb5?None
{
  "status": "complete",
  "processed_data": "0B",
  "request_id": "2cd22f8b-cf03-49a7-82c5-7dac44d03cb5",
  "size": "4.64 KB",
  "rows": 100,
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/2cd22f8b-cf03-49a7-82c5-7dac44d03cb5000000000000.csv"
  ]
}


Unnamed: 0,Gene1,Symbol1,Gene2,Symbol2,maxCorr,minCorr,aveCorr
0,55610,VPS50,889,KRIT1,0.807,0.8053,0.80615
1,221785,ZSCAN25,889,KRIT1,0.8418,0.7565,0.79915
2,79027,ZNF655,889,KRIT1,0.8588,0.7234,0.7911
3,57697,FANCM,55320,MIS18BP1,0.8022,0.7243,0.76325
4,11339,OIP5,5888,RAD51,0.8068,0.6971,0.75195


# BigCLAM

**Big CLAM (Cell Line Association Miner)** is an **NCATS Translator Knowledge Source** that integrates large-scale high-quality data of various cell line resources to uncover associations between genomic and molecular features of cell lines, drug response measurements and gene knockdown viability scores. The cell line data comes from five different sources: 1) CCLE - Cancer Cell Line Encyclopedia, 2) GDSC - Genomics of Drug Sensitivity in Cancer, 3) CTRP - Cancer Therapeutics Response Portal, 4) CMap - Connectivity Map, and 5) CDM - Cancer Dependency Map. These data are stored as a Google BigQuery tables enabling fast access and real-time association analysis.


## Genes to genes

Genomic aberrations in INPUT genes decrease viability upon knockdown of OUTPUT genes

In [8]:
data = {"ids": "TCOF1,DDX46,COPE,RIPK1"}
endpoint = 'bigclam/g2g/query'
results = wrapper(endpoint, data)
results.head()

Sent: GET http://biggim.ncats.io/api/bigclam/g2g/query?ids=TCOF1%2CDDX46%2CCOPE%2CRIPK1
{
  "status": "submitted",
  "request_id": "79055597-eeb7-41f0-9878-544f2357ccc3"
}
Sent: GET http://biggim.ncats.io/api/bigclam/status/79055597-eeb7-41f0-9878-544f2357ccc3?None
{
  "status": "running",
  "request_id": "79055597-eeb7-41f0-9878-544f2357ccc3",
  "message": "Extraction job is running."
}
Checking again
Sent: GET http://biggim.ncats.io/api/bigclam/status/79055597-eeb7-41f0-9878-544f2357ccc3?None
{
  "status": "complete",
  "processed_data": "0B",
  "request_id": "79055597-eeb7-41f0-9878-544f2357ccc3",
  "size": "2.47 KB",
  "rows": 100,
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/79055597-eeb7-41f0-9878-544f2357ccc3000000000000.csv"
  ]
}


Unnamed: 0,gene_label,F
0,CSGALNACT1,17.433432
1,TBC1D9,16.436353
2,CTSB,13.711625
3,PSME3,13.099071
4,GLI4,12.840643


## Optional parameters

- `limit` - the maximum number of relationships to return (default:100)
-  `tissue` - restrict query to specific tissue from the Brenda tissue ontology (default:whole_body)

In [None]:
# little fun using correlated genes from lilgim
# example is really about adding a limit parameter

data = {"ids": "TCOF1,DDX46,COPE,RIPK1", "limit":1000}
endpoint = 'bigclam/g2g/query'
results = wrapper(endpoint, data)
results.head()

Sent: GET http://biggim.ncats.io/api/bigclam/g2g/query?limit=1000&ids=TCOF1%2CDDX46%2CCOPE%2CRIPK1
{
  "status": "submitted",
  "request_id": "d7371f31-cc1d-4a74-bf49-d6c3df78f899"
}
Sent: GET http://biggim.ncats.io/api/bigclam/status/d7371f31-cc1d-4a74-bf49-d6c3df78f899?None
{
  "status": "running",
  "request_id": "d7371f31-cc1d-4a74-bf49-d6c3df78f899",
  "message": "Extraction job is running."
}
Checking again
Sent: GET http://biggim.ncats.io/api/bigclam/status/d7371f31-cc1d-4a74-bf49-d6c3df78f899?None
{
  "status": "complete",
  "processed_data": "0B",
  "request_id": "d7371f31-cc1d-4a74-bf49-d6c3df78f899",
  "size": "24.57 KB",
  "rows": 1000,
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/d7371f31-cc1d-4a74-bf49-d6c3df78f899000000000000.csv"
  ]
}


Unnamed: 0,gene_label,F
0,CSGALNACT1,17.433432
1,TBC1D9,16.436353
2,CTSB,13.711625
3,PSME3,13.099071
4,GLI4,12.840643


## Genes to Drugs

Genomic aberrations in INPUT genes lead to sensitivity to OUTPUT drugs

In [None]:
data = {"ids": "TCOF1,DDX46,COPE,RIPK1"}
endpoint = 'bigclam/g2d/query'
results = wrapper(endpoint, data)
results.head()

Sent: GET http://biggim.ncats.io/api/bigclam/g2d/query?ids=TCOF1%2CDDX46%2CCOPE%2CRIPK1
{
  "status": "submitted",
  "request_id": "954da484-05d1-4a6e-8d58-9365b3122a85"
}
Sent: GET http://biggim.ncats.io/api/bigclam/status/954da484-05d1-4a6e-8d58-9365b3122a85?None
{
  "status": "running",
  "request_id": "954da484-05d1-4a6e-8d58-9365b3122a85",
  "message": "Extraction job is running."
}
Checking again
