In [1]:
import urllib2
import json
import requests

# BigGIM example

BigGIM (Gene Interaction Miner) is a Translator Knowledge Source that contains functional interaction data for all pairs of genes. Functional interaction data are available from four different sources:

1. tissue-specific gene expression correlations from healthy tissue samples (GTEx), 
2. tissue-specific gene expression correlations from cancer samples (TCGA), 
3. tissue-specific probabilities of functional interaction (GIANT), and 
4. direct interactions (BioGRID). 
   
The data is stored as a Google BigQuery table enabling fast access.

## Swagger api specification

http://biggim.ncats.io/api/

## Sections

* 1.0 [Metadata Examples](#metadata_examples)
    * 1.1 [Get available studies](#metadata_studies)
    * 1.2 [Study descriptions](#metadata_study_descriptions)
    * 1.3 [Get a single study](#metadata_single_study)
    * 1.4 [Get substudies](#metadata_substudies)
    * 1.5 [Get all tables](#metadata_tables)
    * 1.6 [Get one table](#metadata_table)
    * 1.7 [Get a single column](#metadata_column)
    * 1.8 [List all tissues](#metadata_tissues)
    * 1.9 [Get single tissue](#metadata_tissue)   
* 2.0 [Query Examples](#query_examples)
    * 2.1 [Simple predefined query](#query_simple)
    * 2.2 [Check the status of the simple predefined query](#query_status)
    * 2.3 [Get the results as dataframe](#query_results)
* 3.0 [Full Example](#full_example)

In [2]:
# Root URL of the BigGIM REST API; the request helpers append endpoint paths to it.
base_url = "http://biggim.ncats.io/api"

In [3]:
#a couple of simple helper functions
def post(endpoint, data=None, base_url=base_url):
    """POST ``data`` to a BigGIM API endpoint and return the decoded JSON reply.

    Args:
        endpoint: Path relative to ``base_url``. Leading slashes are ignored
            so the final URL never contains ``//``.
        data: Optional mapping passed to ``requests.post`` as ``data``.
        base_url: Root URL of the API. Defaults to the value the notebook-level
            ``base_url`` had when this function was defined.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server replies with an error status.
    """
    url = '%s/%s' % (base_url.rstrip('/'), endpoint.lstrip('/'))
    # Build a fresh dict per call instead of sharing one mutable default.
    payload = {} if data is None else data
    req = requests.post(url, data=payload)
    req.raise_for_status()
    return req.json()

def get(endpoint, data=None, base_url=base_url):
    """GET a BigGIM API endpoint and return the decoded JSON reply.

    Args:
        endpoint: Path relative to ``base_url`` (e.g. ``'metadata/study'``).
            Leading slashes are ignored so the final URL never contains ``//``.
        data: Optional mapping passed to ``requests.get`` as ``data``.
        base_url: Root URL of the API. Defaults to the value the notebook-level
            ``base_url`` had when this function was defined.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server replies with an error status.
    """
    url = '%s/%s' % (base_url.rstrip('/'), endpoint.lstrip('/'))
    # Build a fresh dict per call instead of sharing one mutable default.
    payload = {} if data is None else data
    # NOTE(review): ``data=`` places the payload in the request body; ``params=``
    # would put it in the query string. Kept as-is -- confirm what the API expects.
    req = requests.get(url, data=payload)
    req.raise_for_status()
    return req.json()
    

def jprint(dct):
    """Pretty-print a JSON-serialisable object to stdout with 2-space indentation.

    Prints only; the return value is ``None``.
    """
    # Function-call form of print works on both Python 2 and Python 3.
    print(json.dumps(dct, indent=2))

<a id="metadata_examples"></a>
## Example Metadata

<a id="metadata_studies"></a>
### Get available studies

In [4]:
# Fetch the study metadata and show the raw JSON reply.
try:
    studies = get('metadata/study')
    jprint(studies)
except requests.HTTPError as e:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(e)

[
  {
    "description": " Biological General Repository for Interaction Datasets\n        BioGRID is an interaction repository with data compiled through comprehensive curation efforts.\n\n        https://thebiogrid.org/\n        ", 
    "name": "BioGRID", 
    "substudies": [
      {
        "tissue_hierarchy": null, 
        "description": "Default", 
        "name": "Default", 
        "columns": [
          {
            "datatype": "BOOLEAN", 
            "table": {
              "name": "BigGIM_70_v1"
            }, 
            "name": "BioGRID_Interaction", 
            "interactions_type": "Boolean Flag For Interaction"
          }, 
          {
            "datatype": "STRING", 
            "table": {
              "name": "BigGIM_70_v1"
            }, 
            "name": "BioGRID_Experimental_System", 
            "interactions_type": "Experimental System Used"
          }, 
          {
            "datatype": "STRING", 
            "table": {
              "name": "BigGIM

<a id="metadata_study_descriptions"></a>
### Study descriptions

In [5]:
# One labelled line per field, name first, for every study returned above.
for study in studies:
    for label, key in (("Study Name", 'name'), ("Study Description", 'description')):
        print("%s: %s" % (label, study[key]))


Study Name: BioGRID
Study Description:  Biological General Repository for Interaction Datasets
        BioGRID is an interaction repository with data compiled through comprehensive curation efforts.

        https://thebiogrid.org/
        
Study Name: TCGA
Study Description:  The Cancer Genome Atlas
        The Cancer Genome Atlas (TCGA) is a collaboration between the National Cancer Institute (NCI) and the National Human Genome Research Institute (NHGRI) that has generated comprehensive, multi-dimensional maps of the key genomic changes in 33 types of cancer. The TCGA dataset, comprising more than two petabytes of genomic data, has been made publically available, and this genomic information helps the cancer research community to improve the prevention, diagnosis, and treatment of cancer.

        https://cancergenome.nih.gov/
        
Study Name: GIANT
Study Description: Genetic Investigation of ANthropometric Traits
        The Genetic Investigation of ANthropometric Traits (GIANT)

<a id="metadata_single_study"></a>
### Get a single study

In [6]:
# Fetch the metadata of one study by name.
try:
    tcga = get('metadata/study/%s' % 'TCGA')
    jprint(tcga)
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None, so it must not be wrapped in print.
    jprint(e.response.json())

{
  "description": " The Cancer Genome Atlas\n        The Cancer Genome Atlas (TCGA) is a collaboration between the National Cancer Institute (NCI) and the National Human Genome Research Institute (NHGRI) that has generated comprehensive, multi-dimensional maps of the key genomic changes in 33 types of cancer. The TCGA dataset, comprising more than two petabytes of genomic data, has been made publically available, and this genomic information helps the cancer research community to improve the prevention, diagnosis, and treatment of cancer.\n\n        https://cancergenome.nih.gov/\n        ", 
  "name": "TCGA", 
  "substudies": [
    {
      "tissue_hierarchy": "0 - ovarian serous carcinoma cell|1 - ovary cancer cell|2 - ovary|3 - female reproductive gland|3 - internal female genital organ|4 - female reproductive system|4 - endocrine gland|5 - reproductive system|5 - gland|6 - urogenital system|6 - whole body|7 - animal|8 - tissues, cell types and enzyme sources", 
      "description": 

<a id="metadata_substudies"></a>
### Get substudies

In [7]:
# Summarise every substudy of the TCGA study fetched above.
for substudy in tcga['substudies']:
    print("Substudy name: %s " % substudy['name'])
    print("Substudy description: %s" % substudy['description'])
    # Fully-qualified "table.column" name for each column of the substudy.
    t_c_list = ', '.join("%s.%s" % (column['table']["name"], column['name']) for column in substudy['columns'])
    print("Table.column list: %s" % (t_c_list) )
    # A bare `print` is a no-op on Python 3; print("") emits the blank
    # separator line on both Python 2 and 3.
    print("")

Substudy name: OV 
Substudy description: Ovarian serous cystadenocarcinoma
Table.column list: BigGIM_70_v1.TCGA_OV_Correlation, BigGIM_70_v1.TCGA_OV_Pvalue, BigGIM_80_v1.TCGA_OV_Correlation, BigGIM_80_v1.TCGA_OV_Pvalue, BigGIM_90_v1.TCGA_OV_Correlation, BigGIM_90_v1.TCGA_OV_Pvalue, BigGIM_v1.TCGA_OV_Correlation, BigGIM_v1.TCGA_OV_Pvalue, CRC_70_v1.TCGA_OV_Correlation, CRC_70_v1.TCGA_OV_Pvalue, CRC_80_v1.TCGA_OV_Correlation, CRC_80_v1.TCGA_OV_Pvalue, CRC_90_v1.TCGA_OV_Correlation, CRC_90_v1.TCGA_OV_Pvalue, CRC_v1.TCGA_OV_Correlation, CRC_v1.TCGA_OV_Pvalue, FA_70_v1.TCGA_OV_Correlation, FA_70_v1.TCGA_OV_Pvalue, FA_80_v1.TCGA_OV_Correlation, FA_80_v1.TCGA_OV_Pvalue, FA_90_v1.TCGA_OV_Correlation, FA_90_v1.TCGA_OV_Pvalue, FA_v1.TCGA_OV_Correlation, FA_v1.TCGA_OV_Pvalue

Substudy name: ACC 
Substudy description: Adrenocortical carcinoma
Table.column list: BigGIM_70_v1.TCGA_ACC_Correlation, BigGIM_70_v1.TCGA_ACC_Pvalue, BigGIM_80_v1.TCGA_ACC_Correlation, BigGIM_80_v1.TCGA_ACC_Pvalue, BigGIM_9

### What an error looks like

In [8]:
# Request a study name that does not exist to show the error reply.
try:
    ta = get('metadata/study/%s' % 'TCSG')
    # Print what was just fetched (the original printed the unrelated `studies`).
    jprint(ta)
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None; wrapping it in print added a stray "None".
    jprint(e.response.json())

404 Client Error: NOT FOUND for url: http://biggim.ncats.io/api/metadata/study/TCSG

{
  "status": "error", 
  "message": "[TCSG] not a valid study"
}
None


<a id="metadata_tables"></a>
### Get all tables

In [9]:
# Fetch the metadata of every table.
try:
    # No leading slash, matching the other endpoints and avoiding "api//metadata".
    tables = get('metadata/table')
    jprint(tables)
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None, so it must not be wrapped in print.
    jprint(e.response.json())

[
  {
    "num_rows": 51090886, 
    "default": true, 
    "description": "Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.7", 
    "num_bytes": 146013971730, 
    "name": "BigGIM_70_v1"
  }, 
  {
    "num_rows": 14210893, 
    "default": false, 
    "description": "Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.8", 
    "num_bytes": 41062787154, 
    "name": "BigGIM_80_v1"
  }, 
  {
    "num_rows": 1173151, 
    "default": false, 
    "description": "Gene pairwise associations: correlation metrics from 33 TCG

In [10]:
# Name and description of each table, separated by a blank line.
for table in tables:
    print("Table name: %s" % table['name'])
    print("Table description: %s" % table['description'])
    # A bare `print` is a no-op on Python 3; print("") works on both versions.
    print("")

Table name: BigGIM_70_v1
Table description: Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.7

Table name: BigGIM_80_v1
Table description: Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.8

Table name: BigGIM_90_v1
Table description: Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher

<a id="metadata_table"></a>
### Get one table

The default table

In [11]:
# Take the first table whose 'default' flag is set and remember its name.
flagged_tables = [candidate for candidate in tables if candidate['default'] == True]
default_table = flagged_tables[0]['name']
print("Default table name: %s" % default_table)

Default table name: BigGIM_70_v1


In [12]:
# Fetch the full metadata of the default table.
try:
    # No leading slash, matching the other endpoints and avoiding "api//metadata".
    table = get('metadata/table/%s' % (default_table,))
    jprint(table)
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None, so it must not be wrapped in print.
    jprint(e.response.json())

{
  "description": "Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.7", 
  "default": true, 
  "num_bytes": 146013971730, 
  "num_rows": 51090886, 
  "columns": [
    {
      "datatype": "FLOAT", 
      "interactions_type": "Spearman Rank Correlation Coefficient", 
      "substudy": {
        "tissue_hierarchy": "0 - adrenocortical carcinoma cell|1 - adrenal cortex|2 - adrenal gland|3 - viscus|3 - endocrine gland|4 - whole body|4 - gland|5 - animal|6 - tissues, cell types and enzyme sources", 
        "study": {
          "name": "TCGA", 
          "description": " The Cancer Genome Atlas\n        The Cancer Genome Atlas (TCGA) is a collaboration between the National Cancer Institute (NCI) and the National Human Genome Research Institute (NHGRI) th

### First 10 columns in default table

In [13]:
# Describe the first ten columns of the table fetched above.
for column in table['columns'][:10]:
    print("Column name: %s" % column['name'])
    print("Column interaction: %s" % column['interactions_type'])
    print("Column data type: %s" % column["datatype"])
    # A bare `print` is a no-op on Python 3; print("") works on both versions.
    print("")

Column name: TCGA_ACC_Correlation
Column interaction: Spearman Rank Correlation Coefficient
Column data type: FLOAT

Column name: TCGA_ACC_Pvalue
Column interaction: P-value (-log10)
Column data type: FLOAT

Column name: TCGA_BLCA_Correlation
Column interaction: Spearman Rank Correlation Coefficient
Column data type: FLOAT

Column name: TCGA_BLCA_Pvalue
Column interaction: P-value (-log10)
Column data type: FLOAT

Column name: TCGA_BRCA_Correlation
Column interaction: Spearman Rank Correlation Coefficient
Column data type: FLOAT

Column name: TCGA_BRCA_Pvalue
Column interaction: P-value (-log10)
Column data type: FLOAT

Column name: TCGA_CESC_Correlation
Column interaction: Spearman Rank Correlation Coefficient
Column data type: FLOAT

Column name: TCGA_CESC_Pvalue
Column interaction: P-value (-log10)
Column data type: FLOAT

Column name: TCGA_CHOL_Correlation
Column interaction: Spearman Rank Correlation Coefficient
Column data type: FLOAT

Column name: TCGA_CHOL_Pvalue
Column interac

<a id="metadata_column"></a>
### Get a single column

In [14]:
# Fetch the metadata of one column of the default table.
try:
    # No leading slash, matching the other endpoints and avoiding "api//metadata".
    column = get('metadata/table/%s/column/%s' % (default_table, 'TCGA_ACC_Correlation'))
    jprint(column)
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None, so it must not be wrapped in print.
    jprint(e.response.json())

{
  "datatype": "FLOAT", 
  "interactions_type": "Spearman Rank Correlation Coefficient", 
  "substudy": {
    "tissue_hierarchy": "0 - adrenocortical carcinoma cell|1 - adrenal cortex|2 - adrenal gland|3 - viscus|3 - endocrine gland|4 - whole body|4 - gland|5 - animal|6 - tissues, cell types and enzyme sources", 
    "study": {
      "name": "TCGA", 
      "description": " The Cancer Genome Atlas\n        The Cancer Genome Atlas (TCGA) is a collaboration between the National Cancer Institute (NCI) and the National Human Genome Research Institute (NHGRI) that has generated comprehensive, multi-dimensional maps of the key genomic changes in 33 types of cancer. The TCGA dataset, comprising more than two petabytes of genomic data, has been made publically available, and this genomic information helps the cancer research community to improve the prevention, diagnosis, and treatment of cancer.\n\n        https://cancergenome.nih.gov/\n        "
    }, 
    "description": "Adrenocortical car

<a id="metadata_tissues"></a>
### List all tissues

In [15]:
# Request the tissue metadata and show the raw JSON reply.
tissues = get(endpoint='metadata/tissue')
jprint(dct=tissues)

{
  "tissues": [
    "B_lymphocyte", 
    "B_lymphoma_cell_line", 
    "T_lymphocyte", 
    "acute_myeloid_leukemia_cell", 
    "adenocarcinoma_cell", 
    "adipose_tissue", 
    "adrenal_cortex", 
    "adrenal_gland", 
    "adrenal_gland_cancer_cell", 
    "adrenocortical_carcinoma_cell", 
    "adult_stem_cell", 
    "alimentary_canal", 
    "amygdala", 
    "animal", 
    "aorta", 
    "artery", 
    "astroblast", 
    "astrocyte", 
    "astrocytoma_cell", 
    "astroglia", 
    "astroglial_cell", 
    "avian_pallium", 
    "basal_ganglion", 
    "basophil", 
    "bile_duct", 
    "bile_duct_epithelium", 
    "biliary_epithelium", 
    "bladder", 
    "blast_cell", 
    "blastomere", 
    "blastula", 
    "blood", 
    "blood_cancer_cell", 
    "blood_plasma", 
    "blood_platelet", 
    "blood_vessel", 
    "blood_vessel_endothelium", 
    "bone", 
    "bone_cancer_cell", 
    "bone_marrow", 
    "bone_marrow_cell", 
    "brain", 
    "brain_stem", 
    "brain_ventricle", 
    "brea

<a id="metadata_tissue"></a>
### Get single tissue

In [16]:
# Look up the metadata returned for one tissue name.
substudies = get('metadata/tissue/' + "lymphoid_cell")
jprint(dct=substudies)

{
  "tissue": "lymphoid_cell", 
  "substudies": [
    {
      "tissue_hierarchy": "0 - natural killer cell|1 - null cell|1 - large granular lymphocyte|2 - lymphocyte|3 - leukocyte|3 - lymphoid cell|4 - hematopoietic cell|4 - lymphoid tissue|5 - blood|5 - immune system|5 - hematopoietic system|6 - whole body|7 - animal|8 - tissues, cell types and enzyme sources", 
      "description": "natural_killer_cell", 
      "name": "natural_killer_cell", 
      "columns": [
        {
          "datatype": "FLOAT", 
          "table": {
            "name": "BigGIM_70_v1"
          }, 
          "name": "GIANT_natural_killer_cell_KnownFunctionalInteraction", 
          "interactions_type": "Binary Call For Known Functional Interaction"
        }, 
        {
          "datatype": "FLOAT", 
          "table": {
            "name": "BigGIM_70_v1"
          }, 
          "name": "GIANT_natural_killer_cell_ProbabilityOfFunctionalInteraction", 
          "interactions_type": "Bayesian Posterior Probabili

<a id="query_examples"></a>
## Query examples

<a id="query_simple"></a>
### Simple predefined query

In [17]:
example_query = {
    # The table to select from.
    "table": default_table,
    # A comma delimited list of column names to return.
    "columns": "TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue",
    # A comma delimited list of Entrez gene ids to select.
    "ids1": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,5722,1111,112,3333",
    # Entrez gene ids to select. If not given, the query selects any gene related to a gene in ids1.
    # If given, the query only selects relations that contain a gene in ids1 and a gene in ids2.
    "ids2": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,3333,1111,112,3333",
    # The type of join made on restrictions. Either intersect or union
    "restriction_join": "union",
    # A list of pairs of values column name,value with which to restrict
    # the results of the query to rows where the value of the column is greater than the given value.
    "restriction_gt": "TCGA_GBM_Correlation,.2, GTEx_Brain_Correlation,.2",
    # Same "column name,value" pair format; presumably keeps rows where the
    # column value is less than the given value -- TODO confirm against the API spec.
    # NOTE(review): the *_Pvalue columns shown in the column metadata are labelled
    # "P-value (-log10)", so confirm these thresholds are on the intended scale.
    "restriction_lt": "TCGA_GBM_Pvalue,.05, GTEx_Brain_Pvalue,.01",
    # The maximum number of rows to return.
    "limit": 10000
}
print("Query request:")
jprint(example_query)
try:
    query_submit = get('interactions/query', data=example_query)
    jprint(query_submit)
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None, so it must not be wrapped in print.
    jprint(e.response.json())

Query request:
{
  "restriction_join": "union", 
  "limit": 10000, 
  "restriction_gt": "TCGA_GBM_Correlation,.2, GTEx_Brain_Correlation,.2", 
  "restriction_lt": "TCGA_GBM_Pvalue,.05, GTEx_Brain_Pvalue,.01", 
  "table": "BigGIM_70_v1", 
  "ids2": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,3333,1111,112,3333", 
  "columns": "TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue", 
  "ids1": "5111,6996,57697,6815,889,7112,2176,1019,5888,5706,5722,1111,112,3333"
}
{
  "status": "submitted", 
  "request_id": "107b518b-6e7b-4024-a664-0b0638fc9028"
}


<a id="query_status"></a>
### Check the status of the simple predefined query

In [18]:
import time

# Poll the status endpoint once per second until the job stops reporting 'running'.
try:
    while True:
        query_status = get('interactions/query/status/%s' % (query_submit['request_id'],))
        jprint(query_status)
        if query_status['status'] != 'running':
            # query has finished
            break
        time.sleep(1)
        print("Checking again")
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None, so it must not be wrapped in print.
    jprint(e.response.json())

{
  "status": "running", 
  "message": "Extraction job is running.", 
  "request_id": "107b518b-6e7b-4024-a664-0b0638fc9028"
}
Checking again
{
  "status": "complete", 
  "rows": 54, 
  "processed_data": "0B", 
  "request_id": "107b518b-6e7b-4024-a664-0b0638fc9028", 
  "request_uri": [
    "https://storage.googleapis.com/ncats_bigquery_results/107b518b-6e7b-4024-a664-0b0638fc9028000000000000.csv"
  ], 
  "size": "2.72 KB"
}


<a id="query_results"></a>
### Get the results as dataframe

In [19]:
import pandas

# Read every result file listed in the finished status and stack them into one frame.
frames = [pandas.read_csv(uri) for uri in query_status['request_uri']]
result = pandas.concat(frames)
result

Unnamed: 0,GPID,Gene1,Gene2,TCGA_GBM_Correlation,TCGA_GBM_Pvalue,GTEx_Brain_Correlation,GTEx_Brain_Pvalue
0,58880000057697,57697,5888,0.4545,6.74,0.6227,135.34
1,58880000007112,7112,5888,0.7331,20.76,0.5651,106.4
2,57060000057697,57697,5706,0.288,2.85,0.7209,201.65
3,57060000006996,6996,5706,0.4229,5.83,0.6719,165.37
4,57060000006815,6815,5706,0.4009,5.24,0.4885,75.74
5,57060000007112,7112,5706,0.1942,1.47,0.7647,241.41
6,69960000007112,7112,6996,0.4991,8.18,0.6653,161.01
7,69960000057697,57697,6996,0.3473,3.99,0.8152,299.7
8,11110000006815,6815,1111,-0.1197,0.72,0.2608,20.3
9,11110000007112,7112,1111,0.7026,18.42,-0.0678,1.79


<a id="full_example"></a>
# Full example

Let's get relationships for a certain tissue where any tissue-related correlation has a p-value < .05

Assume we are interested in issues in lymphoid tissues.

In [20]:
import numpy as np

# Reset state left by the earlier example so that a failure in this cell cannot
# silently reuse the previous query's request id or status.
query_submit = None
query_status = None

# get lymphoid substudies
substudies = get('metadata/tissue/%s' % ("lymphoid_tissue",))

# we only want things in a specific table, we'll use the default
# (no leading slash, matching the other endpoints and avoiding "api//metadata")
table = get('metadata/table/%s' % (default_table,))

print("Using table %s\n%s" % (table['name'], table['description']))

# Collect the columns to select, and separately the ones that hold p-values.
pvalue_columns = []
column_names = []
for ss in substudies['substudies']:
    for column in ss['columns']:
        # only if column is from our table
        if column['table']['name'] == table['name']:
            # add column to select
            column_names.append(column['name'])
            if column['interactions_type'] == 'P-value (-log10)':
                # add pvalue to where
                pvalue_columns.append(column['name'])

# These columns are labelled 'P-value (-log10)', so the cut-off is expressed as
# |log10(.05)| and applied with restriction_gt. Computed once, outside the loop.
pvalue_threshold = str(abs(np.log10(.05)))
pv = []
for p in pvalue_columns:
    pv.append(p)
    pv.append(pvalue_threshold)

query_arg = {}
query_arg['table'] = table['name']
query_arg['columns'] = ','.join(sorted(column_names))
if len(pv):
    query_arg['restriction_gt'] = ','.join(pv)
# 'union' join on the restrictions; presumably a row is kept when any one of the
# p-value restrictions matches -- TODO confirm against the API spec.
query_arg['restriction_join'] = 'union'
query_arg['limit'] = 1000000
print("The constructed query.")
jprint(query_arg)

print("Submitting query request.")
try:
    query_submit = get('interactions/query', data=query_arg)
    print("Query request response.")
    jprint(query_submit)
except requests.HTTPError as e:
    print(e)
    print("")  # blank line before the returned message
    # jprint() prints and returns None, so it must not be wrapped in print.
    jprint(e.response.json())

# Only poll when the submission above succeeded.
if query_submit is not None:
    print("Check query status")
    try:
        ctr = 1
        while True:
            query_status = get('interactions/query/status/%s' % (query_submit['request_id'],))
            jprint(query_status)
            if query_status['status'] != 'running':
                # query has finished
                break
            # linear backoff: wait one second longer after every check
            time.sleep(ctr)
            ctr += 1
            print("Checking again")
    except requests.HTTPError as e:
        print(e)
        print("")  # blank line before the returned message
        jprint(e.response.json())

# .get() avoids a KeyError when the final status carries no 'request_uri'.
if query_status and query_status.get('request_uri'):
    print("Query successful. Getting result.")
    result = pandas.concat(map(pandas.read_csv, query_status['request_uri']))
else:
    print("Error see above")

Using table BigGIM_70_v1
Gene pairwise associations: correlation metrics from 33 TCGA tumor types, 21 GTEx tissues, functional interaction scores from 145 tissues (from GIANT), and BioGRID interactions. Containing only rows (gene pairs) where at least one of the TCGA or GTEx absolute correlations is higher than 0.7
The constructed query.
{
  "table": "BigGIM_70_v1", 
  "limit": 1000000, 
  "restriction_gt": "GTEx_Spleen_Pvalue,1.30102999566,TCGA_DLBC_Pvalue,1.30102999566,TCGA_THYM_Pvalue,1.30102999566", 
  "columns": "GIANT_b_lymphocyte_KnownFunctionalInteraction,GIANT_b_lymphocyte_ProbabilityOfFunctionalInteraction,GIANT_dendritic_cell_KnownFunctionalInteraction,GIANT_dendritic_cell_ProbabilityOfFunctionalInteraction,GIANT_lymph_node_KnownFunctionalInteraction,GIANT_lymph_node_ProbabilityOfFunctionalInteraction,GIANT_lymphocyte_KnownFunctionalInteraction,GIANT_lymphocyte_ProbabilityOfFunctionalInteraction,GIANT_natural_killer_cell_KnownFunctionalInteraction,GIANT_natural_killer_cell_P

In [21]:
# Preview the first five rows of the full-example result.
result.head(n=5)

Unnamed: 0,GPID,Gene1,Gene2,GIANT_b_lymphocyte_KnownFunctionalInteraction,GIANT_b_lymphocyte_ProbabilityOfFunctionalInteraction,GIANT_dendritic_cell_KnownFunctionalInteraction,GIANT_dendritic_cell_ProbabilityOfFunctionalInteraction,GIANT_lymph_node_KnownFunctionalInteraction,GIANT_lymph_node_ProbabilityOfFunctionalInteraction,GIANT_lymphocyte_KnownFunctionalInteraction,...,GIANT_thymocyte_KnownFunctionalInteraction,GIANT_thymocyte_ProbabilityOfFunctionalInteraction,GIANT_tonsil_KnownFunctionalInteraction,GIANT_tonsil_ProbabilityOfFunctionalInteraction,GTEx_Spleen_Correlation,GTEx_Spleen_Pvalue,TCGA_DLBC_Correlation,TCGA_DLBC_Pvalue,TCGA_THYM_Correlation,TCGA_THYM_Pvalue
0,91170000171022,171022,9117,,,,,,,,...,,,,,-0.0303,0.12,,,-0.3085,3.19
1,842700101927402,101927402,84270,,,,,,,,...,,,,,-0.2865,2.5,,,,
2,46010000644759,644759,4601,,,,,,,,...,,,,,0.2115,1.51,,,,
3,796400100506686,100506686,79640,,,,,,,,...,,,,,0.204,1.42,,,,
4,237740000729867,729867,23774,,,,,,,,...,,,,,0.2811,2.42,,,,
