# Expansion of nodes using Monarch APIs
* Monarch API (edge queries): https://api.monarchinitiative.org/api/#/
* SciGraph ontology API (graph queries): https://scigraph-ontology.monarchinitiative.org/scigraph/docs/#/

In [1]:
import os

import pandas as pd
import requests
from pandas.io.json import json_normalize

In [2]:
# Input gene identifier: the node that the queries below expand
gene = 'NCBIGene:358'

# Prefix (directory + file stem) shared by every output file
path = 'aqp1_human_expansion/aqp1_human'

# The to_csv() calls further down fail when the output directory is
# missing, so create it up front (no-op when it already exists).
os.makedirs(os.path.dirname(path), exist_ok=True)

## Graph queries (SciGraph)

In [3]:
# SciGraph service: base URL and the graph-query endpoint appended to it
api, endpoint = 'https://scigraph-ontology.monarchinitiative.org/scigraph', '/graph'

In [4]:
# Fetch the neighbors of the input gene (JSON content)
neighbors_url = '{}{}/neighbors/{}'.format(api, endpoint, gene)
r = requests.get(neighbors_url)
# Fail here on an HTTP error (4xx/5xx) instead of later, when the
# error body would be decoded as if it were a result.
r.raise_for_status()
r.headers

{'Vary': 'Accept, Accept-Encoding', 'Transfer-Encoding': 'chunked', 'Content-Type': 'application/json', 'Server': 'nginx/1.10.0 (Ubuntu)', 'Date': 'Wed, 17 May 2017 02:41:50 GMT', 'Content-Encoding': 'gzip', 'Connection': 'keep-alive', 'Cache-Control': 'no-transform, max-age=7200'}

In [5]:
# Decode the JSON body of the neighbors response and display it
neighbors_json = r.json()
neighbors_json

{'edges': [{'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['http://purl.obolibrary.org/obo/IAO_0000136']},
   'obj': 'NCBIGene:358',
   'pred': 'IAO:0000136',
   'sub': 'PMID:19306058'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['http://purl.obolibrary.org/obo/IAO_0000136']},
   'obj': 'NCBIGene:358',
   'pred': 'IAO:0000136',
   'sub': 'PMID:9405233'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['http://purl.obolibrary.org/obo/IAO_0000136']},
   'obj': 'NCBIGene:358',
   'pred': 'IAO:0000136',
   'sub': 'PMID:22334691'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['http://purl.obolibrary.org/obo/IAO_0000136']},
   'obj': 'NCBIGene:358',
   'pred': 'IAO:0000136',
   'sub': 'PMID:17894331'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': [

In [6]:
# Tabulate the records found under the 'nodes' key of the response
neighbors_df = json_normalize(r.json(), record_path='nodes')
neighbors_df.head()

Unnamed: 0,id,lbl,meta
0,PMID:21612401,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
1,PMID:16596446,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
2,PMID:17012249,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
3,PMID:26074259,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
4,PMID:17273788,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."


In [7]:
# Analyze neighbours' types: start by counting the distinct node ids.
# dropna=False keeps the count equal to len(unique()), which counts a
# missing id as one value.
n_neighbor_nodes = neighbors_df['id'].nunique(dropna=False)
print('nodes: {}'.format(n_neighbor_nodes))

nodes: 288


In [8]:
# Node type = the id prefix, i.e. the text before the first ':'.
# The vectorized .str accessor leaves a missing id as NaN, whereas a
# per-row lambda calling x.split() would raise on it.
neighbors_df['node_type'] = neighbors_df['id'].str.split(':').str[0]
neighbors_df['node_type'].value_counts()

PMID           279
NCBIGene         2
OMIM             1
ENSEMBL          1
HGNC             1
SO               1
CHR              1
NCBITaxon        1
MonarchData      1
Name: node_type, dtype: int64

In [9]:
# Filters
# TODO: filter neighbors by interaction_type. Open question: which strings are valid interaction_type values?

In [10]:
# Fetch the nodes reachable from the input gene (JSON content)
reachable_url = '{}{}/reachablefrom/{}'.format(api, endpoint, gene)
r = requests.get(reachable_url)
# Fail here on an HTTP error (4xx/5xx) instead of later, when the
# error body would be decoded as if it were a result.
r.raise_for_status()
r.headers

{'Server': 'nginx/1.10.0 (Ubuntu)', 'Content-Type': 'application/json', 'Date': 'Wed, 17 May 2017 02:41:54 GMT', 'Vary': 'Accept, Accept-Encoding', 'Content-Encoding': 'gzip', 'Content-Length': '447', 'Connection': 'keep-alive', 'Cache-Control': 'no-transform, max-age=7200'}

In [11]:
# Decode the response body once and tabulate its 'nodes' records
reach_json = r.json()
reach_df = json_normalize(reach_json, 'nodes')
reach_df.head()

Unnamed: 0,id,lbl,meta
0,MonarchData:ncbigene.ttl,,"{'types': ['Ontology', 'cliqueLeader', 'Node']}"
1,CHR:9606chr7p14.3,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
2,NCBITaxon:9606,Homo sapiens,"{'synonym': ['man', 'humans', 'human'], 'http:..."
3,SO:0001217,protein_coding_gene,"{'synonym': ['protein coding gene'], 'http://w..."


In [12]:
# Count the distinct reachable node ids.
# dropna=False keeps the count equal to len(unique()), which counts a
# missing id as one value.
n_reach_nodes = reach_df['id'].nunique(dropna=False)
print('nodes: {}'.format(n_reach_nodes))

nodes: 4


In [13]:
# Node type = the id prefix, i.e. the text before the first ':'.
# The vectorized .str accessor leaves a missing id as NaN, whereas a
# per-row lambda calling x.split() would raise on it.
reach_df['node_type'] = reach_df['id'].str.split(':').str[0]
reach_df['node_type'].value_counts()

NCBITaxon      1
MonarchData    1
SO             1
CHR            1
Name: node_type, dtype: int64

## Edge Queries (Monarch)

In [14]:
# Monarch service: base URL and the bioentity endpoint appended to it.
# NOTE: this rebinds the `api` / `endpoint` names that the SciGraph cells
# above also use, so those cells must be run before this one.
api, endpoint = 'https://api.monarchinitiative.org/api', '/bioentity'

In [15]:
# Fetch the gene record for the input gene
r = requests.get('{}{}/gene/{}'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
r.json()

{'categories': ['gene', 'sequence feature'],
 'chromosome': {'categories': None,
  'consider': None,
  'deprecated': None,
  'description': None,
  'id': None,
  'label': None,
  'replaced_by': None,
  'synonyms': None,
  'taxon': {'id': None, 'label': None},
  'types': None,
  'xrefs': None},
 'consider': None,
 'deprecated': None,
 'description': None,
 'disease_associations': [{'evidence_graph': {'edges': [{'obj': 'ECO:0000033',
      'pred': 'RO:0002558',
      'sub': 'MONARCH:8e8f5c993a9ac478fb84f162b39dfdd2b5cf56fe'},
     {'obj': 'DOID:899',
      'pred': 'RO:0002607',
      'sub': ':.well-known/genid/NCBIGene358-MESHD015529VL'},
     {'obj': 'NCBIGene:358',
      'pred': 'GENO:0000418',
      'sub': ':.well-known/genid/NCBIGene358-MESHD015529VL'},
     {'obj': ':.well-known/genid/NCBIGene358-MESHD015529VL',
      'pred': 'OBAN:association_has_subject',
      'sub': 'MONARCH:8e8f5c993a9ac478fb84f162b39dfdd2b5cf56fe'},
     {'obj': 'PMID:18988797',
      'pred': 'dc:source',
    

In [16]:
# Fetch the gene-gene interactions of the input gene
r = requests.get('{}{}/gene/{}/interactions/'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
# Count the entries of the response's 'objects' list
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 42


In [17]:
# One row per entry of the response's 'objects' list. Building the frame
# from a dict names the column up front, so an empty result gives an empty
# 'gene_id' column instead of a column-count mismatch error.
ggi_df = pd.DataFrame({'gene_id': r.json()['objects']})
ggi_df.head(2)

Unnamed: 0,gene_id
0,NCBIGene:100996717
1,NCBIGene:10188


In [18]:
# Save the interaction table as a tab-separated file (header row, no index column)
ggi_file = '{}_ggi.tsv'.format(path)
ggi_df.to_csv(ggi_file, sep='\t', header=True, index=False)

In [19]:
# Fetch the gene-phenotype associations of the input gene
r = requests.get('{}{}/gene/{}/phenotypes/'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
# NOTE(review): this cell counts 'associations' while the sibling cells
# count 'objects' -- confirm which key is intended here.
nassociations = len(r.json()['associations'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 0


In [20]:
# Fetch the gene-disease associations of the input gene
r = requests.get('{}{}/gene/{}/diseases/'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
# Count the entries of the response's 'objects' list
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 13


In [21]:
# One row per entry of the response's 'objects' list. Building the frame
# from a dict names the column up front, so an empty result gives an empty
# 'disease_id' column instead of a column-count mismatch error.
r_dict = r.json()
gda_df = pd.DataFrame({'disease_id': r_dict['objects']})
gda_df.head(2)

Unnamed: 0,disease_id
0,DOID:0060164
1,DOID:10763


In [22]:
# Save the gene-disease table as a tab-separated file (header row, no index column)
gda_file = '{}_gene_disease.tsv'.format(path)
gda_df.to_csv(gda_file, sep='\t', header=True, index=False)

In [23]:
# Fetch the gene-function associations of the input gene
r = requests.get('{}{}/gene/{}/function/'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
# Decode the body once; it is used for the count and for the display below
function_json = r.json()
nassociations = len(function_json['objects'])
print('Number of nodes associated are {}'.format(nassociations))
function_json

Number of nodes associated are 0


{'associations': [],
 'compact_associations': None,
 'facet_counts': {'isa_partof_closure': {}, 'taxon_label': {}},
 'facet_pivot': None,
 'numFound': None,
 'objects': [],
 'start': None}

In [24]:
# Fetch the anatomy terms in which the input gene is expressed
r = requests.get('{}{}/gene/{}/expressed/'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
# Count the entries of the response's 'objects' list
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 20


In [25]:
# One row per entry of the response's 'objects' list. Building the frame
# from a dict names the column up front, so an empty result gives an empty
# 'expressed_in_anatomy' column instead of a column-count mismatch error.
r_data = r.json()
gaa_df = pd.DataFrame({'expressed_in_anatomy': r_data['objects']})
gaa_df.head(2)

Unnamed: 0,expressed_in_anatomy
0,UBERON:0000007
1,UBERON:0000057


In [26]:
# Save the gene-anatomy table as a tab-separated file (header row, no index column)
gaa_file = '{}_gene_anatomy.tsv'.format(path)
gaa_df.to_csv(gaa_file, sep='\t', header=True, index=False)

In [27]:
# Fetch the publications associated with the input gene
r = requests.get('{}{}/gene/{}/pubs/'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
# Count the entries of the response's 'objects' list
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 0


In [29]:
# Fetch the homologs of the input gene
r = requests.get('{}{}/gene/{}/homologs/'.format(api, endpoint, gene))
# Fail here on an HTTP error (4xx/5xx) rather than while decoding the body
r.raise_for_status()
# Count the entries of the response's 'objects' list
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 49


In [30]:
# One row per entry of the response's 'objects' list. Building the frame
# from a dict names the column up front, so an empty result gives an empty
# 'homolog_id' column instead of a column-count mismatch error.
r_dict = r.json()
gha_df = pd.DataFrame({'homolog_id': r_dict['objects']})
gha_df.head(2)

Unnamed: 0,homolog_id
0,FlyBase:FBgn0015872
1,FlyBase:FBgn0033635


In [31]:
# Save the gene-homolog table as a tab-separated file (header row, no index column)
gha_file = '{}_gene_homolog.tsv'.format(path)
gha_df.to_csv(gha_file, sep='\t', header=True, index=False)