# Expansion of nodes using Monarch APIs
* https://api.monarchinitiative.org/api/#/
* https://scigraph-ontology.monarchinitiative.org/scigraph/docs/#/

In [1]:
import os

import pandas as pd
import requests
from pandas.io.json import json_normalize

In [2]:
# Input gene: identifier (prefix:id form) of the gene whose neighborhood is expanded
gene = 'NCBIGene:11826'

# Output files: common prefix (directory + file stem) shared by every TSV written below
path = 'aqp1_mouse_expansion/aqp1_mouse'

# Create the output directory up front so the to_csv cells also work on a fresh
# checkout where it does not exist yet (a prefix without a directory part needs nothing).
output_dir = os.path.dirname(path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

## Graph queries (SciGraph)

In [3]:
# Base URL of the SciGraph ontology service and the route prefix for graph queries
api, endpoint = (
    'https://scigraph-ontology.monarchinitiative.org/scigraph',
    '/graph',
)

In [4]:
# Fetch the direct graph neighbors of `gene` from the SciGraph /graph/neighbors route
r = requests.get('{}{}/neighbors/{}'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError in the cells below
r.raise_for_status()
# Display the response headers (content type, encoding, caching)
r.headers

{'Server': 'nginx/1.10.0 (Ubuntu)', 'Vary': 'Accept, Accept-Encoding', 'Date': 'Sat, 13 May 2017 02:11:31 GMT', 'Connection': 'keep-alive', 'Content-Encoding': 'gzip', 'Content-Type': 'application/json', 'Transfer-Encoding': 'chunked', 'Cache-Control': 'no-transform, max-age=7200'}

In [5]:
# Decode the response body as JSON and display the full neighbors graph
neighbors_json = r.json()
neighbors_json

{'edges': [{'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['equivalentClass']},
   'obj': 'NCBIGene:11826',
   'pred': 'equivalentClass',
   'sub': 'ENSEMBL:ENSMUSG00000004655'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['http://purl.obolibrary.org/obo/IAO_0000136']},
   'obj': 'NCBIGene:11826',
   'pred': 'IAO:0000136',
   'sub': 'PMID:11741725'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['http://purl.obolibrary.org/obo/IAO_0000136']},
   'obj': 'NCBIGene:11826',
   'pred': 'IAO:0000136',
   'sub': 'PMID:15815633'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['http://purl.obolibrary.org/obo/IAO_0000136']},
   'obj': 'NCBIGene:11826',
   'pred': 'IAO:0000136',
   'sub': 'PMID:15163632'},
  {'meta': {'isDefinedBy': ['https://data.monarchinitiative.org/ttl/ncbigene.ttl'],
    'lbl': ['

In [6]:
# Tabulate the 'nodes' records of the neighbors response and preview the first rows
neighbors_df = json_normalize(r.json(), record_path='nodes')
neighbors_df.head(n=5)

Unnamed: 0,id,lbl,meta
0,PMID:20383338,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
1,PMID:19298815,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
2,PMID:25119150,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
3,PMID:25160756,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."
4,PMID:24700466,,"{'types': ['cliqueLeader', 'Node', 'NamedIndiv..."


In [7]:
# Analyze neighbors' types: start by counting the distinct node ids.
# (The former bare `.unique()` statement was a no-op: only a cell's last expression is
# displayed, so the unique ids were computed twice for nothing.)
print('nodes: {}'.format(len(neighbors_df['id'].unique())))

nodes: 215


In [8]:
# Derive each node's type from the prefix before the first ':' of its id
# (e.g. 'PMID:20383338' -> 'PMID'), then tally the neighbors per prefix.
# The vectorised .str accessor replaces the per-row lambda and yields NaN, rather than
# raising, if an id is missing.
neighbors_df['node_type'] = neighbors_df['id'].str.split(':').str[0]
neighbors_df['node_type'].value_counts()

PMID           208
SO               1
ENSEMBL          1
MonarchData      1
MGI              1
NCBITaxon        1
CHR              1
NCBIGene         1
Name: node_type, dtype: int64

In [9]:
# Filters
# TODO: filter the neighbors by interaction_type. Open question: which strings are valid interaction_type values?

In [10]:
# Fetch the nodes reachable from `gene` via the SciGraph /graph/reachablefrom route
r = requests.get('{}{}/reachablefrom/{}'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError in the cells below
r.raise_for_status()
# Display the response headers (content type, encoding, caching)
r.headers

{'Server': 'nginx/1.10.0 (Ubuntu)', 'Vary': 'Accept, Accept-Encoding', 'Date': 'Sat, 13 May 2017 02:11:33 GMT', 'Connection': 'keep-alive', 'Content-Encoding': 'gzip', 'Content-Length': '502', 'Content-Type': 'application/json', 'Cache-Control': 'no-transform, max-age=7200'}

In [11]:
# Read results: tabulate the 'nodes' records of the reachable-from response.
# (The former bare `r.json()` statement was a no-op that decoded the body a second time;
# only the cell's last expression is displayed.)
reach_df = json_normalize(r.json(), 'nodes')
reach_df.head()

Unnamed: 0,id,lbl,meta
0,MonarchData:ncbigene.ttl,,"{'types': ['Ontology', 'cliqueLeader', 'Node']}"
1,NCBITaxon:10090,Mus musculus,{'http://www.w3.org/2000/01/rdf-schema#label':...
2,CHR:10090chr6,chr6,{'http://www.w3.org/2000/01/rdf-schema#label':...
3,SO:0001217,protein_coding_gene,"{'category': ['gene', 'sequence feature'], 'ht..."


In [12]:
# Count the distinct reachable node ids.
# (The former bare `.unique()` statement was a no-op, so it is dropped.)
print('nodes: {}'.format(len(reach_df['id'].unique())))

nodes: 4


In [13]:
# Derive each reachable node's type from the prefix before the first ':' of its id
# (e.g. 'NCBITaxon:10090' -> 'NCBITaxon'), then tally the nodes per prefix.
# The vectorised .str accessor replaces the per-row lambda and yields NaN, rather than
# raising, if an id is missing.
reach_df['node_type'] = reach_df['id'].str.split(':').str[0]
reach_df['node_type'].value_counts()

CHR            1
SO             1
NCBITaxon      1
MonarchData    1
Name: node_type, dtype: int64

## Edge queries (Monarch)

In [14]:
# Base URL of the Monarch API and the route prefix for bioentity queries.
# NOTE: this rebinds `api` and `endpoint`, which held the SciGraph values until here, so
# re-running an earlier SciGraph cell after this one would query the wrong service.
api, endpoint = (
    'https://api.monarchinitiative.org/api',
    '/bioentity',
)

In [15]:
# Fetch the bioentity record for `gene` and display the decoded JSON.
# Dropped from the original cell: a commented-out alternative request and a bare
# `r.headers` statement, which was a no-op because only the last expression is displayed.
r = requests.get('{}{}/gene/{}'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON decoding error
r.raise_for_status()
r.json()

{'categories': ['gene', 'sequence feature'],
 'chromosome': {'categories': None,
  'consider': None,
  'deprecated': None,
  'description': None,
  'id': None,
  'label': None,
  'replaced_by': None,
  'synonyms': None,
  'taxon': {'id': None, 'label': None},
  'types': None,
  'xrefs': None},
 'consider': None,
 'deprecated': None,
 'description': None,
 'disease_associations': [],
 'families': None,
 'full_name': None,
 'function_associations': None,
 'genotype_associations': [{'evidence_graph': {'edges': [{'obj': ':.well-known/genid/mgivslckey99377',
      'pred': 'GENO:0000382',
      'sub': 'MGI:5797368'},
     {'obj': 'MGI:103201', 'pred': 'GENO:0000408', 'sub': 'MGI:5692545'},
     {'obj': 'MGI:5692545',
      'pred': 'GENO:0000382',
      'sub': ':.well-known/genid/mgivslckey99377'}],
    'nodes': [{'id': ':.well-known/genid/mgivslckey99377',
      'lbl': 'Aqp1<tm1b(EUCOMM)Wtsi>/Aqp1<tm1b(EUCOMM)Wtsi>'},
     {'id': 'MGI:5692545', 'lbl': 'Aqp1<tm1b(EUCOMM)Wtsi>'},
     {'id': '

In [16]:
# get gene-gene interactions and report how many entries the response lists under 'objects'
r = requests.get('{}{}/gene/{}/interactions/'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError on the next line
r.raise_for_status()
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 0


In [17]:
# get gene-phenotype associations, report their number and display the decoded JSON
r = requests.get('{}{}/gene/{}/phenotypes/'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError on the next lines
r.raise_for_status()
# Decode the body once and reuse it for both the count and the display
phenotype_payload = r.json()
# NOTE(review): this counts 'associations', whereas the sibling cells (and the table built
# from this response afterwards) use 'objects' -- confirm which count is intended here.
nassociations = len(phenotype_payload['associations'])
print('Number of nodes associated are {}'.format(nassociations))
phenotype_payload

Number of nodes associated are 20


{'associations': [{'evidence_graph': {'edges': [{'obj': 'ECO:0000059',
      'pred': 'RO:0002558',
      'sub': 'MONARCH:8a52e167842d166d9505c5b756c12175ed31ff4d'},
     {'obj': 'PMID:9468475',
      'pred': 'dc:source',
      'sub': 'MONARCH:8a52e167842d166d9505c5b756c12175ed31ff4d'},
     {'obj': 'HP:0001254',
      'pred': 'OBAN:association_has_object',
      'sub': 'MONARCH:8a52e167842d166d9505c5b756c12175ed31ff4d'},
     {'obj': 'MGI:103201',
      'pred': 'OBAN:association_has_subject',
      'sub': 'MONARCH:8a52e167842d166d9505c5b756c12175ed31ff4d'},
     {'obj': 'HP:0001254', 'pred': 'RO:0002200', 'sub': 'MGI:103201'}],
    'nodes': [{'id': 'HP:0001254', 'lbl': 'Lethargy'},
     {'id': 'MONARCH:8a52e167842d166d9505c5b756c12175ed31ff4d', 'lbl': None},
     {'id': 'ECO:0000059', 'lbl': 'experimental phenotypic evidence'},
     {'id': 'PMID:9468475', 'lbl': None},
     {'id': 'MGI:103201', 'lbl': 'Aqp1'}]},
   'evidence_types': None,
   'id': '5476e36e-d6b4-47e9-af4c-716b39e9b0d5'

In [18]:
# Build a one-column table of the phenotype ids listed under 'objects'.
# The frame is constructed directly instead of json_normalize + renaming the columns:
# for an empty 'objects' list json_normalize returns a frame without columns, and the
# one-name column assignment then fails with a length mismatch.
r_dict = r.json()
gph_df = pd.DataFrame({'phenotype_id': r_dict['objects']})
gph_df.head(2)

Unnamed: 0,phenotype_id
0,GO:0003094PHENOTYPE
1,GO:0006833PHENOTYPE


In [19]:
# Save the gene-phenotype table as a tab-separated file (header row kept, index dropped)
gene_phenotype_tsv = '{}_gene_phenotype.tsv'.format(path)
gph_df.to_csv(gene_phenotype_tsv, sep='\t', header=True, index=False)

In [20]:
# Get gene-disease associations and report how many entries the response lists under 'objects'
r = requests.get('{}{}/gene/{}/diseases/'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError on the next line
r.raise_for_status()
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 0


In [21]:
# get gene-function associations, report the number of 'objects' and display the decoded JSON
r = requests.get('{}{}/gene/{}/function/'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError on the next lines
r.raise_for_status()
# Decode the body once and reuse it for both the count and the display
function_payload = r.json()
nassociations = len(function_payload['objects'])
print('Number of nodes associated are {}'.format(nassociations))
function_payload

Number of nodes associated are 0


{'associations': [],
 'compact_associations': None,
 'facet_counts': {'isa_partof_closure': {}, 'taxon_label': {}},
 'facet_pivot': None,
 'numFound': None,
 'objects': [],
 'start': None}

In [22]:
# get gene-expressedInAnatomy associations and report how many entries are listed under 'objects'
r = requests.get('{}{}/gene/{}/expressed/'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError on the next line
r.raise_for_status()
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 19


In [23]:
# Build a one-column table of the anatomy ids listed under 'objects'.
# The frame is constructed directly instead of json_normalize + renaming the columns:
# for an empty 'objects' list json_normalize returns a frame without columns, and the
# one-name column assignment then fails with a length mismatch.
r_data = r.json()
gaa_df = pd.DataFrame({'expressed_in_anatomy': r_data['objects']})
gaa_df.head(2)

Unnamed: 0,expressed_in_anatomy
0,UBERON:0000998
1,UBERON:0001083


In [24]:
# Save the gene-anatomy table as a tab-separated file (header row kept, index dropped)
gene_anatomy_tsv = '{}_gene_anatomy.tsv'.format(path)
gaa_df.to_csv(gene_anatomy_tsv, sep='\t', header=True, index=False)

In [25]:
# get gene-pub associations and report how many entries the response lists under 'objects'
r = requests.get('{}{}/gene/{}/pubs/'.format(api, endpoint, gene))
# Fail fast on an HTTP error instead of a confusing JSON/KeyError on the next line
r.raise_for_status()
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 0
