# Expansion of nodes using Monarch APIs
* https://api.monarchinitiative.org/api/#/
* https://scigraph-ontology.monarchinitiative.org/scigraph/docs/#/

In [1]:
import os

import pandas as pd

## Graph queries (SciGraph)
### Get Neighbors
#### GET /graph/neighbors/{id}

In [2]:
# Download neighbors
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: text/tab-separated-values' 'https://scigraph-ontology.monarchinitiative.org/scigraph/graph/neighbors/NCBIGene%3A358?depth=1&blankNodes=false&direction=BOTH&entail=false&project=*' > aqp1_getNeighbors_defaultparm.tsv

In [3]:
# Load the depth-1 neighbours table (tab-separated) and preview its first rows
layer1_df = pd.read_csv(
    '/home/nuria/workspace/monarch/aqp1_getNeighbors_defaultparm.tsv',
    sep='\t',
)
layer1_df.head()

Unnamed: 0,id,label,categories
0,PMID:21612401,,[]
1,PMID:16596446,,[]
2,PMID:17012249,,[]
3,PMID:26074259,,[]
4,PMID:17273788,,[]


In [4]:
# Analyze neighbours' types
# describe() summarises every column of the neighbours table; the summary is
# shown as this cell's output.
layer1_df.describe()

Unnamed: 0,id,label,categories
count,288,4,288
unique,288,4,2
top,NCBIGene:1273,AQP1,[]
freq,1,1,283


In [5]:
# List the distinct values of the `id` column of the neighbours table
layer1_df.id.unique()

array(['PMID:21612401', 'PMID:16596446', 'PMID:17012249', 'PMID:26074259',
       'PMID:17273788', 'PMID:23928039', 'PMID:7491270', 'PMID:18544259',
       'PMID:18538351', 'PMID:1373524', 'PMID:19670620', 'PMID:23268390',
       'PMID:24777974', 'PMID:7521883', 'PMID:12051745', 'PMID:12084581',
       'PMID:21360438', 'PMID:20578142', 'PMID:22964306', 'PMID:7532004',
       'PMID:8408657', 'PMID:1510932', 'PMID:24014128', 'PMID:26838488',
       'PMID:20461409', 'PMID:18282122', 'PMID:21244858', 'PMID:23450058',
       'PMID:7507481', 'PMID:19787701', 'PMID:10872456', 'PMID:11773634',
       'PMID:17545093', 'PMID:12477932', 'PMID:26823734', 'PMID:18563339',
       'PMID:26151179', 'PMID:15135660', 'PMID:11922632', 'PMID:18501347',
       'PMID:17077939', 'PMID:15948717', 'PMID:11076974', 'PMID:20969805',
       'PMID:10564231', 'PMID:20806077', 'PMID:18280225', 'PMID:20149606',
       'PMID:21373963', 'PMID:14701836', 'PMID:26181025', 'PMID:26786101',
       'PMID:16189514', 'PMID:23

In [6]:
# Node type = the part of each identifier before its first ':'.
# Store it in a new `node_type` column, then tally the neighbours per type.
layer1_df['node_type'] = [node_id.split(':')[0] for node_id in layer1_df['id']]
layer1_df['node_type'].value_counts()

PMID           279
NCBIGene         2
OMIM             1
HGNC             1
NCBITaxon        1
CHR              1
ENSEMBL          1
MonarchData      1
SO               1
Name: node_type, dtype: int64

In [7]:
# Filters
# Depth = 2 does not work
# Filter by interaction_type. TODO: find out which strings are valid interaction_type values.

### Get reachable nodes
#### GET /graph/reachablefrom/{id}

In [8]:
# Download reachable nodes
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: text/tab-separated-values' 'https://scigraph-ontology.monarchinitiative.org/scigraph/graph/reachablefrom/NCBIGene%3A358' > aqp1_getReachableNodes_defaultparm.tsv

In [9]:
# Load the reachable-nodes table (tab-separated) and preview its first rows
reach_df = pd.read_csv(
    '/home/nuria/workspace/monarch/aqp1_getReachableNodes_defaultparm.tsv',
    sep='\t',
)
reach_df.head()

Unnamed: 0,id,label,categories
0,MonarchData:ncbigene.ttl,,[]
1,CHR:9606chr7p14.3,,[]
2,NCBITaxon:9606,Homo sapiens,[]
3,SO:0001217,protein_coding_gene,"[gene, sequence feature]"


In [10]:
# Analyze reachable nodes' types
# describe() summarises every column of the reachable-nodes table; the summary
# is shown as this cell's output.
reach_df.describe()

Unnamed: 0,id,label,categories
count,4,2,4
unique,4,2,2
top,CHR:9606chr7p14.3,Homo sapiens,[]
freq,1,1,3


In [11]:
# List the distinct values of the `id` column of the reachable-nodes table
reach_df.id.unique()

array(['MonarchData:ncbigene.ttl', 'CHR:9606chr7p14.3', 'NCBITaxon:9606',
       'SO:0001217'], dtype=object)

In [12]:
# Node type = the part of each identifier before its first ':'.
# Store it in a new `node_type` column, then tally the reachable nodes per type.
reach_df['node_type'] = [node_id.split(':')[0] for node_id in reach_df['id']]
reach_df['node_type'].value_counts()

MonarchData    1
SO             1
NCBITaxon      1
CHR            1
Name: node_type, dtype: int64

## Edge Queries (Monarch)
### Get GGI
#### GET /bioentity/gene/{id}/interactions/

In [13]:
# Download gene-gene interactions
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: application/json' 'https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene%3A358/interactions/?fetch_objects=true&rows=20' > aqp1_getGeneInteractions_defaultparm.json

In [14]:
# Read results
import json
from pprint import pprint

try:
    # json_normalize is a top-level pandas function since pandas 1.0; the
    # pandas.io.json import path is deprecated from that version on.
    from pandas import json_normalize
except ImportError:
    # Older pandas releases only provide it under pandas.io.json
    from pandas.io.json import json_normalize

# Parse the saved gene-gene interaction response into a Python dict.
# The `with` block closes the file on exit, so no explicit close() is needed;
# the encoding is stated explicitly instead of relying on the platform default.
with open('/home/nuria/workspace/monarch/aqp1_getGeneInteractions_defaultparm.json',
          encoding='utf-8') as ggi_f:
    data = json.load(ggi_f)
pprint(data)

{'associations': [{'evidence_graph': {'edges': [{'obj': 'NCBIGene:358',
                                                 'pred': 'OBAN:association_has_subject',
                                                 'sub': 'MONARCH:18fafed02009af95ed344c953ca07f085873576d'},
                                                {'obj': 'NCBIGene:123722',
                                                 'pred': 'OBAN:association_has_object',
                                                 'sub': 'MONARCH:18fafed02009af95ed344c953ca07f085873576d'},
                                                {'obj': 'NCBIGene:123722',
                                                 'pred': 'RO:0002434',
                                                 'sub': 'NCBIGene:358'},
                                                {'obj': 'PMID:25416956',
                                                 'pred': 'dc:source',
                                                 'sub': 'MONARCH:18fafed02009af95ed344c953ca07f0

                                     'consider': None,
                                     'deprecated': None,
                                     'description': None,
                                     'id': 'PMID:25416956',
                                     'label': None,
                                     'replaced_by': None,
                                     'synonyms': None,
                                     'types': None}],
                   'qualifiers': None,
                   'relation': {'categories': None,
                                'consider': None,
                                'deprecated': None,
                                'description': None,
                                'id': 'RO:0002434',
                                'label': 'interacts with',
                                'replaced_by': None,
                                'synonyms': None,
                                'types': None},
                   'slim': None,
          

In [15]:
# Turn the list stored under the top-level 'objects' key into a one-column
# table and name that column 'entrez'
ggi_df = json_normalize(data, record_path='objects')
ggi_df.columns = ['entrez']
ggi_df

Unnamed: 0,entrez
0,NCBIGene:100996717
1,NCBIGene:10188
2,NCBIGene:102466755
3,NCBIGene:10253
4,NCBIGene:10488
5,NCBIGene:11007
6,NCBIGene:11043
7,NCBIGene:123722
8,NCBIGene:125115
9,NCBIGene:158405


In [16]:
import ijson

# Stream only the items of the top-level 'objects' array from the saved response.
# ijson parses bytes, so the file is opened in binary mode; the `with` block
# closes the file, which makes a separate close() call unnecessary.
with open('/home/nuria/workspace/monarch/aqp1_getGeneInteractions_defaultparm.json', 'rb') as ggi_f:
    # ijson.items is lazy: materialise it while the file is still open
    columns = list(ijson.items(ggi_f, 'objects.item'))
print(columns[0])

NCBIGene:100996717


In [17]:
# Show every id collected from the 'objects' array of the interactions response
# (`columns` is a plain list of id strings, not DataFrame columns)
columns

['NCBIGene:100996717',
 'NCBIGene:10188',
 'NCBIGene:102466755',
 'NCBIGene:10253',
 'NCBIGene:10488',
 'NCBIGene:11007',
 'NCBIGene:11043',
 'NCBIGene:123722',
 'NCBIGene:125115',
 'NCBIGene:158405',
 'NCBIGene:22806',
 'NCBIGene:22807',
 'NCBIGene:23281',
 'NCBIGene:2353',
 'NCBIGene:26575',
 'NCBIGene:284001',
 'NCBIGene:285622',
 'NCBIGene:30008',
 'NCBIGene:339834',
 'NCBIGene:358',
 'NCBIGene:373',
 'NCBIGene:386675',
 'NCBIGene:386682',
 'NCBIGene:3881',
 'NCBIGene:3884',
 'NCBIGene:4188',
 'NCBIGene:54507',
 'NCBIGene:54793',
 'NCBIGene:55118',
 'NCBIGene:64651',
 'NCBIGene:64753',
 'NCBIGene:6477',
 'NCBIGene:6925',
 'NCBIGene:7185',
 'NCBIGene:7186',
 'NCBIGene:79734',
 'NCBIGene:80817',
 'NCBIGene:83755',
 'NCBIGene:83899',
 'NCBIGene:85291',
 'NCBIGene:85376',
 'NCBIGene:8601']

In [18]:
# Report how many ids were collected in `columns`.
# NOTE(review): the list displayed above also contains the query gene
# NCBIGene:358 itself — confirm whether it should count as an interactor.
nassociations = len(columns)
summary_template = 'Number of interactors are {}'
print(summary_template.format(nassociations))

Number of interactors are 42


In [19]:
# Save the interactor table as a tab-separated file (header row, no index).
# The output folder is created first so the write does not fail when
# 'aqp1_expansion' does not exist yet (e.g. on a fresh checkout).
os.makedirs('aqp1_expansion', exist_ok=True)
ggi_df.to_csv('aqp1_expansion/aqp1_ggi.tsv', sep='\t', index=False, header=True)

### Get GPh
#### GET /bioentity/gene/{id}/phenotypes/

In [20]:
# Download gene-phenotype associations
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: application/json' 'https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene%3A358/phenotypes/?rows=20&fetch_objects=true'
# Request url = https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene%3A358/phenotypes/?rows=20&fetch_objects=true

In [22]:
import json

import urllib3

# Input gene
gene = 'NCBIGene:358'

# Gene -> phenotypes endpoint for the input gene
phenotypes_url = 'https://api.monarchinitiative.org/api/bioentity/gene/%s/phenotypes/' % gene

# Send the request exactly once, inside the try block, so that a connection
# problem is reported for the call whose response is actually used.
http = urllib3.PoolManager()
try:
    r = http.request('GET', phenotypes_url)
except urllib3.exceptions.HTTPError:
    # HTTPError is the base class of urllib3's exceptions, so this also covers
    # the error raised after the default retries are exhausted.
    print('Connection failed.')
    # Without a response there is nothing to parse below: stop here.
    raise

# JSON content (r.status and r.headers hold the status code and headers if needed)
json.loads(r.data.decode('utf-8'))




{'associations': [],
 'compact_associations': None,
 'facet_counts': {'object_closure': {}, 'subject_taxon_label': {}},
 'facet_pivot': None,
 'numFound': None,
 'objects': [],
 'start': None}

In [50]:
import requests

# api address
api = 'https://api.monarchinitiative.org/api'
endpoint = '/bioentity'

# input gene
gene = 'NCBIGene:358'

# get gene info
# The timeout (seconds) keeps the cell from hanging forever on an unresponsive
# server; raise_for_status() turns HTTP 4xx/5xx answers into an exception
# instead of trying to parse an error body as JSON.
r = requests.get('{}{}/gene/{}'.format(api, endpoint, gene), timeout=60)
r.raise_for_status()
r.json()

{'categories': ['gene', 'sequence feature'],
 'chromosome': {'categories': None,
  'consider': None,
  'deprecated': None,
  'description': None,
  'id': None,
  'label': None,
  'replaced_by': None,
  'synonyms': None,
  'taxon': {'id': None, 'label': None},
  'types': None,
  'xrefs': None},
 'consider': None,
 'deprecated': None,
 'description': None,
 'disease_associations': [{'evidence_graph': {'edges': [{'obj': 'ECO:0000033',
      'pred': 'RO:0002558',
      'sub': 'MONARCH:8e8f5c993a9ac478fb84f162b39dfdd2b5cf56fe'},
     {'obj': 'DOID:899',
      'pred': 'RO:0002607',
      'sub': ':.well-known/genid/NCBIGene358-MESHD015529VL'},
     {'obj': 'NCBIGene:358',
      'pred': 'GENO:0000418',
      'sub': ':.well-known/genid/NCBIGene358-MESHD015529VL'},
     {'obj': ':.well-known/genid/NCBIGene358-MESHD015529VL',
      'pred': 'OBAN:association_has_subject',
      'sub': 'MONARCH:8e8f5c993a9ac478fb84f162b39dfdd2b5cf56fe'},
     {'obj': 'PMID:18988797',
      'pred': 'dc:source',
    

In [47]:
# get gene-phenotype
# Query the phenotypes endpoint for the gene and count the entries of the
# 'associations' list in the JSON response
phenotypes_url = '{}{}/gene/{}/phenotypes/'.format(api, endpoint, gene)
r = requests.get(phenotypes_url)
nassociations = len(r.json()['associations'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 0


In [29]:
# get gene-disease
# Query the diseases endpoint for the gene and count the entries of the
# 'objects' list in the JSON response
diseases_url = '{}{}/gene/{}/diseases/'.format(api, endpoint, gene)
r = requests.get(diseases_url)
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of diseases associated are 13


In [33]:
# Parse the latest response held in `r` and turn its 'objects' list into a
# one-column table named 'disease_id'; preview the first two rows
r_dict = r.json()
gda_df = json_normalize(r_dict, record_path='objects')
gda_df.columns = ['disease_id']
gda_df.head(n=2)

Unnamed: 0,disease_id
0,DOID:0060164
1,DOID:10763


In [46]:
# Save the gene-disease table as a tab-separated file (header row, no index).
# The output folder is created first so the write does not fail when
# 'aqp1_expansion' does not exist yet (e.g. on a fresh checkout).
os.makedirs('aqp1_expansion', exist_ok=True)
gda_df.to_csv('aqp1_expansion/aqp1_gene_disease.tsv', sep='\t', index=False, header=True)

In [39]:
# get gene-function
# Query the function endpoint for the gene, count the entries of the 'objects'
# list, and display the parsed response as the cell output
function_url = '{}{}/gene/{}/function/'.format(api, endpoint, gene)
r = requests.get(function_url)
function_payload = r.json()
nassociations = len(function_payload['objects'])
print('Number of nodes associated are {}'.format(nassociations))
function_payload

Number of diseases associated are 0


{'associations': [],
 'compact_associations': None,
 'facet_counts': {'isa_partof_closure': {}, 'taxon_label': {}},
 'facet_pivot': None,
 'numFound': None,
 'objects': [],
 'start': None}

In [42]:
# get gene-expressedInAnatomy
# Query the expressed endpoint for the gene and count the entries of the
# 'objects' list in the JSON response
expressed_url = '{}{}/gene/{}/expressed/'.format(api, endpoint, gene)
r = requests.get(expressed_url)
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 20


In [44]:
# Parse the latest response held in `r` and turn its 'objects' list into a
# one-column table named 'expressed_in_anatomy'; preview the first two rows
r_data = r.json()
gaa_df = json_normalize(r_data, record_path='objects')
gaa_df.columns = ['expressed_in_anatomy']
gaa_df.head(n=2)

Unnamed: 0,expressed_in_anatomy
0,UBERON:0000007
1,UBERON:0000057


In [45]:
# Save the gene-anatomy table as a tab-separated file (header row, no index).
# The output folder is created first so the write does not fail when
# 'aqp1_expansion' does not exist yet (e.g. on a fresh checkout).
os.makedirs('aqp1_expansion', exist_ok=True)
gaa_df.to_csv('aqp1_expansion/aqp1_gene_anatomy.tsv', sep='\t', index=False, header=True)

In [51]:
# get gene-pub
# Query the pubs endpoint for the gene and count the entries of the
# 'objects' list in the JSON response
pubs_url = '{}{}/gene/{}/pubs/'.format(api, endpoint, gene)
r = requests.get(pubs_url)
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

Number of nodes associated are 0
