# Expansion of nodes using Monarch APIs
* https://api.monarchinitiative.org/api/#/
* https://scigraph-ontology.monarchinitiative.org/scigraph/docs/#/

In [1]:
import pandas as pd

## Graph queries (SciGraph)
### Get Neighbors
#### GET /graph/neighbors/{id}

In [2]:
# Download neighbors
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: text/tab-separated-values' 'https://scigraph-ontology.monarchinitiative.org/scigraph/graph/neighbors/NCBIGene%3A358?depth=1&blankNodes=false&direction=BOTH&entail=false&project=*' > aqp1_getNeighbors_defaultparm.tsv

In [3]:
# Load the depth=1 neighbour export into a DataFrame.
# The file is tab-separated, so the separator is passed explicitly.
layer1_df = pd.read_csv(
    '/home/nuria/workspace/monarch/test/aqp1_getNeighbors_defaultparm.tsv',
    sep='\t',
)
layer1_df.head()

OSError: File b'/home/nuria/workspace/monarch/aqp1_getNeighbors_defaultparm.tsv' does not exist

In [None]:
# Analyze neighbours' types
# Summary statistics of the depth=1 neighbour table (shown as the cell output)
layer1_df.describe()

In [None]:
# Distinct node identifiers found among the neighbours
layer1_df['id'].unique()

In [None]:
# Use the identifier prefix (the text before the first ':') as the node type,
# then count how many neighbours carry each prefix.
layer1_df['node_type'] = layer1_df['id'].map(
    lambda node_id: node_id.partition(':')[0]
)
layer1_df['node_type'].value_counts()

In [None]:
# Filters
# Depth = 2 does not work
# Filter by interaction_type. BUT what are the strings per interaction_type???

### Get reachable nodes
#### GET /graph/reachablefrom/{id}

In [None]:
# Download reachable nodes
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: text/tab-separated-values' 'https://scigraph-ontology.monarchinitiative.org/scigraph/graph/reachablefrom/NCBIGene%3A358' > aqp1_getReachableNodes_defaultparm.tsv

In [None]:
# Load the reachable-nodes export into a DataFrame.
# The file is tab-separated, so the separator is passed explicitly.
reach_df = pd.read_csv(
    '/home/nuria/workspace/monarch/test/aqp1_getReachableNodes_defaultparm.tsv',
    sep='\t',
)
reach_df.head()

In [None]:
# Analyze reachable nodes' types
# Summary statistics of the reachable-nodes table (shown as the cell output)
reach_df.describe()

In [None]:
# Distinct node identifiers found among the reachable nodes
reach_df['id'].unique()

In [None]:
# Use the identifier prefix (the text before the first ':') as the node type,
# then count how many reachable nodes carry each prefix.
reach_df['node_type'] = reach_df['id'].map(
    lambda node_id: node_id.partition(':')[0]
)
reach_df['node_type'].value_counts()

## Edge Queries (Monarch)
### Get GGI
#### GET /bioentity/gene/{id}/interactions/

In [None]:
# Download gene-gene interactions
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: application/json' 'https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene%3A358/interactions/?fetch_objects=true&rows=20' > aqp1_getGeneInteractions_defaultparm.json

In [None]:
# Read results
import json
from pprint import pprint

# json_normalize is exposed at the pandas top level since pandas 1.0 and the
# pandas.io.json location is deprecated; fall back to the legacy path so the
# notebook still runs on older installations.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize

# The with-block closes the file on exit, so no explicit close() is needed.
with open('/home/nuria/workspace/monarch/aqp1_getGeneInteractions_defaultparm.json') as ggi_f:
    data = json.load(ggi_f)
pprint(data)

In [None]:
# Turn the 'objects' list of the interaction response into a table and
# label its single column 'entrez'.
ggi_df = json_normalize(data, record_path='objects')
ggi_df.columns = ['entrez']
ggi_df

In [None]:
import ijson

# Stream the entries of the top-level 'objects' array and collect them in a list.
# ijson expects a binary file object, hence 'rb'; the with-block closes the
# file on exit, so no explicit close() is needed.
with open('/home/nuria/workspace/monarch/aqp1_getGeneInteractions_defaultparm.json', 'rb') as ggi_f:
    columns = list(ijson.items(ggi_f, 'objects.item'))
print(columns[0])

In [None]:
# Inspect the full list of entries read from the 'objects' array
columns

In [None]:
# Count the entries read from the 'objects' array and report the total
nassociations = len(columns)
print('Number of interactors are {}'.format(nassociations))

In [None]:
# Save the interactor table as a tab-separated file: header row, no index column
ggi_df.to_csv('aqp1_expansion/aqp1_ggi.tsv', sep='\t', index=False)

### Get GPh
#### GET /bioentity/gene/{id}/phenotypes/

In [None]:
# Download gene-phenotype associations
# id: NCBIGene:358
# Default parameters
# curl -X GET --header 'Accept: application/json' 'https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene%3A358/phenotypes/?rows=20&fetch_objects=true'
# Request url = https://api.monarchinitiative.org/api/bioentity/gene/NCBIGene%3A358/phenotypes/?rows=20&fetch_objects=true

In [None]:
import json

import urllib3

# Input gene
gene = 'NCBIGene:358'

# Get phenotypes
http = urllib3.PoolManager()

# Issue the request once, inside the try block, and keep its response.
# With the default retry policy a failed connection is reported as
# MaxRetryError; NewConnectionError is what surfaces when retries are
# disabled, so both are handled.
try:
    r = http.request('GET', 'https://api.monarchinitiative.org/api/bioentity/gene/%s/phenotypes/'%gene)
except (urllib3.exceptions.MaxRetryError, urllib3.exceptions.NewConnectionError):
    print('Connection failed.')
    phenotypes = None
else:
    # r.status and r.headers hold the status code and the response headers;
    # r.data holds the raw body bytes, which are decoded and parsed here.
    phenotypes = json.loads(r.data.decode('utf-8'))

# JSON content (None when the connection failed)
phenotypes


In [None]:
import requests

# api address
api = 'https://api.monarchinitiative.org/api'
endpoint = '/bioentity'

# input gene
gene = 'NCBIGene:358'

# get gene info
r = requests.get('{}{}/gene/{}'.format(api,endpoint,gene))
#r = requests.get('https://api.monarchinitiative.org/api/bioentity/gene/%s/phenotypes/'%gene, headers={'Accept':'application/json'})
# Raise on an HTTP error status instead of going on to parse an error body.
r.raise_for_status()
# The response headers remain available as r.headers; only the last
# expression of a cell is displayed, so the parsed JSON body is the output.
r.json()

In [None]:
# get gene-phenotype
r = requests.get('{}{}/gene/{}/phenotypes/'.format(api,endpoint,gene))
# Raise on an HTTP error status; otherwise an error payload would surface as a
# confusing KeyError on 'associations' below.
r.raise_for_status()
nassociations = len(r.json()['associations'])
print('Number of nodes associated are {}'.format(nassociations))

In [None]:
# get gene-disease
r = requests.get('{}{}/gene/{}/diseases/'.format(api,endpoint,gene))
# Raise on an HTTP error status; otherwise an error payload would surface as a
# confusing KeyError on 'objects' below.
r.raise_for_status()
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

In [None]:
# Turn the 'objects' list of the gene-disease response into a table and
# label its single column 'disease_id'; preview the first two rows.
r_dict = r.json()
gda_df = json_normalize(r_dict, record_path='objects')
gda_df.columns = ['disease_id']
gda_df.head(n=2)

In [None]:
# Save the gene-disease table as a tab-separated file: header row, no index column
gda_df.to_csv('aqp1_expansion/aqp1_gene_disease.tsv', sep='\t', index=False)

In [None]:
# get gene-function
r = requests.get('{}{}/gene/{}/function/'.format(api, endpoint, gene))
# Raise on an HTTP error status; otherwise an error payload would surface as a
# confusing KeyError on 'objects' below.
r.raise_for_status()
# Parse the body once and reuse it for both the count and the cell output.
gene_function = r.json()
nassociations = len(gene_function['objects'])
print('Number of nodes associated are {}'.format(nassociations))
gene_function

In [None]:
# get gene-expressedInAnatomy
r = requests.get('{}{}/gene/{}/expressed/'.format(api, endpoint, gene))
# Raise on an HTTP error status; otherwise an error payload would surface as a
# confusing KeyError on 'objects' below.
r.raise_for_status()
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))

In [None]:
# Turn the 'objects' list of the gene-expression response into a table and
# label its single column 'expressed_in_anatomy'; preview the first two rows.
r_data = r.json()
gaa_df = json_normalize(r_data, record_path='objects')
gaa_df.columns = ['expressed_in_anatomy']
gaa_df.head(n=2)

In [None]:
# Save the gene-anatomy table as a tab-separated file: header row, no index column
gaa_df.to_csv('aqp1_expansion/aqp1_gene_anatomy.tsv', sep='\t', index=False)

In [None]:
# get gene-pub
r = requests.get('{}{}/gene/{}/pubs/'.format(api, endpoint, gene))
# Raise on an HTTP error status; otherwise an error payload would surface as a
# confusing KeyError on 'objects' below.
r.raise_for_status()
nassociations = len(r.json()['objects'])
print('Number of nodes associated are {}'.format(nassociations))