In [67]:
import os, sys
import pandas as pd
from tqdm import tqdm
import requests
import json
from pandas.io.json import json_normalize
sys.path.insert(0,'/home/nuria/workspace/utils3/lib/')
import abravo_lib as utils

In [68]:
# Monarch API base address and the endpoint queried by this notebook
biolink = 'https://api.monarchinitiative.org/api'
endpoint = '/association'

# output directory for the files written by this notebook;
# exist_ok=True replaces the separate isdir() check (no check-then-create race)
path = os.getcwd() + '/find_associations'
os.makedirs(path, exist_ok=True)

# put ./find_associations/ first on the module search path
sys.path.insert(0, './find_associations/')
#/workspace/repurposing/rephetio-su/ngly1-net/networks/curated_1shellMonarchExpansion/data

In [69]:
# function get monarch associations
def hitMonarchApi(node = 'NCBIGene:55768', timeout = 60):
    '''Query the Monarch association endpoint for the edges of one node.

    Two GET requests are issued, both with the query parameter
    ``fl_excludes_evidence=true``:

    * ``<api>/association/from/<node>`` -- out edges
    * ``<api>/association/to/<node>``   -- in edges

    Parameters
    ----------
    node : str
        Node identifier appended to the request URL.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get`` so a
        stalled connection cannot block the notebook forever.

    Returns
    -------
    tuple
        ``(r_out, r_in)``: the two response objects exactly as returned by
        ``requests.get``. The HTTP status is not checked here.
    '''

    # api address (kept local so the function does not depend on notebook globals)
    association_url = 'https://api.monarchinitiative.org/api/association'

    # query-string parameters; passed by keyword because the second positional
    # argument of requests.get is `params`, not a request body
    params = {'fl_excludes_evidence':'true'}

    # out edges: from/
    r_out = requests.get('{}/from/{}'.format(association_url, node),
                         params=params, timeout=timeout)

    # in edges: to/
    r_in = requests.get('{}/to/{}'.format(association_url, node),
                        params=params, timeout=timeout)

    return r_out, r_in

In [85]:
# prepare monarch api response as a tuple
def getMonarchEdges(r_out, r_in):
    '''Turn the two Monarch association responses into sets of triples.

    Every association found in ``r_out.json()['associations']`` and
    ``r_in.json()['associations']`` contributes one triple to each of the
    four returned sets.

    Parameters
    ----------
    r_out, r_in : response objects
        Objects whose ``.json()`` returns a dict with an ``'associations'``
        list; each association has ``'subject'``, ``'relation'`` and
        ``'object'`` dicts.

    Returns
    -------
    tuple of four sets
        ``(tripleId, tripleXref, tripleType, tripleCategory)`` where

        * ``tripleId``       holds ``(subject id, relation id, object id)``
        * ``tripleXref``     holds ``(subject xrefs, relation id, object xrefs)``
        * ``tripleType``     holds ``(subject types, relation types, object types)``
        * ``tripleCategory`` holds ``(subject categories, relation categories,
          object categories)``

        List values are stored as tuples, because lists are unhashable and
        cannot be members of a set. Callers that only need the id triples
        should use the first element of the returned tuple.
    '''

    def _hashable(value):
        # lists/tuples -> (nested) tuples; every other value is returned untouched
        if isinstance(value, (list, tuple)):
            return tuple(_hashable(item) for item in value)
        return value

    # variables
    tripleId = set()
    tripleXref = set()
    tripleType = set()
    tripleCategory = set()

    # compose sets of triples: out edges first, then in edges
    for associations in [r_out.json()['associations'], r_in.json()['associations']]:
        for association in associations:
            sub = association['subject']
            rel = association['relation']
            obj = association['object']
            tripleId.add((sub['id'], rel['id'], obj['id']))
            tripleXref.add((_hashable(sub['xrefs']), rel['id'], _hashable(obj['xrefs'])))
            tripleType.add((_hashable(sub['types']), _hashable(rel['types']), _hashable(obj['types'])))
            tripleCategory.add((_hashable(sub['categories']), _hashable(rel['categories']), _hashable(obj['categories'])))

    return tripleId, tripleXref, tripleType, tripleCategory

In [72]:
# function to evaluate the intersection with network nodes
def filtering(nodes, edges):
    """Keep only the edges whose two end nodes both belong to `nodes`.

    `edges` is an iterable of (start, predicate, stop) triples; the result is
    a new set containing the surviving triples as tuples.
    """
    known = set(nodes)
    return {
        (source, relation, target)
        for (source, relation, target) in edges
        if known >= {source, target}
    }

In [73]:
# add triple
def keepEdges(keep = (), new = ()):
    '''Add the triples in `new` to the collection `keep` and return it as a set.

    Parameters
    ----------
    keep : set or other iterable of triples
        Edges collected so far. A set is extended in place and returned; any
        other iterable (including the default empty tuple, which has no
        ``add`` method) is copied into a new set first.
    new : iterable of (start, pred, stop)
        Edges to add. Each element must unpack into exactly three values.

    Returns
    -------
    set
        All edges of `keep` plus those of `new`, as tuples.
    '''

    if not isinstance(keep, set):
        keep = set(keep)

    # unpack explicitly so malformed (non-triple) elements still fail loudly
    keep.update((start, pred, stop) for (start, pred, stop) in new)

    return keep

In [88]:
def printFile(path,fileName,data):
    '''Save triples to the tab-separated file ``<path>/<fileName>.tsv``.

    Parameters
    ----------
    path : str
        Directory in which the file is written.
    fileName : str
        File name; the ``.tsv`` extension is appended, unless the name
        already ends with it.
    data : iterable of (subject, relation, object)
        Triples to write, one per row, after a header row
        ``subject_id  relation_id  object_id``. Values are written with
        ``str.format`` (so ``None`` becomes the text ``None``).

    Returns
    -------
    None
        A confirmation message with the file path is printed.
    '''

    # avoid 'name.tsv.tsv' when the caller already included the extension
    if fileName.endswith('.tsv'):
        fileName = fileName[:-len('.tsv')]
    filePath = '{}/{}.tsv'.format(path, fileName)

    # sets have no stable iteration order; sort on the string form of each
    # value so that rows are reproducible and None values can be compared
    rows = sorted(data, key=lambda triple: tuple(str(value) for value in triple))

    with open(filePath, 'w', encoding='utf-8') as f:
        f.write('subject_id\trelation_id\tobject_id\n')
        for (sub, pred, obj) in rows:
            f.write('{}\t{}\t{}\n'.format(sub, pred, obj))

    print("File '{}' saved.".format(filePath))

In [75]:
# read the ngly1 network node list; the 'monarch_id' column holds the node ids
route = 'repurposing/rephetio-su/ngly1-net/connectivity/curated_1shellMonarchExpansion/network'
network_df = pd.read_table('../{}/network_nodes_monarch_list.tsv'.format(route))
# (a mid-cell `network_df.head(2)` displayed nothing and was removed;
#  put it on the last line of its own cell to preview the table)
nodes = list(network_df.monarch_id)

In [76]:
# get associations from Monarch: api - i
nodes_l = ['NCBIGene:55768','REACT:R-HSA-532668'] 
keep = set()
for node in tqdm(nodes_l):
    r_out, r_in = hitMonarchApi(node)
    # getMonarchEdges returns four sets (ids, xrefs, types, categories);
    # filtering() expects an iterable of (start, pred, stop) triples, so only
    # the id triples (first element) are passed on
    edges = getMonarchEdges(r_out, r_in)[0]
    filteredEdges = filtering(nodes, edges)
    keep = keepEdges(keep, filteredEdges)
# printFile appends the '.tsv' extension itself
printFile(path, 'new_connections', keep)

 50%|█████     | 1/2 [00:01<00:01,  1.69s/it]

{('Coriell:GM25990', None, 'NCBIGene:55768'), ('NCBIGene:55768', 'BFO:0000050', 'GO:0005634'), ('NCBIGene:55768', 'RO:0002331', 'GO:0006457'), ('NCBIGene:55768', 'BFO:0000050', 'GO:0005737'), ('NCBIGene:55768', 'RO:0002331', 'KEGG-path:map04141'), (':.well-known/genid/person-3213-1', None, 'NCBIGene:55768'), (':.well-known/genid/person-GM25990', None, 'NCBIGene:55768'), ('NCBIGene:55768', 'BFO:0000050', 'GO:0005829'), ('NCBIGene:55768', 'RO:0002331', 'REACT:R-HSA-532668'), ('NCBIGene:55768', 'RO:0002327', 'GO:0005515'), (':.well-known/genid/person-3200-1', None, 'NCBIGene:55768'), (':.well-known/genid/person-3199-1', None, 'NCBIGene:55768'), ('NCBIGene:55768', None, 'OMIM:615273'), ('NCBIGene:55768', 'RO:0002327', 'GO:0046872'), ('Coriell:GM25340', None, 'NCBIGene:55768'), ('NCBIGene:55768', 'RO:0002331', 'GO:0006517')}
{(None, None)}
{(None, None, None)}
{(None, None, None)}


100%|██████████| 2/2 [00:03<00:00,  1.62s/it]

{('REACT:R-HSA-532668', None, 'NCBIGene:6233'), ('REACT:R-HSA-532668', None, 'NCBIGene:23193'), ('REACT:R-HSA-532668', None, 'NCBIGene:55768'), ('REACT:R-HSA-532668', None, 'NCBIGene:7415'), ('Orphanet:100069', 'RO:0002331', 'REACT:R-HSA-532668'), ('OMIM:616687', 'RO:0002331', 'REACT:R-HSA-532668'), ('NCBIGene:5589', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:5589'), ('OMIM:606056', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:64772'), ('REACT:R-HSA-532668', None, 'NCBIGene:79139'), ('NCBIGene:51035', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:7314'), ('DOID:0060213', 'RO:0002331', 'REACT:R-HSA-532668'), ('Orphanet:275864', 'RO:0002331', 'REACT:R-HSA-532668'), ('DOID:0050881', 'RO:0002331', 'REACT:R-HSA-532668'), ('Orphanet:329478', 'RO:0002331', 'REACT:R-HSA-532668'), ('NCBIGene:55768', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:7316'), ('REACT:R-HSA-532668




In [71]:
# prepare the monarch api response as triples, with an intermediate step that keeps all node information
def getEdgesObjects(r_out, r_in):
    '''Split the Monarch association responses into three parallel lists.

       The associations of `r_out` come first, followed by those of `r_in`.
       Returns (subjects, relations, objects): position i of each list holds
       the 'subject', 'relation' and 'object' dictionary of association i.
    '''

    # decode both responses up front, out edges before in edges
    outgoing = r_out.json()['associations']
    incoming = r_in.json()['associations']
    merged = [*outgoing, *incoming]

    # one list per role, each entry being the full node/relation dictionary
    sub_l = [assoc['subject'] for assoc in merged]
    rel_l = [assoc['relation'] for assoc in merged]
    obj_l = [assoc['object'] for assoc in merged]

    return sub_l, rel_l, obj_l

def getEdges(sub_l, rel_l, obj_l, attribute = 'id'):
    '''
       Build (subject, relation, object) triples from three parallel lists of
       dictionaries, taking the value stored under `attribute` in each one.
       It returns a set with one triple per position of `sub_l`.
    '''
    # rel_l and obj_l are looked up by position, so they must be at least as
    # long as sub_l
    return {
        (subject[attribute], rel_l[pos][attribute], obj_l[pos][attribute])
        for pos, subject in enumerate(sub_l)
    }

In [77]:
# get associations from Monarch: api - ii
# (same pipeline as above, but going through the intermediate node dictionaries)
nodes_l = ['NCBIGene:55768','REACT:R-HSA-532668']
keep = set()
for node in tqdm(nodes_l):
    print('Starts loop with: {}'.format(node))

    # query Monarch and split the answer into subject / relation / object lists
    r_out, r_in = hitMonarchApi(node)
    sub_l, rel_l, obj_l = getEdgesObjects(r_out, r_in)

    # id triples of every association returned for this node
    edges = getEdges(sub_l, rel_l, obj_l, attribute='id')
    print('2 layer: {}'.format(edges))

    # keep only the edges whose two ends are nodes of the network
    filteredEdges = filtering(nodes, edges)
    print('filtered edges: {}'.format(filteredEdges))

    keep = keepEdges(keep, filteredEdges)

print('Final All Connections: {}'.format(keep))

  0%|          | 0/2 [00:00<?, ?it/s]

Starts loop with: NCBIGene:55768


 50%|█████     | 1/2 [00:01<00:01,  1.35s/it]

2 layer: {('Coriell:GM25990', None, 'NCBIGene:55768'), ('NCBIGene:55768', 'BFO:0000050', 'GO:0005634'), ('NCBIGene:55768', 'RO:0002331', 'GO:0006457'), ('NCBIGene:55768', 'BFO:0000050', 'GO:0005737'), ('NCBIGene:55768', 'RO:0002331', 'KEGG-path:map04141'), (':.well-known/genid/person-3213-1', None, 'NCBIGene:55768'), (':.well-known/genid/person-GM25990', None, 'NCBIGene:55768'), ('NCBIGene:55768', 'BFO:0000050', 'GO:0005829'), ('NCBIGene:55768', 'RO:0002331', 'REACT:R-HSA-532668'), ('NCBIGene:55768', 'RO:0002327', 'GO:0005515'), (':.well-known/genid/person-3200-1', None, 'NCBIGene:55768'), (':.well-known/genid/person-3199-1', None, 'NCBIGene:55768'), ('NCBIGene:55768', None, 'OMIM:615273'), ('NCBIGene:55768', 'RO:0002327', 'GO:0046872'), ('Coriell:GM25340', None, 'NCBIGene:55768'), ('NCBIGene:55768', 'RO:0002331', 'GO:0006517')}
filtered edges: {('NCBIGene:55768', 'BFO:0000050', 'GO:0005737'), ('NCBIGene:55768', 'RO:0002331', 'KEGG-path:map04141'), ('NCBIGene:55768', 'BFO:0000050', 'GO

100%|██████████| 2/2 [00:07<00:00,  2.85s/it]

2 layer: {('REACT:R-HSA-532668', None, 'NCBIGene:6233'), ('REACT:R-HSA-532668', None, 'NCBIGene:23193'), ('REACT:R-HSA-532668', None, 'NCBIGene:55768'), ('REACT:R-HSA-532668', None, 'NCBIGene:7415'), ('Orphanet:100069', 'RO:0002331', 'REACT:R-HSA-532668'), ('OMIM:616687', 'RO:0002331', 'REACT:R-HSA-532668'), ('NCBIGene:5589', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:5589'), ('OMIM:606056', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:64772'), ('REACT:R-HSA-532668', None, 'NCBIGene:79139'), ('NCBIGene:51035', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:7314'), ('DOID:0060213', 'RO:0002331', 'REACT:R-HSA-532668'), ('Orphanet:275864', 'RO:0002331', 'REACT:R-HSA-532668'), ('DOID:0050881', 'RO:0002331', 'REACT:R-HSA-532668'), ('Orphanet:329478', 'RO:0002331', 'REACT:R-HSA-532668'), ('NCBIGene:55768', 'RO:0002331', 'REACT:R-HSA-532668'), ('REACT:R-HSA-532668', None, 'NCBIGene:7316'), ('REACT:R-H


