In [67]:
import os, sys
import pandas as pd
from tqdm import tqdm
import requests
import json
from pandas.io.json import json_normalize
sys.path.insert(0,'/home/nuria/workspace/utils3/lib/')
import abravo_lib as utils

In [68]:
# Base address of the Monarch Initiative API.
biolink = 'https://api.monarchinitiative.org/api'

# Endpoint (relative to the base address) that serves associations.
endpoint = '/association'

# Output directory for the files written by this notebook; it lives under
# the current working directory.
path = os.getcwd() + '/find_associations'
# exist_ok=True replaces the former "isdir() then makedirs()" pair, which
# could fail if the directory appeared between the check and the creation.
os.makedirs(path, exist_ok=True)

# Prepend the output directory to the module search path.
# NOTE(review): the original label here was "read data", but sys.path only
# affects imports - confirm whether this line is still needed.
sys.path.insert(0, './find_associations/')
# Location noted by the original author:
#/workspace/repurposing/rephetio-su/ngly1-net/networks/curated_1shellMonarchExpansion/data

In [69]:
# function get monarch associations
def hitMonarchApi(node = 'NCBIGene:55768', timeout = 60):
    '''Request the out- and in-edges of one node from the Monarch association API.

    Two GET requests are issued against
    ``https://api.monarchinitiative.org/api/association``:
    ``.../from/<node>`` for out edges and ``.../to/<node>`` for in edges.

    Parameters
    ----------
    node : str
        Node identifier inserted verbatim into the request URL.
    timeout : float, tuple or None
        Forwarded to ``requests.get``. Without a timeout a stalled connection
        blocks forever; pass ``None`` to get that old behaviour back.

    Returns
    -------
    tuple
        ``(r_out, r_in)``: the two response objects, unparsed. No HTTP status
        check is done here; callers decide how to handle failed requests.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (includes timeouts).
    '''

    # Named differently from the notebook-level `biolink` (the bare API root)
    # so the two addresses cannot be confused.
    association_url = 'https://api.monarchinitiative.org/api/association'

    # Query-string parameters.
    # Alternative kept from the original author:
    #   {'use_compact_associations':'true','fl_excludes_evidence':'true'}
    # presumably 'fl_excludes_evidence' trims evidence from the payload - TODO confirm
    query = {'fl_excludes_evidence':'true'}

    # out edges: from/
    r_out = requests.get('{}/from/{}'.format(association_url, node),
                         params=query, timeout=timeout)

    # in edges: to/
    r_in = requests.get('{}/to/{}'.format(association_url, node),
                        params=query, timeout=timeout)

    return r_out, r_in

In [85]:
# prepare monarch api response as sets of triples
def _hashable(value):
    '''Convert decoded-JSON containers into tuples so the value can sit in a set.'''
    if isinstance(value, (list, tuple)):
        return tuple(_hashable(item) for item in value)
    if isinstance(value, dict):
        # sorted keys give the same tuple regardless of insertion order
        return tuple((key, _hashable(value[key])) for key in sorted(value))
    return value


def getMonarchEdges(r_out, r_in):
    '''Turn the two Monarch API responses into sets of (subject, relation, object) triples.

    Parameters
    ----------
    r_out, r_in : response objects
        Each must offer ``.json()`` returning a dict with an 'associations'
        list; every association needs 'subject', 'relation' and 'object'
        entries carrying the fields read below.

    Returns
    -------
    tuple of four sets
        ``(tripleId, tripleXref, tripleType, tripleCategory)``:
        * tripleId       - (subject id, relation id, object id)
        * tripleXref     - (subject xrefs, relation id, object xrefs)
        * tripleType     - (subject types, relation types, object types)
        * tripleCategory - (subject categories, relation categories, object categories)

    The xrefs/types/categories values are passed through ``_hashable``: the
    original code added them raw, which raises ``TypeError: unhashable type``
    as soon as one of them is a list. Values that were already hashable
    (None, strings) are stored unchanged.

    Raises
    ------
    KeyError
        If an expected key is missing from a response.
    '''

    # variables
    tripleId = set()
    tripleXref = set()
    tripleType = set()
    tripleCategory = set()

    # decode both responses up front, then walk out edges followed by in edges
    payloads = [r_out.json()['associations'], r_in.json()['associations']]
    for associations in payloads:
        for association in associations:
            sub = association['subject']
            rel = association['relation']
            obj = association['object']
            tripleId.add((sub['id'], rel['id'], obj['id']))
            # the middle element of the xref triple is the relation *id*
            tripleXref.add((_hashable(sub['xrefs']), rel['id'], _hashable(obj['xrefs'])))
            tripleType.add((_hashable(sub['types']), _hashable(rel['types']), _hashable(obj['types'])))
            tripleCategory.add((_hashable(sub['categories']), _hashable(rel['categories']), _hashable(obj['categories'])))

    return tripleId, tripleXref, tripleType, tripleCategory

In [72]:
# function to evaluate the intersection with network nodes
def filtering(nodes, edges):
    """Keep only the edges whose start and stop nodes are both in `nodes`.

    `nodes` is any iterable of node identifiers; `edges` is an iterable of
    (start, pred, stop) triples. The result is a new set of triples.
    """
    known = set(nodes)
    return {
        (start, pred, stop)
        for (start, pred, stop) in edges
        if {start, stop}.issubset(known)
    }

In [73]:
# add triple
def keepEdges(keep = (), new = ()):
    '''Add every (start, pred, stop) triple from `new` to the accumulator `keep`.

    Parameters
    ----------
    keep : set or other iterable of triples
        Accumulator. A set is updated in place and returned as the same
        object. Anything else (including the default empty tuple) is copied
        into a new set first; the original code called ``.add`` on it and
        raised AttributeError.
    new : iterable of 3-item sequences
        Triples to add; each is stored as a tuple.

    Returns
    -------
    set
        The accumulator containing the old and the new triples.

    Raises
    ------
    ValueError
        If an element of `new` does not unpack into exactly three items.
    '''

    if not isinstance(keep, set):
        keep = set(keep)

    # explicit unpacking keeps the three-item check and normalises to tuples
    keep.update((start, pred, stop) for (start, pred, stop) in new)

    return keep

In [88]:
def printFile(path,fileName,data):
    '''Write triples to ``<path>/<fileName>.tsv`` and announce it on stdout.

    The file starts with the header row subject_id / relation_id / object_id,
    followed by one tab-separated row per 3-item element of `data`, in
    iteration order. Returns None.
    '''

    with open('{}/{}.tsv'.format(path,fileName), 'w') as handle:
        handle.write('subject_id\trelation_id\tobject_id\n')
        handle.writelines(
            '{}\t{}\t{}\n'.format(first, middle, last)
            for (first, middle, last) in data
        )

    print("File '{}/{}.tsv' saved.".format(path,fileName))
    return None

In [75]:
# NGLY1 network: load the tab-separated node table and collect the values
# of its `monarch_id` column as a plain list.
route = 'repurposing/rephetio-su/ngly1-net/connectivity/curated_1shellMonarchExpansion/network'
network_df = pd.read_csv('../{}/network_nodes_monarch_list.tsv'.format(route), sep='\t')
nodes = list(network_df['monarch_id'])

In [90]:
# Get associations from Monarch through its API, one network node at a time,
# and write the accumulated triples to four TSV files under `path`.
# The recorded run of this cell took about 10 minutes for 463 nodes.

# Bare expression: it is not the last statement of the cell, so nothing is
# displayed for it.
nodes
# Accumulators for id, xref, type and category triples.
keep = set(); keepxref = set(); keeptype = set(); keepcategory = set()
for node in tqdm(nodes, total = len(nodes)):
    # fetch the from/ and to/ responses for this node
    r_out, r_in = hitMonarchApi(node)
    edges, xrefs, types, categories = getMonarchEdges(r_out, r_in)
    # only the id triples are restricted to edges between two network nodes;
    # the xref, type and category triples are accumulated unfiltered
    filteredEdges = filtering(nodes, edges)
    keep = keepEdges(keep, filteredEdges)
    keepxref = keepEdges(keepxref, xrefs)
    keeptype = keepEdges(keeptype, types)
    keepcategory = keepEdges(keepcategory, categories)
    
# one output file per accumulator
printFile(path,'new_connections', keep)
printFile(path,'new_xrefs', keepxref)
printFile(path,'new_types', keeptype)
printFile(path,'new_categories', keepcategory)

100%|██████████| 463/463 [09:47<00:00,  1.14s/it]

File '/home/nuria/workspace/monarch/find_associations/new_connections.tsv' saved.
File '/home/nuria/workspace/monarch/find_associations/new_xrefs.tsv' saved.
File '/home/nuria/workspace/monarch/find_associations/new_types.tsv' saved.
File '/home/nuria/workspace/monarch/find_associations/new_categories.tsv' saved.



