In [1]:
import os, sys
import requests
import json
import pandas as pd

In [2]:
# write data: absolute output directory for every file produced by this notebook
path = os.getcwd() + '/find-associations'
# exist_ok=True replaces the isdir()/makedirs() pair: no check-then-create race,
# and re-running the cell is harmless when the directory already exists
os.makedirs(path, exist_ok=True)

# read data: put the output directory on the module search path
# (guarded so that re-running the cell does not stack duplicate sys.path entries)
if './find-associations/' not in sys.path:
    sys.path.insert(0, './find-associations/')

In [3]:
# function to get the Monarch associations of a node
def hitMonarchApi(node = 'NCBIGene:55768', rows = 5, timeout = 60):
    '''Query the Monarch association API for the out and in edges of a node.

    Two GET requests are issued against the association endpoint:
    ``<api>/from/<node>`` for the out edges and ``<api>/to/<node>`` for the
    in edges.

    Parameters
    ----------
    node : str
        Node identifier appended to the endpoint URL, e.g. 'NCBIGene:55768'.
    rows : int
        Value sent as the ``rows`` query parameter. Defaults to 5, the value
        that used to be hard-coded here (the old inline comment suggested
        2000 for a full run). Presumably caps the number of associations
        returned per request -- confirm against the API documentation.
    timeout : float or tuple or None
        Passed to ``requests.get``. Without a timeout a stalled connection
        blocks the notebook forever; pass ``None`` to restore that behavior.

    Returns
    -------
    tuple
        ``(r_out, r_in)``: the response objects of the out-edge and in-edge
        requests, in that order. The HTTP status is NOT checked here.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.get`` (connection failure, timeout, ...).
    '''

    # api address
    biolink = 'https://api.monarchinitiative.org/api/association'

    # query parameters shared by both requests
    parameters = {'fl_excludes_evidence': False, 'rows': rows}

    # out edges: from/
    r_out = requests.get('{}/from/{}'.format(biolink, node),
                         params=parameters, timeout=timeout)

    # in edges: to/
    r_in = requests.get('{}/to/{}'.format(biolink, node),
                        params=parameters, timeout=timeout)

    return r_out, r_in

In [4]:
# prepare the monarch api response as edge tuples, with an intermediate step that keeps all node information
def getEdgesObjects(r_out, r_in):
    '''Flatten two Monarch API responses into parallel lists.

    Both responses are read through ``.json()['associations']``; the
    associations of ``r_out`` come first, followed by those of ``r_in``.

    Returns four lists of equal length, aligned by position:

    * ``sub_l`` -- the ``subject`` entry of each association
    * ``rel_l`` -- the ``relation`` entry of each association
    * ``obj_l`` -- the ``object`` entry of each association
    * ``ref_l`` -- one string per association: the ``id`` of every
      publication joined with ``'|'``, or ``'NA'`` when the association has
      no publications (key missing, null or empty)

    Raises ``KeyError`` if a response has no ``associations`` key or an
    association lacks ``subject``, ``relation`` or ``object``.
    '''

    # variables
    sub_l = list()
    rel_l = list()
    obj_l = list()
    ref_l = list()

    # compose the parallel lists, out edges first
    for response in (r_out, r_in):
        for association in response.json()['associations']:
            sub_l.append(association['subject'])
            rel_l.append(association['relation'])
            obj_l.append(association['object'])

            # add references to each association as a single '|'-joined string;
            # .get() so that an absent key is handled like a null/empty value
            publications = association.get('publications')
            if publications:
                pub_l = [publication['id'] for publication in publications]
            else:
                pub_l = ['NA']
            ref_l.append('|'.join(pub_l))

    return sub_l, rel_l, obj_l, ref_l

def getEdges(sub_l, rel_l, obj_l, ref_l, attribute = 'id'):
    '''
       Build edges from the parallel subject / relation / object / reference lists.

       For every position in sub_l, the chosen attribute is looked up in the
       subject, relation and object entries and combined with the reference
       string found at the same position. Returns a set of
       (subject, relation, object, reference) tuples, so repeated edges
       collapse into one.
    '''
    return {
        (
            subject[attribute],
            rel_l[idx][attribute],
            obj_l[idx][attribute],
            ref_l[idx],
        )
        for idx, subject in enumerate(sub_l)
    }

In [5]:
# function to evaluate the intersection with network nodes
def filtering(nodes, edges):
    """Return the subset of edges whose two endpoints both belong to nodes.

    Each edge is a (start, pred, stop, ref) 4-tuple; only start and stop are
    checked against the node collection. The result is a new set.
    """
    known = set(nodes)
    return {
        (start, pred, stop, ref)
        for (start, pred, stop, ref) in edges
        if known.issuperset((start, stop))
    }

In [6]:
# add triple
def keepEdges(keep = (), new = ()):
    '''Add the edges in `new` to the accumulator `keep` and return it.

    keep -- collection of (start, pred, stop, ref) tuples gathered so far.
            An object with an ``add`` method (e.g. a set) is updated in
            place and returned; anything else -- including the default
            empty tuple -- is first copied into a new set.
    new  -- iterable of (start, pred, stop, ref) tuples to add.

    Returns the accumulator containing the union of both inputs.
    '''

    # A tuple (the default) has no .add(), so the loop below used to raise
    # AttributeError as soon as `new` was non-empty; promote it to a set.
    if not hasattr(keep, 'add'):
        keep = set(keep)

    for (start, pred, stop, ref) in new:
        keep.add((start, pred, stop, ref))

    return keep

In [7]:
def printFile(path,fileName,data):
    '''Write edges to '<path>/<fileName>.tsv' as a tab-separated table.

    A fixed header line is written first, then one line per
    (subject, relation, object, reference) tuple in data, in iteration
    order. A confirmation message is printed afterwards. Returns None.
    '''

    with open('{}/{}.tsv'.format(path,fileName), 'w') as out:
        out.write('subject_id\trelation_id\tobject_id\treference_id_list\n')
        out.writelines(
            '{}\t{}\t{}\t{}\n'.format(sub, pred, obj, ref)
            for (sub, pred, obj, ref) in data
        )

    print("File '{}/{}.tsv' saved.".format(path,fileName))
    return None

In [8]:
# Read the list of nodes of the NGLY1 network.
# `route` is the directory that holds the network files; it is resolved
# relative to the parent of the current working directory ('../' below).
route = 'repurposing/rephetio-su/ngly1-net/connectivity/curated_1shellMonarchExpansion_net/network'
# load the node table (pd.read_table parses tab-separated text by default)
network_df = pd.read_table('../{}/network_nodes_monarch_list.tsv'.format(route))
# NOTE: this preview is not the last expression of the cell, so nothing is displayed
network_df.head(2)
# values of the `monarch_id` column; used below as the node set that edges are filtered against
nodes = list(network_df.monarch_id)

In [9]:
# Save all monarch api json responses.
# The JSON document is assembled by hand across several open() calls, so the
# order of the writes matters: (1) open the wrapper object and its "response"
# array here, (2) append every raw response inside the loop, (3) close the
# array and the object after the loop.
with open('{}/all_not_filtered_monarch_api_response.json'.format(path), 'w') as f:
    f.write('{\n "response": [\n')

# get associations from Monarch: api - ii
# nodes to query (hard-coded list; `nodes` from the network file is only used for filtering)
nodes_l = ['NCBIGene:55768','REACT:R-HSA-532668','OMIM:615273'] 
# accumulator for the filtered edges of all queried nodes
keep = set()
# flag: 1 until the first node has been written, so no comma is emitted before the first array element
first = 1
for node in nodes_l:
    #print('Starts loop with: {}'.format(node))
    # out-edge and in-edge responses for this node
    r_out, r_in = hitMonarchApi(node)
    # save all monarch api json responses results
    # (append mode: the file was created and its array opened before the loop)
    with open('{}/all_not_filtered_monarch_api_response.json'.format(path), 'a') as f:
        if first:
            first = 0
        else:
            # separate this node's responses from the previous node's
            f.write(',\n')
        # each node contributes two array elements: out response, then in response
        json.dump(r_out.json(), f, sort_keys=True, indent=4)
        f.write(',\n')
        json.dump(r_in.json(), f, sort_keys=True, indent=4)
    # flatten both responses into parallel subject / relation / object / reference lists
    sub_l, rel_l, obj_l, ref_l = getEdgesObjects(r_out, r_in)
    # build (subject_id, relation_id, object_id, references) tuples
    edges = getEdges(sub_l, rel_l, obj_l, ref_l, 'id')
    #print('2 layer: {}'.format(edges))
    # keep only the edges whose two endpoints are both in the network node list
    filteredEdges = filtering(nodes, edges)
    #print('filtered edges: {}'.format(filteredEdges))
    # merge into the accumulator (a set, so duplicates across nodes collapse)
    keep = keepEdges(keep, filteredEdges)
#print('Final All Connections: {}'.format(keep))

# write the accumulated edges to '<path>/new_connections.tsv'
printFile(path,'new_connections', keep)
# close the "response" array and the wrapper object opened before the loop
with open('{}/all_not_filtered_monarch_api_response.json'.format(path), 'a') as f:
    f.write('\n ]\n}')
print("File '{}/all_not_filtered_monarch_api_response.json' saved.".format(path))

File '/home/nuria/workspace/monarch/find_associations/new_connections.tsv' saved.
File '/home/nuria/workspace/monarch/find_associations/all_not_filtered_monarch_api_response.json' saved.
