In [None]:
import requests
import pandas as pd
import numpy as np
from collections import defaultdict

In [None]:
def export_sparc_asctb_csv(root_node_id, api_key, file_path='./CFF_ASCT-B.csv', max_search_depth=7, relationship_type='http://purl.obolibrary.org/obo/BFO_0000050'):
    """
    Generates a HuBMAP ASCT+B compatible CSV export of the anatomical ontological labels associated with the provided root node id as identified in the SciGraph
    ontology graph. The ontology search is filtered on the provided relationship predicate (relationship_type), and further confined by search depth 
    (max_search_depth). An API key must be provided to the endpoint, which can be obtained via registration at https://scicrunch.org/.
    
    One CSV row is written per term returned by the search. Each row lists the term's ancestry chain, outermost ancestor first and the
    term itself last, as repeating (AS/<n>, AS/<n>/LABEL, AS/<n>/ID) column triples. Rows shorter than the deepest chain are padded with
    empty cells.
    
    References
    ---------
    SPARC SciGraph: https://scicrunch.org/browse/api-docs/index.html?url=https://scicrunch.org/swagger-docs/swagger.json
    HuBMAP ASCT+B file format: https://hubmapconsortium.github.io/ccf-asct-reporter/docs?id=2
    
    Arguments
    ----------
    root_node_id : str
        The root node from which to search. Recommend using a major anatomical structure. Example: 'UBERON:0000948'
    api_key : str
        Your api key from https://scicrunch.org/
    file_path : str
        The path and file name to write. Example: './CFF_ASCT-B.csv'
    max_search_depth : int
        The maximum permitted number of edge traversals in the search
    relationship_type : str
        The relationship type on which the query edge traversal is filtered. Example: 'http://purl.obolibrary.org/obo/BFO_0000050'
        
    Returns
    ----------
    None. The CSV is written to file_path as a side effect.
    
    Raises
    ----------
    ValueError
        If the search returned no terms, so there is nothing to export.
    """
    
    # Only the term list and the child -> parent index are needed for the export
    nodeList, _nodeIdx, _objEdgeIdx, subEdgeIdx = query_sparc_ontology_graph(root_node_id, api_key, max_search_depth, relationship_type)
    
    if not nodeList:
        raise ValueError('No ontology terms were returned for root node ' + repr(root_node_id) + '; nothing to export')
     
    # List the ancestry of a node, outermost ancestor first. Assumes a strict hierarchy (one parent per term).
    # The walk is iterative and remembers visited ids so that a cyclic parent link cannot loop forever,
    # and it stops at a missing (None) parent instead of failing on it.
    def listAncestry(node):
        ancestryList = []
        visitedIds = set()
        current = node
        while current is not None and current.get('id') not in visitedIds:
            ancestryList.append(current)
            visitedIds.add(current.get('id'))
            current = subEdgeIdx.get(current.get('id'))
        # The walk collected node -> ... -> top-most ancestor; flip it so ancestors appear first
        ancestryList.reverse()
        return ancestryList
            
    # Generate the ancestry list of every term
    allAncestryList = [listAncestry(node) for node in nodeList]
        
    #Generate dataframe header columns based on max depth
    max_depth = max(len(ancestryList) for ancestryList in allAncestryList)
    columns = []
    for i in range(1, max_depth+1):
        columns.append('AS/'+str(i))
        columns.append('AS/'+str(i)+'/LABEL')
        columns.append('AS/'+str(i)+'/ID')
        
    #Convert node objects to flat row representations for CSV export, padding jagged rows to make pandas happy
    csv_term_structure = ['name', 'label', 'id']
    row_width = max_depth * len(csv_term_structure)
    csv_ready_arr = []
    for termList in allAncestryList:
        row = [term[k] for term in termList for k in csv_term_structure]
        row.extend([None] * (row_width - len(row)))
        csv_ready_arr.append(row)
        
    #Export padded list
    csv_df = pd.DataFrame(csv_ready_arr, columns=columns)
    csv_df.to_csv(file_path, index=False)

In [None]:
def fetch_sparc_term_graph_object(root_node_id, api_key, max_search_depth=7, relationship_type='http://purl.obolibrary.org/obo/BFO_0000050'):
    """
    Generates a normalized, pared-down object graph representation of a SPARC scigraph ontology search anchored at the provided 
    root_node_id. The ontology search is filtered on the provided relationship predicate (relationship_type), and further confined by search depth 
    (max_search_depth). An API key must be provided to the endpoint, which can be obtained via registration at https://scicrunch.org/.
    
    Each node/ontological term representation object in the graph contains a children <list> property. Terms without children
    carry an empty list.
    Example {name:'Heart', label:'Heart', children:[{<child_1>}, {<child_2>}, ...]}
    
    References
    ---------
    SPARC SciGraph: https://scicrunch.org/browse/api-docs/index.html?url=https://scicrunch.org/swagger-docs/swagger.json
    HuBMAP ASCT+B file format: https://hubmapconsortium.github.io/ccf-asct-reporter/docs?id=2
    
    Arguments
    ----------
    root_node_id : str
        The root node from which to search. Recommend using a major anatomical structure. Example: 'UBERON:0000948'
    api_key : str
        Your api key from https://scicrunch.org/
    max_search_depth : int
        The maximum permitted number of edge traversals in the search
    relationship_type : str
        The relationship type on which the query edge traversal is filtered. Example: 'http://purl.obolibrary.org/obo/BFO_0000050'
        
    Returns
    ----------
    Two-element list consisting of:
        [0]: Root node reference to the generated term graph object
        [1]: Index mapping id to term object for efficient term lookup by id
        
    Raises
    ----------
    KeyError
        If root_node_id is not among the ids of the terms returned by the search.
    """
    
    nodeList, nodeIdx, objEdgeIdx, _subEdgeIdx = query_sparc_ontology_graph(root_node_id, api_key, max_search_depth, relationship_type)
    
    # Attach the children list to every term. Leaf terms have no entry in the children index,
    # so default to an empty list to keep 'children' a list on every node (never None).
    for node in nodeList:
        node['children'] = objEdgeIdx.get(node.get('id'), [])
        
    if root_node_id not in nodeIdx:
        raise KeyError('Root node ' + repr(root_node_id) + ' was not found among the terms returned by the search')
    root_node = nodeIdx[root_node_id]

    return [root_node, nodeIdx]
    

In [None]:
def query_sparc_ontology_graph(root_node_id, api_key, max_search_depth=7, relationship_type='http://purl.obolibrary.org/obo/BFO_0000050', timeout=120):
    """
    Executes a neighbor search against the scicrunch API. Emits a pared-down subset of response data suitable for construction of
    a partonomy graph.
    
    Arguments
    ----------
    root_node_id : str
        The root node from which to search. Example: 'UBERON:0000948'
    api_key : str
        Your api key from https://scicrunch.org/
    max_search_depth : int
        The maximum permitted number of edge traversals in the search
    relationship_type : str
        The relationship type on which the query edge traversal is filtered. Example: 'http://purl.obolibrary.org/obo/BFO_0000050'
    timeout : float or tuple or None
        Timeout in seconds handed to requests.get so that an unresponsive server cannot block forever. None disables the timeout.
    
    Returns
    ----------
    Four-element list consisting of the following
        [0]: nodeList: Unordered list of ontology term dictionary objects with the following keys [{name:<str>, label:<str>, id:<str>}, ...]
        [1]: nodeIdx: Dictionary for efficient node lookup by term id. Structure is {<id>:{<term_object>}}
        [2]: objEdgeIdx: Dictionary for efficient children search from a term id. Structure is {<id>:[{<child_term_1>}, {<child_term_2>}, ...]}
        [3]: subEdgeIdx: Dictionary for efficient parent search from a term id. Structure is {<id>:{<parent_term>}}
        
    Raises
    ----------
    requests.exceptions.HTTPError
        If the API answers with a 4xx/5xx status (for example an invalid api key).
    requests.exceptions.Timeout
        If the server does not respond within the given timeout.
    KeyError
        If the response body lacks the 'nodes' or 'edges' entries.
    """
    
    NEIGHBOR_NODE_URL = 'https://scicrunch.org/api/1/scigraph/graph/neighbors/'+root_node_id
    NEIGHBOR_NODE_PARAMS = {'depth':max_search_depth, 'blankNodes':False, 'relationshipType':relationship_type,'direction':'INCOMING','entail':False,'key':api_key}
    
    #Execute request against scicrunch API and parse the neighbor data
    neighbor_node_response_obj = requests.get(url=NEIGHBOR_NODE_URL, params=NEIGHBOR_NODE_PARAMS, timeout=timeout)
    #Fail with the real HTTP error rather than a confusing JSON/KeyError further down
    neighbor_node_response_obj.raise_for_status()
    neighbor_node_response = neighbor_node_response_obj.json()
    
    #Initialize and index neighbor terms by id
    nodeList = []
    nodeIdx = {}
    for node in neighbor_node_response['nodes']:
        abbreviatedNode = {'name':node.get('lbl'), 'label': node.get('lbl'), 'id': node.get('id')}
        nodeList.append(abbreviatedNode)
        nodeIdx[node.get('id')] = abbreviatedNode
        
    #Index terms by edge "object" ID to facilitate ancestry resolution
    objEdgeIdx = defaultdict(list) #Index to look up children from a term id
    subEdgeIdx = {} #Index to look up parent from a term id
    for edge in neighbor_node_response['edges']:
        child_id = edge.get('sub')
        parent_id = edge.get('obj')
        child = nodeIdx.get(child_id)
        parent = nodeIdx.get(parent_id)
        #Skip edges that point at a term missing from the node list; indexing them would
        #plant None entries in the children lists and the parent index
        if child is None or parent is None:
            continue
        objEdgeIdx[parent_id].append(child)
        #A term with several parents keeps only the last one seen (a strict hierarchy is assumed)
        subEdgeIdx[child_id] = parent
        
    return [nodeList, nodeIdx, objEdgeIdx, subEdgeIdx]