In [3]:
import neo4j
from neo4j import GraphDatabase, RoutingControl
import networkx as nx
import requests

In [4]:
# Base URL of the DesignSafe publications API (v2). It is requested as-is for the
# paginated listing and with "/<project_id>" appended for a single publication.
pub_listing_url = "https://www.designsafe-ci.org/api/publications/v2"

def get_ds_pubs(limit: int = 100):
    """
    Return a generator of top-level publication metadata.

    Pages through the publication listing endpoint ``limit`` entries at a time
    and yields every item found in each page's ``"result"`` list.

    Args:
        limit: Page size sent to the API as the ``limit`` query parameter.
            Must be positive.

    Yields:
        One element of the listing's ``"result"`` list per iteration.

    Raises:
        ValueError: If ``limit`` is not positive.
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    if limit <= 0:
        # A zero-sized page could never be "shorter than limit", so the loop
        # below would not terminate.
        raise ValueError("limit must be a positive integer")

    offset = 0
    while True:
        res = requests.get(pub_listing_url, params={"offset": offset, "limit": limit})
        # Surface HTTP errors directly instead of a KeyError on "result" below.
        res.raise_for_status()
        page = res.json()["result"]

        yield from page
        # A short (or empty) page means the listing is exhausted.
        if len(page) < limit:
            break
        offset += len(page)


def get_publication(project_id: str):
    """
    Retrieve published metadata using the project ID.

    Args:
        project_id: Project identifier appended to the publications URL
            (e.g. "PRJ-1811").

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    res = requests.get(f"{pub_listing_url}/{project_id}")
    # Without this check an error payload would be returned to the caller as if
    # it were publication metadata.
    res.raise_for_status()
    return res.json()


def iterate_publications():
    """
    Generator of all published metadata.

    Walks the publication listing and, for each entry whose "type" is neither
    "other" nor "field_reconnaissance", fetches and yields the publication's
    full metadata by its "projectId".
    """
    skipped_types = ["other", "field_reconnaissance"]
    for listing_entry in get_ds_pubs():
        if listing_entry["type"] in skipped_types:
            continue
        yield get_publication(listing_entry["projectId"])

In [5]:
# Connection URI handed to GraphDatabase.driver() by the cells below.
# Those calls pass no auth argument.
URI = "neo4j://localhost:7687"

In [6]:
def setup_db(driver: neo4j.Driver):
    """
    Prepare the database for DesignSafe entities.

    Issues a single statement that creates the `entity_uuid` index on the
    `uuid` property of `Entity` nodes unless it already exists.
    """
    create_index_query = """

    CREATE INDEX entity_uuid IF NOT EXISTS FOR (e:Entity) ON e.uuid
    """
    driver.execute_query(create_index_query)


def cleanup_db(driver: neo4j.Driver):
    """Delete every node in the database, detaching its relationships first."""
    delete_all_query = "MATCH (n) DETACH DELETE n"
    driver.execute_query(delete_all_query)

In [7]:
def ingest_entity(driver: neo4j.Driver, uuid: str, title: str, description: str, **kwargs):
    """
    Ingest an entity in neo4j, creating the node if no Entity with this uuid exists.

    The node's properties are replaced by the provided uuid/title/description
    and any other provided kwargs. Properties written by an earlier call that
    are not supplied this time are dropped, because the node is first reset to
    just its uuid.

    Args:
        driver: neo4j driver used to run the query.
        uuid: Value of the node's `uuid` property; also the MERGE key.
        title: Value stored as the `title` property.
        description: Value stored as the `description` property.
        **kwargs: Additional properties, stored under their keyword names.
    """
    # All properties travel as one map parameter instead of having the kwarg
    # names formatted into the query text. That keeps arbitrary keys from
    # altering the Cypher statement, and keeps keys ending in "_" from being
    # interpreted by execute_query as driver configuration arguments.
    props = {**kwargs, "title": title, "description": description}
    add_entity_query = \
    """
    MERGE (e:Entity {uuid: $uuid})
    SET e = {uuid: $uuid} // Allow properties to be unset with subsequent calls
    SET e += $props
    """
    driver.execute_query(add_entity_query, uuid=uuid, props=props)


def ingest_entity_rel(driver: neo4j.Driver, parent_uuid: str, child_uuid: str, order: int=0):
    """
    Create a :HAS_CHILD relationship in the graph between 2 nodes given their UUIDs.

    Both nodes must already exist as `Entity` nodes; if either MATCH finds
    nothing, no relationship is written. Calling this again for the same pair
    updates `order` on the existing relationship rather than adding a second one.

    Args:
        driver: neo4j driver used to run the query.
        parent_uuid: `uuid` property of the parent Entity.
        child_uuid: `uuid` property of the child Entity.
        order: Value stored in the relationship's `order` property.
    """
    # MERGE matches on the relationship alone and `order` is set afterwards.
    # Keeping `order` inside the MERGE pattern would make it part of the
    # relationship's identity, so a changed order would create a duplicate edge.
    add_rel_query = \
    """
    MATCH (parent:Entity {uuid: $parent_uuid})
    MATCH (child:Entity {uuid: $child_uuid})
    MERGE (parent)-[rel:HAS_CHILD]->(child)
    SET rel.order = $order
    """
    driver.execute_query(add_rel_query, parent_uuid=parent_uuid, child_uuid=child_uuid, order=order)


In [8]:
pub_json = get_publication("PRJ-1811")

# Build the project tree up front; this step does not need a database connection.
pub_tree = nx.tree_graph(pub_json['tree'])

# One driver serves both the index setup and the ingestion, instead of opening
# and closing a separate driver for each step.
with GraphDatabase.driver(URI) as driver:
    setup_db(driver)

    # Create a node in the neo4j graph for every node in the project tree
    for node, node_data in pub_tree.nodes(data=True):
        title = node_data["value"]["title"]
        # The tree's node ID becomes the entity's uuid; the node's own "uuid"
        # attribute is stored separately as meta_uuid.
        uuid = node
        name = node_data["name"]
        meta_uuid = node_data["uuid"]
        description = node_data["value"].get("description", None)
        ingest_entity(driver, uuid, title, description, name=name, meta_uuid=meta_uuid)


In [9]:
# Map each node ID reachable from 'NODE_ROOT' to the ID of its parent in the tree
dfs_pred = nx.dfs_predecessors(pub_tree, 'NODE_ROOT')

# Insert one parent -> child edge in the graph per entry of that mapping
with GraphDatabase.driver(URI) as driver:
    for child_uuid, parent_uuid in dfs_pred.items():
        # Nodes without an "order" attribute fall back to 0
        order = pub_tree.nodes[child_uuid].get("order", 0)
        ingest_entity_rel(driver, parent_uuid, child_uuid, order)


In [10]:
# Run this manually to clean up the db if something goes wrong.
# WARNING: cleanup_db deletes every node (and its relationships) in the database,
# so executing this cell after the ingest cells discards what they wrote.
with GraphDatabase.driver(URI) as driver:
    cleanup_db(driver)