In [11]:
import sys
import os
from py2neo import Graph
from pathlib import Path
from pandas import DataFrame

# Bolt endpoint of the Neo4j instance to query. The default is the local
# instance this notebook was written against; set GREBI_NEO4J_URI in the
# environment to point the notebook at another server without editing it.
NEO4J_BOLT_URI = os.environ.get("GREBI_NEO4J_URI", "bolt://localhost:8687")

# Connection handle used by the query cells (`graph.run(...)`).
graph = Graph(NEO4J_BOLT_URI)

# df = DataFrame(graph.run("""
# MATCH (id:Id { id:"chebi:5063"})<-[:id]-(n:GraphNode)
# RETURN n.`grebi:name`[0] as name
# """).data())

# print(df)


# One row per outgoing edge of each term linked to the hp:0000001 node via
# `biolink:broad_match`, i.e. all HP terms.
# Every matched term node definitely corresponds to an HP term, but because of
# the mappings it may ALSO correspond to an MP term. As a consequence the
# relationships returned here may come from either HP or MP.
hp_all_out_query = """
MATCH (id:Id { id:"hp:0000001"})<-[:id]-(hpo_root_term:GraphNode)
    <-[:`biolink:broad_match`]-(term:GraphNode)
    -[outgoing_edge]->(n:GraphNode)
RETURN term.`grebi:name`[0] AS from, type(outgoing_edge) AS edge, n.`grebi:name`[0] AS to
"""
hp_all_out_rows = graph.run(hp_all_out_query).data()

# Columns: from (term label), edge (relationship type), to (target label).
df = DataFrame(hp_all_out_rows)
df.to_csv("all_hp_all_out.csv", index=False)

In [12]:


# Same traversal as the unfiltered outgoing-edge query, but an edge is kept
# only when "OLS.hp" is listed in its `grebi:datasources`, i.e. the
# relationship was asserted by HP (not MP).
hp_only_out_query = """
MATCH (id:Id { id:"hp:0000001"})<-[:id]-(hpo_root_term:GraphNode)
    <-[:`biolink:broad_match`]-(term:GraphNode)
    -[outgoing_edge]->(n:GraphNode)
    WHERE "OLS.hp" IN outgoing_edge.`grebi:datasources`
RETURN term.`grebi:name`[0] AS from, type(outgoing_edge) AS edge, n.`grebi:name`[0] AS to
"""
hp_only_out_rows = graph.run(hp_only_out_query).data()

# Columns: from (term label), edge (relationship type), to (target label).
df = DataFrame(hp_only_out_rows)
df.to_csv("all_hp_all_out_hp_only.csv", index=False)

In [14]:


# HP-asserted outgoing edges (edges whose `grebi:datasources` contains
# "OLS.hp", so not MP), this time with identifiers alongside the labels:
#   from_id    - first entry of term.id matching the pattern hp:<digits>
#   from_label - first `grebi:name` of the term
#   edge       - relationship type
#   to_ids     - the target node's `id` property, returned as-is
#   to_label   - first `grebi:name` of the target node
hp_out_with_ids_query = """
MATCH (id:Id { id:"hp:0000001"})<-[:id]-(hpo_root_term:GraphNode)
    <-[:`biolink:broad_match`]-(term:GraphNode)
    -[outgoing_edge]->(n:GraphNode)
    WHERE "OLS.hp" IN outgoing_edge.`grebi:datasources`
RETURN
    [id in term.id WHERE id =~ "hp:[0-9]*" | id][0] AS from_id,
    term.`grebi:name`[0] AS from_label,
    type(outgoing_edge) AS edge,
    n.id AS to_ids,
    n.`grebi:name`[0] AS to_label
"""
hp_out_with_ids_rows = graph.run(hp_out_with_ids_query).data()

df = DataFrame(hp_out_with_ids_rows)
df.to_csv("all_hp_all_out_hp_outgoing.csv", index=False)



In [17]:


# All incoming edges to all HP terms, with identifiers and labels.
# Caveat: an incoming edge may target either the HP term or its equivalent MP
# term, and there is currently no way to tell the two apart.
# TODO: we will probably have to build two versions of the Neo4j database,
# one with merged mappings and one without.
hp_incoming_query = """
MATCH (id:Id { id:"hp:0000001"})<-[:id]-(hpo_root_term:GraphNode)
    <-[:`biolink:broad_match`]-(term:GraphNode)
    <-[incoming_edge]-(n:GraphNode)
RETURN
    [id in term.id WHERE id =~ "hp:[0-9]*" | id][0] AS to_id,
    term.`grebi:name`[0] AS to_label,
    type(incoming_edge) AS edge,
    n.id AS from_ids,
    n.`grebi:name`[0] AS from_label
"""
hp_incoming_rows = graph.run(hp_incoming_query).data()

# Columns: to_id / to_label describe the HP term, from_ids / from_label the
# source node, and edge is the relationship type.
df = DataFrame(hp_incoming_rows)
df.to_csv("all_hp_all_in_hp.csv", index=False)