In [1]:
import requests

In [2]:
data = requests.get("https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/master/hp.obo").text

In [6]:
terms = data.split("[Term]")[1:]

In [38]:
data = []
for t in terms:
    alias = []
    isa = []
    umls = []
    for line in t.split("\n"):
        if line.startswith("id:"):
            id = line.split("id:")[1].strip()
        if line.startswith("name:"):
            name = line.split("name:")[1].strip()
        if line.startswith("xref:") and "UMLS:" in line:
            umls.append(line.split("xref:")[1].strip())
        if line.startswith("is_a:"):
            isa.append(line.split("is_a:")[1].strip().split("!")[0].strip())
        if line.startswith("alt_id"):
            al = line.split("alt_id: ")[1].strip()
            alias.append(al)
        
    data.append({"id":id, "name":name, "umls":umls, "isa":isa, "alias": alias})
    

In [39]:
# Define Neo4j connections
import pandas as pd
from neo4j import GraphDatabase
host = 'neo4j://localhost:7687'
user = 'neo4j'
password = 'letmein'
driver = GraphDatabase.driver(host,auth=(user, password))

def run_query(query, params=None):
    with driver.session() as session:
        result = session.run(query, params)
        return pd.DataFrame([r.values() for r in result], columns=result.keys())

In [40]:
run_query("""
CREATE CONSTRAINT IF NOT EXISTS ON (h:HPO) ASSERT h.id IS UNIQUE;
""")

In [41]:
run_query("""
UNWIND $data AS row
MERGE (h:HPO {id: row.id})
SET h.name = row.name,
    h.alternative_ids = row.alias
FOREACH (x in row.isa | MERGE (isa:HPO {id: x}) MERGE (h)-[:IS_A]->(isa))
FOREACH (x in row.umls | MERGE (umls:UMLS {id: x}) MERGE (h)-[:HPO_TO_UMLS]->(umls))
""", {"data": data})

In [33]:
data

[{'id': 'HP:5000048',
  'name': 'Anti-Kv1.4 antibody',
  'umls': 'UMLS:C4073286',
  'isa': ['HP:0030057'],
  'alias': []}]

In [42]:
run_query("""
MATCH p=(h:HPO)-[:IS_A*]->()
WHERE h.name = "Abnormal prostate morphology"
RETURN p
""")

Unnamed: 0,p
0,(())
1,"((), ())"
2,"((), (), ())"
3,"((), (), (), ())"
4,"((), (), (), (), ())"
5,"((), (), (), (), (), ())"
6,"((), (), (), (), (), (), ())"
7,"((), ())"
8,"((), (), ())"
9,"((), (), (), ())"


In [43]:
run_query("""
MATCH p=(h:HPO)-[:IS_A*0..]->(parents)
WHERE h.name = "Abnormal prostate morphology"
RETURN collect(distinct parents.name) AS result
""")

Unnamed: 0,collect(distinct parents.name)
0,"[Abnormal prostate morphology, Abnormality of ..."


In [44]:
run_query("""
MATCH p=(h:HPO)<-[:IS_A*0..]-(children)
WHERE h.name = "All"
SET children.level = length(p)
""")