In [2]:
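# One-time setup (left commented out): install the Neo4j Python driver and py2neo.
# %pip installs into the running kernel's environment, unlike !pip.
# %pip install neo4j
# %pip install py2neo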

In [3]:
# from neo4j import GraphDatabase
# GraphDatabase??

In [40]:
from neo4j import GraphDatabase
import pandas as pd
import json

# Connection settings are read from config.json instead of being hard-coded
# in the notebook. The file must provide the keys: uri, user, password.
with open('config.json', 'r') as f:
    credentials = json.load(f)

uri = credentials['uri']
user = credentials['user']
password = credentials['password']

# A single driver is shared by the whole notebook. No session is opened here:
# each query cell opens its own with `with driver.session() as session:`,
# which also closes it. A session created at this point would never be closed.
driver = GraphDatabase.driver(uri, auth=(user, password))

In [38]:
# Ten most frequent (country, symptom) pairs: counts every relationship that
# connects a User node to a Symptom node, in either direction.
cypher_query = """
    MATCH (u:User)-[r]-(s:Symptom)
    RETURN u.country, s.symptom, COUNT(r) AS ocurrency
    ORDER BY ocurrency DESC
    LIMIT 10
"""

# Run the query in a short-lived session and print one sentence per row.
with driver.session() as session:
    result = session.run(cypher_query)
    for record in result:
        # Columns are addressed by the names used in the RETURN clause.
        country = record['u.country']
        symptom_name = record['s.symptom']
        occurrences = record['ocurrency']
        print(f"{country} has {occurrences} ocurrencies of users with {symptom_name} symptom")

Peru has 73 ocurrencies of users with vomiting symptom
United States has 72 ocurrencies of users with vomiting symptom
Vietnam has 72 ocurrencies of users with fatigue symptom
Peru has 71 ocurrencies of users with fatigue symptom
Indonesia has 68 ocurrencies of users with fatigue symptom
Vietnam has 67 ocurrencies of users with vomiting symptom
Singapore has 65 ocurrencies of users with fatigue symptom
New Zealand has 65 ocurrencies of users with vomiting symptom
Spain has 64 ocurrencies of users with fatigue symptom
Sweden has 61 ocurrencies of users with fatigue symptom


Transforming the query result into a pandas DataFrame

In [39]:
# Ten most frequent (country, symptom) pairs, collected into a pandas DataFrame.
cypher_query = """
    MATCH (u:User)-[r]-(s:Symptom)
    RETURN u.country, s.symptom, COUNT(r) AS ocurrency
    ORDER BY ocurrency DESC
    LIMIT 10
"""

# Convert inside the `with` block: the result has to be read while its session
# is still open. `df` is not pre-initialised to an empty frame, so a failed
# query raises here instead of leaving an empty DataFrame behind.
with driver.session() as session:
    result = session.run(cypher_query)
    df = result.to_df()

print(df.shape)
df.head()

(10, 3)


Unnamed: 0,u.country,s.symptom,ocurrency
0,Peru,vomiting,73
1,United States,vomiting,72
2,Vietnam,fatigue,72
3,Peru,fatigue,71
4,Indonesia,fatigue,68


In [21]:
# Ten most frequent (country, disease) pairs: counts every relationship that
# connects a User node to a Disease node, in either direction.
cypher_query = """
    MATCH (u:User)-[r]-(d:Disease)
    RETURN u.country, d.disease, COUNT(r) AS ocurrency
    ORDER BY ocurrency DESC
    LIMIT 10
"""

# Fetch all rows while the session is open, then show the raw Record objects.
with driver.session() as session:
    result = session.run(cypher_query)
    records = list(result)

for record in records:
    print(record)

<Record u.country='Austria' d.disease='hepatitis b' ocurrency=11>
<Record u.country='United States' d.disease='acne' ocurrency=11>
<Record u.country='South Africa' d.disease='heart attack' ocurrency=9>
<Record u.country='Vietnam' d.disease='common cold' ocurrency=9>
<Record u.country='South Korea' d.disease='dengue' ocurrency=9>
<Record u.country='United States' d.disease='chronic cholestasis' ocurrency=9>
<Record u.country='Netherlands' d.disease='allergy' ocurrency=9>
<Record u.country='Belgium' d.disease='peptic ulcer diseae' ocurrency=9>
<Record u.country='Singapore' d.disease='jaundice' ocurrency=9>
<Record u.country='Italy' d.disease='hypothyroidism' ocurrency=8>


In [17]:
# Per disease: number of linked symptoms, the summed symptom weight
# (converted with toFloat), and their ratio rounded to two decimals.
# The ten diseases with the lowest ratio are returned.
cypher_query = """
    MATCH (d:Disease)-[r]-(s:Symptom)
    WITH d, COUNT(s.symptom) AS n_symp, SUM(toFloat(s.weight)) AS severity
    RETURN d.disease, n_symp, severity, ROUND(severity/n_symp,2) AS avg_severity 
    ORDER BY avg_severity ASC
    LIMIT 10
"""

# Fetch all rows while the session is open, then show the raw Record objects.
with driver.session() as session:
    result = session.run(cypher_query)
    records = [row for row in result]

for record in records:
    print(record)

<Record d.disease='acne' n_symp=4 severity=9.0 avg_severity=2.25>
<Record d.disease='psoriasis' n_symp=6 severity=15.0 avg_severity=2.5>
<Record d.disease='fungal infection' n_symp=3 severity=8.0 avg_severity=2.67>
<Record d.disease='osteoarthristis' n_symp=6 severity=20.0 avg_severity=3.33>
<Record d.disease='paralysis (brain hemorrhage)' n_symp=4 severity=14.0 avg_severity=3.5>
<Record d.disease='migraine' n_symp=9 severity=32.0 avg_severity=3.56>
<Record d.disease='hepatitis b' n_symp=12 severity=43.0 avg_severity=3.58>
<Record d.disease='arthritis' n_symp=5 severity=18.0 avg_severity=3.6>
<Record d.disease='chronic cholestasis' n_symp=7 severity=26.0 avg_severity=3.71>
<Record d.disease='drug reaction' n_symp=4 severity=15.0 avg_severity=3.75>


In [73]:
# Every symptom linked to one disease, together with the connecting relationship.
# The disease name is supplied as a query parameter ($disease) rather than being
# written into the Cypher text, so the cell can be pointed at another disease by
# changing one Python value and no string has to be spliced into the query.
disease_name = 'common cold'
cypher_query = """
    MATCH (d:Disease)-[r]-(s:Symptom)
    WHERE d.disease = $disease
    RETURN d, r, s
"""

# Run the query; keyword arguments to session.run() become query parameters.
with driver.session() as session:
    result = session.run(cypher_query, disease=disease_name)
    for record in result:
        disease = record['d']        # Disease node
        relationship = record['r']   # relationship between the two nodes
        symptom = record['s']        # Symptom node
        print(f"The disease named {disease['disease']} is related with {symptom['symptom']} by the relationship {relationship.type}")

The disease named common cold is related with continuous sneezing by the relationship IS_PRESENTED_BY
The disease named common cold is related with headache by the relationship IS_PRESENTED_BY
The disease named common cold is related with throat irritation by the relationship IS_PRESENTED_BY
The disease named common cold is related with high fever by the relationship IS_PRESENTED_BY
The disease named common cold is related with loss of smell by the relationship IS_PRESENTED_BY
The disease named common cold is related with redness of eyes by the relationship IS_PRESENTED_BY
The disease named common cold is related with swelled lymph nodes by the relationship IS_PRESENTED_BY
The disease named common cold is related with muscle pain by the relationship IS_PRESENTED_BY
The disease named common cold is related with malaise by the relationship IS_PRESENTED_BY
The disease named common cold is related with phlegm by the relationship IS_PRESENTED_BY
The disease named common cold is related with

In [68]:
# Inspect ten Symptom nodes and show the Python type of each stored property.
cypher_query = """
    MATCH (s:Symptom)
    RETURN *
    LIMIT 10
"""

# For every node, print `symptom['<property>']=<repr>, <type>` for both properties.
with driver.session() as session:
    result = session.run(cypher_query)
    for record in result:
        symptom = record['s']
        for prop in ('symptom', 'weight'):
            value = symptom[prop]
            # !r reproduces the repr that the f-string `=` specifier would show.
            print(f"symptom['{prop}']={value!r}, {type(value)}")

symptom['symptom']='itching', <class 'str'>
symptom['weight']='1', <class 'str'>
symptom['symptom']='skin rash', <class 'str'>
symptom['weight']='3', <class 'str'>
symptom['symptom']='nodal skin eruptions', <class 'str'>
symptom['weight']='4', <class 'str'>
symptom['symptom']='continuous sneezing', <class 'str'>
symptom['weight']='4', <class 'str'>
symptom['symptom']='shivering', <class 'str'>
symptom['weight']='5', <class 'str'>
symptom['symptom']='chills', <class 'str'>
symptom['weight']='3', <class 'str'>
symptom['symptom']='joint pain', <class 'str'>
symptom['weight']='3', <class 'str'>
symptom['symptom']='stomach pain', <class 'str'>
symptom['weight']='5', <class 'str'>
symptom['symptom']='acidity', <class 'str'>
symptom['weight']='3', <class 'str'>
symptom['symptom']='ulcers on tongue', <class 'str'>
symptom['weight']='4', <class 'str'>


In [74]:
# List the node labels present in the database.
cypher_query = """
    CALL db.labels()
"""

# For each row show the Record itself, then its column names, then its values.
with driver.session() as session:
    result = session.run(cypher_query)
    for record in result:
        for view in (record, record.keys(), record.values()):
            print(view)

<Record label='Disease'>
['label']
['Disease']
<Record label='Precaution'>
['label']
['Precaution']
<Record label='Symptom'>
['label']
['Symptom']
<Record label='User'>
['label']
['User']


In [70]:
# List the relationship types present in the database.
cypher_query = """
    CALL db.relationshipTypes()
"""

# Fetch all rows while the session is open, then show the raw Record objects.
with driver.session() as session:
    result = session.run(cypher_query)
    records = list(result)

for record in records:
    print(record)

<Record relationshipType='IS_SICK_WITH'>
<Record relationshipType='PRESENTS'>
<Record relationshipType='IS_TREATED_WITH'>
<Record relationshipType='IS_PRESENTED_BY'>
