In [None]:
import os
import json
from py2neo import Graph, Node
import pandas as pd

In [None]:
# Load the source table for the knowledge graph.
# "csv_path" is a placeholder: replace it with the path to the CSV file.
df = pd.read_csv(filepath_or_buffer="csv_path")

#### Define entities (nodes)

In [None]:
# Distinct disease names: each cell of the 'Disease Name' column is split on
# commas and the pieces from all rows are pooled into one set.
diseases = {
    name
    for cell in df['Disease Name']
    for name in cell.split(',')
}
diseases

In [None]:
# Distinct symptoms: each cell of the 'Symptom' column is split on commas and
# the pieces from all rows are pooled into one set.
symptoms = {
    name
    for cell in df['Symptom']
    for name in cell.split(',')
}
symptoms

In [None]:
# Distinct pathogenesis entries: each cell of the 'Pathogenesis' column is split
# on commas and the pieces from all rows are pooled into one set.
pathogenesis = {
    name
    for cell in df['Pathogenesis']
    for name in cell.split(',')
}
pathogenesis

In [None]:
# Distinct treatment principles: each cell of the 'Treatment Principle' column is
# split on commas and the pieces from all rows are pooled into one set.
therapeutic_principles = {
    name
    for cell in df['Treatment Principle']
    for name in cell.split(',')
}
therapeutic_principles

In [None]:
# Distinct medication names: each cell of the 'Medication' column is split on
# commas and the pieces from all rows are pooled into one set.
# Missing cells are dropped first: calling .split on a missing value (NaN, a
# float) would raise AttributeError, and the medication relationship cells in
# this notebook already skip rows whose medication cannot be split.
medicines = {
    name
    for cell in df['Medication'].dropna()
    for name in cell.split(',')
}
medicines

#### Define relationships (edges)

In [None]:
# Define relationship deduplication function
def deduplicate(rels_old):
    """Return the items of ``rels_old`` with repeated entries removed.

    The first occurrence of each item is kept and the original order is
    preserved. Items are compared with ``==`` (via ``in``), so they do not
    need to be hashable; the input is not modified.
    """
    unique = []
    for rel in rels_old:
        if rel in unique:
            # Already recorded: skip the repeat.
            continue
        unique.append(rel)
    return unique

##### Relationship: Symptom - Disease Name

In [None]:
# Pair every symptom with every disease listed on the same CSV row, as
# [symptom, disease], then drop repeated pairs.
symptom_disease = deduplicate([
    [symptom, disease]
    for diseases_cell, symptoms_cell in zip(df['Disease Name'], df['Symptom'])
    for disease in diseases_cell.split(',')
    for symptom in symptoms_cell.split(',')
])
symptom_disease

##### Relationship: Pathogenesis - Symptom

In [None]:
# Pair every pathogenesis entry with every symptom listed on the same CSV row,
# as [pathogenesis, symptom], then drop repeated pairs.
pathogenesis_symptom = deduplicate([
    [cause, symptom]
    for symptoms_cell, causes_cell in zip(df['Symptom'], df['Pathogenesis'])
    for symptom in symptoms_cell.split(',')
    for cause in causes_cell.split(',')
])
pathogenesis_symptom

##### Relationship: Pathogenesis - Treatment Principle

In [None]:
# Pair every pathogenesis entry with every treatment principle listed on the
# same CSV row, as [pathogenesis, treatment principle], then drop repeated pairs.
pathogenesis_therapeutic_principle = deduplicate([
    [cause, principle]
    for principles_cell, causes_cell in zip(df['Treatment Principle'], df['Pathogenesis'])
    for principle in principles_cell.split(',')
    for cause in causes_cell.split(',')
])
pathogenesis_therapeutic_principle

##### Relationship: Treatment Principle - Medication

In [None]:
# Pair every treatment principle with every medication listed on the same CSV
# row, as [treatment principle, medication], then drop repeated pairs.
therapeutic_principle_medicine = []
for idx, row in df.iterrows():
    medication_cell = row['Medication']
    principle_cell = row['Treatment Principle']
    # Skip rows where either cell is not text (e.g. a missing value read as NaN).
    # This is an explicit check rather than a bare `except: pass`, so genuine
    # errors such as a misspelled column name still surface.
    if not (isinstance(medication_cell, str) and isinstance(principle_cell, str)):
        continue
    for each_medicine in medication_cell.split(','):
        for each_therapeutic_principle in principle_cell.split(','):
            therapeutic_principle_medicine.append([each_therapeutic_principle, each_medicine])
therapeutic_principle_medicine = deduplicate(therapeutic_principle_medicine)
therapeutic_principle_medicine

##### Relationship: Symptom - Treatment Principle

In [None]:
# Pair every symptom with every treatment principle listed on the same CSV row,
# as [symptom, treatment principle], then drop repeated pairs.
symptom_therapeutic_principle = []
for idx, row in df.iterrows():
    symptom_cell = row['Symptom']
    principle_cell = row['Treatment Principle']
    # Skip rows where either cell is not text (e.g. a missing value read as NaN).
    # This is an explicit check rather than a bare `except: pass`, so genuine
    # errors such as a misspelled column name still surface.
    if not (isinstance(symptom_cell, str) and isinstance(principle_cell, str)):
        continue
    for each_symptom in symptom_cell.split(','):
        for each_therapeutic_principle in principle_cell.split(','):
            symptom_therapeutic_principle.append([each_symptom, each_therapeutic_principle])
symptom_therapeutic_principle = deduplicate(symptom_therapeutic_principle)
symptom_therapeutic_principle

##### Relationship: Medication - Pathogenesis

In [None]:
# Pair every medication with every pathogenesis entry listed on the same CSV
# row, as [medication, pathogenesis], then drop repeated pairs.
medicine_pathogenesis = []
for idx, row in df.iterrows():
    medication_cell = row['Medication']
    pathogenesis_cell = row['Pathogenesis']
    # Skip rows where either cell is not text (e.g. a missing value read as NaN).
    # This is an explicit check rather than a bare `except: pass`, so genuine
    # errors such as a misspelled column name still surface.
    if not (isinstance(medication_cell, str) and isinstance(pathogenesis_cell, str)):
        continue
    for each_medicine in medication_cell.split(','):
        for each_pathogenesis in pathogenesis_cell.split(','):
            medicine_pathogenesis.append([each_medicine, each_pathogenesis])
medicine_pathogenesis = deduplicate(medicine_pathogenesis)
medicine_pathogenesis

##### Relationship: Medication - Symptom

In [None]:
# Pair every medication with every symptom listed on the same CSV row, as
# [medication, symptom], then drop repeated pairs.
medicine_symptom = []
for idx, row in df.iterrows():
    medication_cell = row['Medication']
    symptom_cell = row['Symptom']
    # Skip rows where either cell is not text (e.g. a missing value read as NaN).
    # This is an explicit check rather than a bare `except: pass`, so genuine
    # errors such as a misspelled column name still surface.
    if not (isinstance(medication_cell, str) and isinstance(symptom_cell, str)):
        continue
    for each_medicine in medication_cell.split(','):
        for each_symptom in symptom_cell.split(','):
            medicine_symptom.append([each_medicine, each_symptom])
medicine_symptom = deduplicate(medicine_symptom)
medicine_symptom

#### Connect to the graph database

In [None]:
# Connection settings are read from environment variables so that real
# credentials never have to be saved in the notebook. When a variable is not
# set, the original placeholder value is used.
# Note that the username here is the neo4j global username, not the name of the DBMS or database
g = Graph(
    os.environ.get('NEO4J_URL', 'neo4j_url'),
    auth=(
        os.environ.get('NEO4J_USER', 'neo4j'),
        os.environ.get('NEO4J_PASSWORD', 'password'),
    ),
    name=os.environ.get('NEO4J_DATABASE', 'neo4j'),
)

#### Create knowledge graph entities (nodes)

In [None]:
# Note: Uncommenting the two lines below deletes ALL entities and relationships in the graph
# cypher = 'MATCH (n) DETACH DELETE n'
# g.run(cypher)

In [None]:
# Create one 'Disease Name' node per distinct disease and log each creation.
for disease_name in diseases:
    g.create(Node('Disease Name', name=disease_name))
    print('Create the entity {}'.format(disease_name))

In [None]:
# Create one 'Medication' node per distinct medication and log each creation.
for medicine_name in medicines:
    g.create(Node('Medication', name=medicine_name))
    print('Create the entity {}'.format(medicine_name))

In [None]:
# Create one 'Symptom' node per distinct symptom and log each creation.
for symptom_name in symptoms:
    g.create(Node('Symptom', name=symptom_name))
    print('Create the entity {}'.format(symptom_name))

In [None]:
# Create one node per distinct treatment principle.
# The label must be 'Treatment Principle' (it was 'Pathogenesis'): the
# relationship cells look these names up under the 'Treatment Principle' label,
# so nodes created with any other label are never matched.
for each in therapeutic_principles:
    node = Node('Treatment Principle', name=each)
    g.create(node)
    print('Create the entity {}'.format(each))

In [None]:
# Create one node per distinct pathogenesis entry.
# The label must be 'Pathogenesis' (it was 'Treatment Principle'): the
# relationship cells look these names up under the 'Pathogenesis' label, so
# nodes created with any other label are never matched.
for each in pathogenesis:
    node = Node('Pathogenesis', name=each)
    g.create(node)
    print('Create the entity {}'.format(each))

#### Create knowledge graph relationships (edges)

In [None]:
# Define the create relationship function
def _cypher_identifier(identifier):
    """Wrap a label or relationship type in backticks so Cypher accepts spaces in it."""
    # A backtick inside a quoted identifier is escaped by doubling it.
    return '`{}`'.format(str(identifier).replace('`', '``'))


def create_relationship(start_node, end_node, edges, rel_type, rel_name):
    """Create a relationship in the graph ``g`` for every pair in ``edges``.

    Parameters
    ----------
    start_node : str
        Label of the node the relationship starts from (e.g. 'Symptom').
    end_node : str
        Label of the node the relationship points to (e.g. 'Disease Name').
    edges : iterable of 2-item sequences
        ``[start_name, end_name]`` pairs; each name is compared with the
        ``name`` property of the nodes carrying the corresponding label.
    rel_type : str
        Relationship type (e.g. 'Belongs to').
    rel_name : str
        Value stored in the relationship's ``name`` property.

    Labels and the relationship type are backtick-quoted because several of
    them contain spaces, which is not valid in an unquoted Cypher identifier.
    Node names and ``rel_name`` are sent as query parameters rather than
    spliced into the query text, so quotes or other special characters in the
    data cannot break or alter the statement.

    An error while running one pair is printed and the remaining pairs are
    still processed.
    """
    # The statement is the same for every edge; only the parameters change.
    query = (
        'MATCH (p:{start}), (q:{end}) '
        'WHERE p.name = $p_name AND q.name = $q_name '
        'CREATE (p)-[rel:{rel} {{name: $rel_name}}]->(q)'
    ).format(
        start=_cypher_identifier(start_node),
        end=_cypher_identifier(end_node),
        rel=_cypher_identifier(rel_type),
    )
    for edge in edges:
        p = edge[0]
        q = edge[1]
        try:
            g.run(query, p_name=p, q_name=q, rel_name=rel_name)  # Run Cypher statement
            print('Create the relationship {}-{}->{}'.format(p, rel_type, q))
        except Exception as e:
            print(e)

##### Symptom-Belongs to->Disease Name

In [None]:
# Symptom -[Belongs to]-> Disease Name
create_relationship(
    start_node='Symptom',
    end_node='Disease Name',
    edges=symptom_disease,
    rel_type='Belongs to',
    rel_name='Symptom belongs to Disease Name',
)

##### Pathogenesis-Causes->Symptom

In [None]:
# Pathogenesis -[Causes]-> Symptom
create_relationship(
    start_node='Pathogenesis',
    end_node='Symptom',
    edges=pathogenesis_symptom,
    rel_type='Causes',
    rel_name='Pathogenesis causes Symptom',
)

##### Pathogenesis-Follows->Treatment Principle

In [None]:
# Pathogenesis -[Follows]-> Treatment Principle
create_relationship(
    start_node='Pathogenesis',
    end_node='Treatment Principle',
    edges=pathogenesis_therapeutic_principle,
    rel_type='Follows',
    rel_name='Pathogenesis follows Treatment Principle',
)

##### Treatment Principle-Recommends->Medication

In [None]:
# Treatment Principle -[Recommends]-> Medication
create_relationship(
    start_node='Treatment Principle',
    end_node='Medication',
    edges=therapeutic_principle_medicine,
    rel_type='Recommends',
    rel_name='Treatment Principle recommends Medication',
)

##### Symptom-Applies->Treatment Principle

In [None]:
# Symptom -[Applies]-> Treatment Principle
create_relationship(
    start_node='Symptom',
    end_node='Treatment Principle',
    edges=symptom_therapeutic_principle,
    rel_type='Applies',
    rel_name='Symptom applies Treatment Principle',
)

##### Medication-Treats->Pathogenesis

In [None]:
# Medication -[Treats]-> Pathogenesis
create_relationship(
    start_node='Medication',
    end_node='Pathogenesis',
    edges=medicine_pathogenesis,
    rel_type='Treats',
    rel_name='Medication treats Pathogenesis',
)

##### Medication-Treats->Symptom

In [None]:
# Medication -[Treats]-> Symptom
create_relationship(
    start_node='Medication',
    end_node='Symptom',
    edges=medicine_symptom,
    rel_type='Treats',
    rel_name='Medication treats Symptom',
)