In [3]:

import pandas as pd
import csv

# Neo4j label carried by every node of this hierarchy. It is backtick-quoted
# because the name contains '-' characters.
NODE_LABEL = "`_generic_hierarchy_node_business-investment-instrument-asset`"


def _cypher_escape(value):
    """Return ``str(value)`` escaped for use inside a quoted Cypher string literal.

    Backslashes, single quotes and double quotes are backslash-escaped so the
    result can be embedded in either a '...' or a "..." literal.
    """
    text = str(value)
    # Escape the backslash first so the backslashes added for the quotes are
    # not themselves escaped a second time.
    return text.replace("\\", "\\\\").replace("'", "\\'").replace('"', '\\"')


def _is_missing(value):
    """Return True when a cell holds no value (NaN/None from pandas, or "")."""
    return bool(pd.isna(value)) or str(value) == ""


def write_hierarchy_cypher(hierarchy, out):
    """Write the Cypher statements that build the hierarchy to ``out``.

    The output consists of, in order:

    1. a uniqueness constraint on ``code`` for ``NODE_LABEL`` nodes;
    2. one ``CREATE`` per distinct ``Code`` (repeated rows are written once);
    3. one ``MATCH ... CREATE (child)-[:hasParent]->(parent)`` per distinct
       (``ParentCode``, ``Code``) pair whose ``ParentCode`` is present.

    Rows without a parent (the hierarchy root) still get a node, because no
    other statement in this script creates it; they just get no relationship.

    Args:
        hierarchy: DataFrame with ``Code``, ``Label`` and ``ParentCode`` columns.
        out: Writable text stream (anything with a ``write(str)`` method).

    Raises:
        ValueError: If the same ``Code`` appears with two different labels.
    """
    out.write("CREATE CONSTRAINT ON (n:%s) ASSERT n.code IS UNIQUE;" % NODE_LABEL)
    out.write("\n")

    # Zip the columns instead of using iterrows(): iterrows() builds one Series
    # per row and may upcast values to a common dtype along the way.
    rows = list(zip(hierarchy["Code"], hierarchy["Label"], hierarchy["ParentCode"]))

    # Nodes: remember the label written for each code so that the same node is
    # never created twice (the uniqueness constraint would reject it).
    label_by_code = {}
    for code, label, _parent in rows:
        code_text = str(code)
        label_text = str(label)
        if code_text in label_by_code:
            if label_by_code[code_text] != label_text:
                raise ValueError(
                    "The code {code} has labels of both: '{first}' and '{second}'.".format(
                        code=code_text,
                        first=label_by_code[code_text],
                        second=label_text,
                    )
                )
            continue
        out.write(
            "CREATE (node:%s { code:'%s',label:'%s' });"
            % (NODE_LABEL, _cypher_escape(code_text), _cypher_escape(label_text))
        )
        out.write("\n")
        label_by_code[code_text] = label_text

    # Relationships: a repeated (parent, child) pair would otherwise CREATE a
    # second, parallel hasParent edge between the same two nodes.
    linked = set()
    for code, _label, parent in rows:
        if _is_missing(parent):
            # Root row: there is no parent to link to.
            continue
        pair = (str(parent), str(code))
        if pair in linked:
            continue
        linked.add(pair)
        out.write(
            "MATCH (parent:%s), (child:%s) where parent.code = \"%s\" and child.code = \"%s\" "
            "CREATE (child)-[:hasParent]->(parent);"
            % (NODE_LABEL, NODE_LABEL, _cypher_escape(pair[0]), _cypher_escape(pair[1]))
        )
        out.write("\n")


# True both when run as a script and when executed as a notebook cell, so the
# export still happens there; it only keeps a plain ``import`` free of file I/O.
if __name__ == "__main__":
    # dtype=str keeps codes exactly as written in the file (no int/float
    # inference, e.g. "1" becoming 1.0 in a column that has an empty cell), and
    # keep_default_na=False keeps empty cells as "" and labels such as "NA" as
    # text instead of NaN.
    finalDf = pd.read_csv(
        "hierarchy_bi2200_assets.csv",
        quoting=csv.QUOTE_ALL,
        dtype=str,
        keep_default_na=False,
    )

    # Write hierarchy to .cypher (explicit UTF-8 so non-ASCII labels do not
    # depend on the platform's default encoding).
    with open("hierarchy.cypher", "w", encoding="utf-8") as f:
        write_hierarchy_cypher(finalDf, f)
                