Code for adding nodes and edges to Neo4j

In [3]:
import os

import pandas as pd
from neo4j import GraphDatabase
from tqdm import tqdm

In [4]:
# Load the tab-separated edge and node tables from the local Data/ directory.
edgesdf = pd.read_csv("Data/edges.tsv", sep="\t")
nodesdf = pd.read_csv("Data/nodes.tsv", sep="\t")

In [12]:
# Connection settings for the Neo4j Bolt endpoint.
# Each value can be overridden through an environment variable so that real
# credentials never have to be written into the notebook. The fallbacks are
# the original local-development values and should not be used for a shared
# or production database.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "12345678")
# The driver expects basic-auth credentials as a (user, password) tuple.
auth = (username, password)

In [13]:
def create_indexes(tx):
    """Issue the index-creation statements used by the import.

    Four ``CREATE INDEX IF NOT EXISTS`` statements are sent through
    ``tx.run``, in this order: the ``id``, ``kind`` and ``name`` properties
    of ``Node`` nodes, then the ``metaedge`` property of ``CONNECTED``
    relationships.

    Args:
        tx: Object exposing ``run(query)``; in this notebook it is the
            transaction handed over by ``session.execute_write``.
    """
    # One index per looked-up node property.
    statements = [
        f"CREATE INDEX IF NOT EXISTS FOR (n:Node) ON (n.{prop})"
        for prop in ("id", "kind", "name")
    ]
    # Relationship-property index for the edge type stored in `metaedge`.
    statements.append(
        "CREATE INDEX IF NOT EXISTS FOR ()-[r:CONNECTED]->() ON (r.metaedge)"
    )
    for statement in statements:
        tx.run(statement)

In [14]:
def add_nodes_in_batches(tx, nodes_batch):
    """Upsert one batch of nodes with a single Cypher statement.

    The batch is unwound server-side: every record is merged into a
    ``Node`` keyed on ``id``, after which its ``kind`` and ``name``
    properties are set from the record.

    Args:
        tx: Object exposing ``run(query, **parameters)``; in this notebook
            it is the transaction supplied by ``session.execute_write``.
        nodes_batch: Records passed as the ``$batch`` parameter; the query
            reads the ``id``, ``kind`` and ``name`` fields of each one.
    """
    # MERGE on `id` only, so re-importing a node updates it instead of duplicating it.
    cypher = """
    UNWIND $batch AS node
    MERGE (n:Node {id: node.id})
    SET n.kind = node.kind, n.name = node.name
    """
    tx.run(cypher, batch=nodes_batch)

def add_edges_in_batches(tx, edges_batch):
    """Upsert one batch of relationships with a single Cypher statement.

    For every record the query matches the source and target ``Node`` by
    ``id`` and merges a ``CONNECTED`` relationship between them carrying
    the record's ``metaedge`` value.

    Args:
        tx: Object exposing ``run(query, **parameters)``; in this notebook
            it is the transaction supplied by ``session.execute_write``.
        edges_batch: Records passed as the ``$batch`` parameter; the query
            reads the ``source``, ``target`` and ``metaedge`` fields of
            each one.
    """
    # Both endpoints are looked up with MATCH, so a record whose source or
    # target id has no node produces no relationship.
    cypher = """
    UNWIND $batch AS edge
    MATCH (source:Node {id: edge.source})
    MATCH (target:Node {id: edge.target})
    MERGE (source)-[r:CONNECTED {metaedge: edge.metaedge}]->(target)
    """
    tx.run(cypher, batch=edges_batch)

def chunked_list(data, chunk_size=1000):
    """Yield consecutive slices of `data`, each at most `chunk_size` items long.

    Args:
        data: A sequence supporting ``len()`` and slicing (e.g. a list).
        chunk_size: Maximum number of items per yielded slice; must be positive.

    Yields:
        Slices ``data[i:i + chunk_size]`` in order. The last slice may be
        shorter; an empty `data` yields nothing.

    Raises:
        ValueError: If `chunk_size` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # and a zero step fails with an unrelated-looking range() error, so reject
    # both up front with a clear message.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    for start in range(0, len(data), chunk_size):
        yield data[start:start + chunk_size]


In [15]:
# Number of records sent to Neo4j per write transaction. Used both for
# chunking and for the progress-bar totals so the two cannot drift apart.
BATCH_SIZE = 1000


def _batch_count(n_rows, batch_size):
    """Return how many batches of `batch_size` are needed for `n_rows` rows."""
    # Ceiling division: an exact multiple must not be counted as one extra batch.
    return (n_rows + batch_size - 1) // batch_size


# Open the driver and a session as context managers so both are closed even
# if the import fails part-way through.
with GraphDatabase.driver(uri=uri, auth=auth) as driver:
    with driver.session() as session:
        print("Creating indexes...")
        session.execute_write(create_indexes)

        print("Adding nodes...")
        # One dict per row, keyed by column name, as expected by the $batch parameter.
        nodes_data = nodesdf.to_dict(orient="records")
        for batch in tqdm(
            chunked_list(nodes_data, BATCH_SIZE),
            total=_batch_count(len(nodes_data), BATCH_SIZE),
        ):
            session.execute_write(add_nodes_in_batches, batch)

        # Edges are loaded after nodes because the edge query MATCHes
        # existing nodes by id.
        print("Adding edges...")
        edges_data = edgesdf.to_dict(orient="records")
        for batch in tqdm(
            chunked_list(edges_data, BATCH_SIZE),
            total=_batch_count(len(edges_data), BATCH_SIZE),
        ):
            session.execute_write(add_edges_in_batches, batch)

        print("Data import complete!")

Creating indexes...
Adding nodes...


100%|██████████| 24/24 [00:06<00:00,  3.45it/s]


Adding edges...


100%|██████████| 1293/1293 [10:24<00:00,  2.07it/s]  

Data import complete!



