In [2]:
from utils_neo4j import Neo4jApp

from dotenv import load_dotenv
import os

load_dotenv()

# Load params for Graph
# Each connection setting is read from the process environment after
# load_dotenv() has run; os.getenv returns None for any key that is not set.
scheme = os.getenv("scheme")
host_name = os.getenv("host_name")
port = os.getenv("port")
user = os.getenv("user")
password = os.getenv("password")
database = os.getenv("database")

# Fail fast on the parts that make up the connection URI: a missing value would
# otherwise be interpolated as the literal text "None" and only surface later
# as an obscure connection error.
# NOTE(review): user/password/database are passed to Neo4jApp unchanged; whether
# None is acceptable for them depends on Neo4jApp - confirm before validating.
_missing_params = [
    key
    for key, value in (("scheme", scheme), ("host_name", host_name), ("port", port))
    if not value
]
if _missing_params:
    raise EnvironmentError(
        "Missing Neo4j connection setting(s): "
        + ", ".join(_missing_params)
        + ". Define them in the environment or in the .env file."
    )

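**TODO (before running this notebook):**

- Create a database in the Neo4j Desktop app
- Fill in the connection settings in the `.env` file (`scheme`, `host_name`, `port`, `user`, `password`, `database`) so that they match the database you created in Neo4j Desktop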
- Install plugins:
  - APOC



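- Update the Neo4j configuration (`neo4j.conf`) as follows:
  - `dbms.memory.heap.initial_size=2048m`
  - `dbms.memory.heap.max_size=4G`
  - `dbms.memory.pagecache.size=2048m`
  - Note: on Neo4j 5 these settings are named `server.memory.heap.initial_size`, `server.memory.heap.max_size` and `server.memory.pagecache.size`.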


In [3]:
# Assemble the connection URI as <scheme>://<host_name>:<port>, create the
# Neo4jApp wrapper for the configured user and database, and run its
# verify_connection() check (its return value is the cell output).
uri = "{}://{}:{}".format(scheme, host_name, port)
app = Neo4jApp(uri, user, password, database)
app.verify_connection()

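Before loading any data:

- Copy every CSV file you want to load (here `kg_nodes_0.csv`–`kg_nodes_9.csv` and `kg_relations_0.csv`–`kg_relations_9.csv`) into the database's import folder in Neo4j Desktop, since the queries below read them via `file:///` URLs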

# Remove nodes (if needed)

In [4]:
# Cypher: match every node and delete it together with its relationships
# in one statement.
query_remove_all = """
MATCH (n)
DETACH DELETE n
"""

# Cypher: the same deletion driven through apoc.periodic.iterate, so nodes are
# removed in batches of 1000 with parallel execution disabled. The statement
# returns the batch count, the total and any error messages.
# Uses an APOC procedure, so the APOC plugin must be installed.
query_remove_all_in_iterations = """
//Delete all in iterations
CALL apoc.periodic.iterate(
  "MATCH (n) RETURN n",
  "DETACH DELETE n",
  {batchSize: 1000, parallel: false}
)
YIELD batches, total, errorMessages
RETURN batches, total, errorMessages;
"""

In [5]:
# Batched wipe of all nodes via APOC; the returned batches/total/errorMessages
# row is shown as the cell output.
app.query(query_remove_all_in_iterations)

EagerResult(records=[<Record batches=1 total=0 errorMessages={}>], summary=<neo4j._work.summary.ResultSummary object at 0x10730fa50>, keys=['batches', 'total', 'errorMessages'])

In [6]:
# Plain DETACH DELETE of every node still present in the database.
app.query(query_remove_all)

EagerResult(records=[], summary=<neo4j._work.summary.ResultSummary object at 0x173916bd0>, keys=[])

# Create constraints

In [7]:
# Cypher: uniqueness constraint named id_unique on Entity.id, created only if
# it does not exist yet.
query_constraint_node_id = """
CREATE CONSTRAINT id_unique IF NOT EXISTS FOR (x:Entity) REQUIRE x.id IS UNIQUE
"""

app.query(query_constraint_node_id)


# List the constraints currently defined; the result is the cell output.
app.query("SHOW CONSTRAINTS")

EagerResult(records=[<Record id=4 name='id_unique' type='UNIQUENESS' entityType='NODE' labelsOrTypes=['Entity'] properties=['id'] ownedIndex='id_unique' propertyType=None>], summary=<neo4j._work.summary.ResultSummary object at 0x17391d050>, keys=['id', 'name', 'type', 'entityType', 'labelsOrTypes', 'properties', 'ownedIndex', 'propertyType'])

# Load nodes

In [8]:
# query_load_nodes = """
# LOAD CSV WITH HEADERS FROM 'file:///kg_nodes.csv' AS row
# CREATE (e:Entity {
#     id: row.ID,
#     type: row.type,
#     name: row.name,
#     source: row.source
# });
# """

# query_load_nodes = """
# LOAD CSV WITH HEADERS FROM 'file:///kg_nodes.csv' AS row
# MERGE (e:Entity {id: row.ID})
# SET e.type = row.type,
#     e.name = row.name,
#     e.source = row.source;
# """

# query_load_nodes_with_embeddings = """
# LOAD CSV WITH HEADERS FROM 'file:///kg_nodes.csv' AS row
# MERGE (e:Entity {id: row.ID})
# SET e.type = row.type,
#     e.name = row.name,
#     e.source = row.source,
#     e.embedding = apoc.convert.fromJsonList(row.embedding) // Use apoc.convert.fromJsonList if embeddings are JSON arrays
# """

# str.format template for loading one node CSV shard; {i} selects the file
# kg_nodes_{i}.csv and the doubled braces {{ }} become literal Cypher braces.
# apoc.periodic.iterate reads the CSV rows and, in batches of 1000 with
# parallel execution enabled, MERGEs one Entity per row.ID and sets its type,
# name, source and embedding (row.embedding converted with
# apoc.convert.fromJsonList).
query_load_nodes_with_embeddings_i_template = """
CALL apoc.periodic.iterate(
  'LOAD CSV WITH HEADERS FROM "file:///kg_nodes_{i}.csv" AS row RETURN row',
  'MERGE (e:Entity {{id: row.ID}})
   SET e.type = row.type,
       e.name = row.name,
       e.source = row.source,
       e.embedding = apoc.convert.fromJsonList(row.embedding)', // Use apoc.convert.fromJsonList if embeddings are JSON arrays
  {{batchSize: 1000, iterateList: true, parallel: true}}
);
"""


# query_load_nodes = """
# LOAD CSV WITH HEADERS FROM 'file:///kg_nodes.csv' AS row
# CALL apoc.create.node(['Entity', row.type], {
#     id: row.ID,
#     name: row.name,
#     source: row.source
# }) YIELD node
# RETURN node;
# """

# app.query(query=query_load_nodes_with_embeddings_i_template)

# Load the ten node shards (kg_nodes_0.csv .. kg_nodes_9.csv) one after another.
for i in range(0, 10):
    query_load_nodes_i = query_load_nodes_with_embeddings_i_template.format(i=i)
    result = app.query(query=query_load_nodes_i)

    # apoc.periodic.iterate does not raise when a batch fails; it reports the
    # failure in its result row. Inspect that row so a partially loaded shard
    # is not reported as "done".
    for record in getattr(result, "records", None) or []:
        if record.get("failedBatches") or record.get("errorMessages"):
            raise RuntimeError(
                f"Loading kg_nodes_{i}.csv failed in "
                f"{record.get('failedBatches')} batch(es): "
                f"{record.get('errorMessages')}"
            )
    print("done with", i)

done with 0
done with 1
done with 2
done with 3
done with 4
done with 5
done with 6
done with 7
done with 8
done with 9


In [9]:
# Vector index settings: number of embedding dimensions and similarity metric.
len_emb = 1536
similarity_function = "cosine"

# Cypher statement that creates the entityEmbeddings vector index on
# Entity.embedding unless it already exists. Written as an f-string: the
# doubled braces are literal Cypher map braces, the single-braced names are
# filled in from the two settings above.
vector_index_embedding = f"""
CREATE VECTOR INDEX entityEmbeddings IF NOT EXISTS
FOR (e:Entity)
ON e.embedding
OPTIONS {{
    indexConfig: {{
        `vector.dimensions`: {len_emb},
        `vector.similarity_function`: '{similarity_function}'
    }}
}}
"""

# Run the CREATE VECTOR INDEX statement built above; the query result is the
# cell output.
app.query(vector_index_embedding)

EagerResult(records=[], summary=<neo4j._work.summary.ResultSummary object at 0x1066c6050>, keys=[])

# Create index (if needed)

In [10]:
# query_index_nodes = """
# CREATE INDEX FOR (e:Entity) ON (e.id);
# """

# app.query(query=query_index_nodes)

# Load relations

In [11]:
# query_load_relations = """
# LOAD CSV WITH HEADERS FROM 'file:///kg_relations.csv' AS row
# CREATE (r:Relationship {
#     source: row.x_index,
#     target: row.y_index,
#     name: row.display_relation,
#     relation: row.relation
# });
# """


# query_load_relations_i_template = """
# LOAD CSV WITH HEADERS FROM 'file:///kg_relations_{i}.csv' AS row
# CREATE (r:Relationship {{
#     source: row.x_index,
#     target: row.y_index,
#     name: row.display_relation,
#     relation: row.relation
# }});
# """

# query_match_relations_template = """
# LOAD CSV WITH HEADERS FROM 'file:///kg_relations.csv' AS row
# MATCH (x:Entity {{id: row.x_index}})
# MATCH (y:Entity {{id: row.y_index}})
# CREATE (x)-[r:RELATION {{name: row.display_relation, relation: row.relation}}]->(y);
# """

# str.format template for loading one relation CSV shard; {i} selects the file
# kg_relations_{i}.csv and the doubled braces {{ }} become literal Cypher braces.
# apoc.periodic.iterate reads the CSV rows and, in batches of 1000, looks up
# both endpoint Entity nodes by id (row.x_index, row.y_index) and calls
# apoc.create.relationship with row.display_relation as the relationship type
# and the property e2e = [row.relation]. A row whose endpoints are not both
# matched produces no relationship.
query_match_relations_i_template_using_apoc = """
CALL apoc.periodic.iterate(
  'LOAD CSV WITH HEADERS FROM "file:///kg_relations_{i}.csv" AS row RETURN row',
  'MATCH (e1:Entity {{id: row.x_index}})
   MATCH (e2:Entity {{id: row.y_index}})
   CALL apoc.create.relationship(e1, row.display_relation, {{e2e: [row.relation]}}, e2)
   YIELD rel
   RETURN count(*)',
  {{batchSize:1000, iterateList:true}}
)
"""


# query_match_relations_i_template = """
# CALL apoc.periodic.iterate(
#   'LOAD CSV WITH HEADERS FROM "file:///kg_relations_{i}.csv" AS row RETURN row',
#   'MATCH (e1:Entity {{id: row.x_index}})
#    MATCH (e2:Entity {{id: row.y_index}})
#    CREATE (e1)-[:RELATION {{type: row.display_relation, e2e: [row.relation]}}]->(e2);
#    RETURN rel',
#   {{batchSize:1000, iterateList:true}}
# )
# """

# Load the ten relation shards (kg_relations_0.csv .. kg_relations_9.csv) in order.
for i in range(0, 10):
    query_load_relations_i = query_match_relations_i_template_using_apoc.format(i=i)
    result = app.query(query=query_load_relations_i)

    # apoc.periodic.iterate does not raise when a batch fails; it reports the
    # failure in its result row. Inspect that row so a partially loaded shard
    # is not reported as "done".
    for record in getattr(result, "records", None) or []:
        if record.get("failedBatches") or record.get("errorMessages"):
            raise RuntimeError(
                f"Loading kg_relations_{i}.csv failed in "
                f"{record.get('failedBatches')} batch(es): "
                f"{record.get('errorMessages')}"
            )
    print("done with", i)

done with 0
done with 1
done with 2
done with 3
done with 4
done with 5
done with 6
done with 7
done with 8
done with 9


In [12]:
# Final marker printed once this last cell is reached.
print("Done")

Done
