In [1]:
# Install necessary dependencies
%pip install graphdatascience

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from graphdatascience import GraphDataScience
from neo4j import GraphDatabase

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
gds = GraphDataScience("bolt://127.0.0.1:7687", ("huvi", "huvinesh#"))

In [4]:
driver = GraphDatabase.driver("bolt://127.0.0.1:7687", auth=("huvi", "huvinesh#"))

In [5]:
from graphdatascience.server_version.server_version import ServerVersion

assert gds.server_version() >= ServerVersion(1, 8, 0)

In [6]:
 # The `run_cypher` method can be used to run arbitrary Cypher queries on the database.
_ = gds.run_cypher(
    """
        CREATE
         (dan:Person {name: 'Dan'}),
         (annie:Person {name: 'Annie'}),
         (matt:Person {name: 'Matt'}),
         (jeff:Person {name: 'Jeff'}),
         (brie:Person {name: 'Brie'}),
         (elsa:Person {name: 'Elsa'}),

         (cookies:Product {name: 'Cookies'}),
         (tomatoes:Product {name: 'Tomatoes'}),
         (cucumber:Product {name: 'Cucumber'}),
         (celery:Product {name: 'Celery'}),
         (kale:Product {name: 'Kale'}),
         (milk:Product {name: 'Milk'}),
         (chocolate:Product {name: 'Chocolate'}),

         (dan)-[:BUYS {amount: 1.2}]->(cookies),
         (dan)-[:BUYS {amount: 3.2}]->(milk),
         (dan)-[:BUYS {amount: 2.2}]->(chocolate),

         (annie)-[:BUYS {amount: 1.2}]->(cucumber),
         (annie)-[:BUYS {amount: 3.2}]->(milk),
         (annie)-[:BUYS {amount: 3.2}]->(tomatoes),

         (matt)-[:BUYS {amount: 3}]->(tomatoes),
         (matt)-[:BUYS {amount: 2}]->(kale),
         (matt)-[:BUYS {amount: 1}]->(cucumber),

         (jeff)-[:BUYS {amount: 3}]->(cookies),
         (jeff)-[:BUYS {amount: 2}]->(milk),

         (brie)-[:BUYS {amount: 1}]->(tomatoes),
         (brie)-[:BUYS {amount: 2}]->(milk),
         (brie)-[:BUYS {amount: 2}]->(kale),
         (brie)-[:BUYS {amount: 3}]->(cucumber),
         (brie)-[:BUYS {amount: 0.3}]->(celery),

         (elsa)-[:BUYS {amount: 3}]->(chocolate),
         (elsa)-[:BUYS {amount: 3}]->(milk)
    """
)

In [7]:
# We define how we want to project our database into GDS
node_projection = ["Person", "Product"]
relationship_projection = {"BUYS": {"orientation": "UNDIRECTED", "properties": "amount"}}

# Before actually going through with the projection, let's check how much memory is required
result = gds.graph.project.estimate(node_projection, relationship_projection)

print(f"Required memory for native loading: {result['requiredMemory']}")

Required memory for native loading: [549 KiB ... 1030 KiB]


In [8]:
# For this small graph memory requirement is low. Let us go through with the projection
G, result = gds.graph.project("purchases", node_projection, relationship_projection)

print(f"The projection took {result['projectMillis']} ms")

# We can use convenience methods on `G` to check if the projection looks correct
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' node labels: {G.node_labels()}")

The projection took 170 ms
Graph 'purchases' node count: 182
Graph 'purchases' node labels: ['Product', 'Person']


In [9]:
# We can also estimate memory of running algorithms like FastRP, so let's do that first
result = gds.fastRP.mutate.estimate(
    G,
    mutateProperty="embedding",
    randomSeed=42,
    embeddingDimension=4,
    relationshipWeightProperty="amount",
    iterationWeights=[0.8, 1, 1, 1],
)

print(f"Required memory for running FastRP: {result['requiredMemory']}")

Required memory for running FastRP: 25 KiB


In [10]:
# Now let's run FastRP and mutate our projected graph 'purchases' with the results
result = gds.fastRP.mutate(
    G,
    mutateProperty="embedding",
    randomSeed=42,
    embeddingDimension=4,
    relationshipWeightProperty="amount",
    iterationWeights=[0.8, 1, 1, 1],
)

# Let's make sure we got an embedding for each node
print(f"Number of embedding vectors produced: {result['nodePropertiesWritten']}")

Number of embedding vectors produced: 182


In [11]:
# Run kNN and write back to db (we skip memory estimation this time...)
result = gds.knn.write(
    G,
    topK=10,
    nodeProperties=["embedding"],
    randomSeed=42,
    concurrency=1,
    sampleRate=1.0,
    deltaThreshold=0.0,
    writeRelationshipType="SIMILAR",
    writeProperty="score",
)

print(f"Relationships produced: {result['relationshipsWritten']}")
print(f"Nodes compared: {result['nodesCompared']}")
print(f"Mean similarity: {result['similarityDistribution']['mean']}")

Relationships produced: 1820
Nodes compared: 182
Mean similarity: 0.057306920565091644


In [12]:
results = gds.run_cypher(
    """
        MATCH (p1:Person)-[r:SIMILAR]->(p2:Person {name: "Brie"})
        RETURN p1.name AS person1, p2.name AS person2, r.score AS similarity
        ORDER BY similarity DESCENDING
    """
)

In [13]:
results

Unnamed: 0,person1,person2,similarity
0,Annie,Brie,0.994913
1,Dan,Brie,0.992308
2,Dan,Brie,0.992308
3,Matt,Brie,0.991966
4,Elsa,Brie,0.984722
5,Matt,Brie,0.979399
6,Matt,Brie,0.979399
7,Annie,Brie,0.973954
8,Annie,Brie,0.970427
9,Annie,Brie,0.970427


In [14]:
gds.run_cypher("""
    UNWIND $data as row
    MATCH(: Person {name:row.person1[0]})-[:BUYS]->(a1:Product)
    RETURN DISTINCT a1.name
""", params={"data":results})

Unnamed: 0,a1.name
0,Milk
1,Tomatoes
2,Cucumber


In [15]:
recommendations = gds.run_cypher("""
        UNWIND $data as row
        MATCH (:Person {name: row.person1[0]})-[:BUYS]->(p1:Product)
        MATCH (:Person {name: row.person2[0]})-[:BUYS]->(p2:Product)
        WITH collect(p1) as products, p2
        WHERE not p2 in products
        RETURN distinct p2.name as recommendation""", params={"data":results})

In [16]:
for recommend in recommendations['recommendation']:
    print(recommend)

Kale
Celery


In [17]:
len(recommendations)

2

In [None]:
gds.run_cypher("""
MATCH 