# Introducing the GDS Python client: graph projection

## Installing and configuring the `graphdatascience` package

In [1]:
# Install the Python client into the kernel's environment if it is missing (uncomment to run).
# The outputs recorded in this notebook were produced with package version 1.5.
# %pip install graphdatascience==1.5

In [2]:
import graphdatascience
# Version number of the graphdatascience Python package (the client),
# not of the GDS library installed in Neo4j.
graphdatascience.__version__

'1.5'

In [3]:
# Display configuration (optional): do not truncate the text inside DataFrame
# cells, so long procedure signatures and configuration maps are shown in full.
import pandas as pd
pd.set_option("display.max_colwidth", None)
from IPython.display import display, HTML

In [4]:
from graphdatascience import GraphDataScience

In [5]:
# Instantiate the GDS client object.
# Connection settings are read from the environment instead of being hard-coded,
# so that credentials are not stored in the notebook:
#   NEO4J_URI      (default: bolt://localhost:7687)
#   NEO4J_USER     (default: neo4j)
#   NEO4J_PASSWORD (prompted for interactively when unset)
import os
from getpass import getpass

gds = GraphDataScience(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        # The password is passed straight through, not kept in a notebook variable.
        os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: "),
    ),
)
print(gds)
# Print the version of the GDS library installed in Neo4j;
# this is different from the Python package version number.
print(gds.version())

<graphdatascience.graph_data_science.GraphDataScience object at 0x7f606ca0f970>
2.2.5


In [6]:
# list all available GDS procedures and functions
# (columns: name, description, signature, type)
gds.list()

Unnamed: 0,name,description,signature,type
0,gds.allShortestPaths.delta.mutate,The Delta Stepping shortest path algorithm computes the shortest (weighted) path between one node and any other node in the graph. The computation is run multi-threaded,"gds.allShortestPaths.delta.mutate(graphName :: STRING?, configuration = {} :: MAP?) :: (relationshipsWritten :: INTEGER?, mutateMillis :: INTEGER?, postProcessingMillis :: INTEGER?, preProcessingMillis :: INTEGER?, computeMillis :: INTEGER?, configuration :: MAP?)",procedure
1,gds.allShortestPaths.delta.mutate.estimate,Returns an estimation of the memory consumption for that procedure.,"gds.allShortestPaths.delta.mutate.estimate(graphNameOrConfiguration :: ANY?, algoConfiguration :: MAP?) :: (requiredMemory :: STRING?, treeView :: STRING?, mapView :: MAP?, bytesMin :: INTEGER?, bytesMax :: INTEGER?, nodeCount :: INTEGER?, relationshipCount :: INTEGER?, heapPercentageMin :: FLOAT?, heapPercentageMax :: FLOAT?)",procedure
2,gds.allShortestPaths.delta.stats,The Delta Stepping shortest path algorithm computes the shortest (weighted) path between one node and any other node in the graph. The computation is run multi-threaded,"gds.allShortestPaths.delta.stats(graphName :: STRING?, configuration = {} :: MAP?) :: (postProcessingMillis :: INTEGER?, preProcessingMillis :: INTEGER?, computeMillis :: INTEGER?, configuration :: MAP?)",procedure
3,gds.allShortestPaths.delta.stats.estimate,Returns an estimation of the memory consumption for that procedure.,"gds.allShortestPaths.delta.stats.estimate(graphNameOrConfiguration :: ANY?, algoConfiguration :: MAP?) :: (requiredMemory :: STRING?, treeView :: STRING?, mapView :: MAP?, bytesMin :: INTEGER?, bytesMax :: INTEGER?, nodeCount :: INTEGER?, relationshipCount :: INTEGER?, heapPercentageMin :: FLOAT?, heapPercentageMax :: FLOAT?)",procedure
4,gds.allShortestPaths.delta.stream,The Delta Stepping shortest path algorithm computes the shortest (weighted) path between one node and any other node in the graph. The computation is run multi-threaded,"gds.allShortestPaths.delta.stream(graphName :: STRING?, configuration = {} :: MAP?) :: (index :: INTEGER?, sourceNode :: INTEGER?, targetNode :: INTEGER?, totalCost :: FLOAT?, nodeIds :: LIST? OF INTEGER?, costs :: LIST? OF FLOAT?, path :: PATH?)",procedure
...,...,...,...,...
364,gds.util.infinity,RETURN gds.util.infinity() - Return infinity as a Cypher value.,gds.util.infinity() :: (FLOAT?),function
365,gds.util.isFinite,"RETURN gds.util.isFinite(value) - Return true iff the given argument is a finite value (not ±Infinity, NaN, or null).",gds.util.isFinite(value :: NUMBER?) :: (BOOLEAN?),function
366,gds.util.isInfinite,"RETURN gds.util.isInfinite(value) - Return true iff the given argument is not a finite value (not ±Infinity, NaN, or null).",gds.util.isInfinite(value :: NUMBER?) :: (BOOLEAN?),function
367,gds.util.nodeProperty,Returns a node property value from a named in-memory graph.,"gds.util.nodeProperty(graphName :: STRING?, nodeId :: NUMBER?, propertyKey :: STRING?, nodeLabel = * :: STRING?) :: (ANY?)",function


## GDS Python Client usage

## Creating a projected graph from Python


In [12]:
# Project the in-memory graph "persons": Person nodes linked by KNOWS
# relationships, which are treated as undirected.
#
# A graph with the same name left over from a previous run is dropped first:
# projecting under a name that already exists in the graph catalog raises an
# error, which would make this cell fail on re-run (Restart Kernel -> Run All).
if gds.graph.exists("persons")["exists"]:
    gds.graph.drop(gds.graph.get("persons"))

# The call returns the Graph object and a summary of the projection.
graph_object, graph_infos = gds.graph.project(
    "persons",  # projected graph's name
    "Person",  # node projection
    {"KNOWS": {"orientation": "UNDIRECTED"}},  # relationship projection
)

In [13]:
# display the Graph object returned by the projection
graph_object

Graph({'graphName': 'persons', 'nodeCount': 40948, 'relationshipCount': 715412, 'database': 'neo4j', 'configuration': {'relationshipProjection': {'KNOWS': {'orientation': 'UNDIRECTED', 'aggregation': 'DEFAULT', 'type': 'KNOWS', 'properties': {}}}, 'jobId': '6c97c72d-2dfc-4a4c-9c4a-6d66e3eb069a', 'nodeProjection': {'Person': {'label': 'Person', 'properties': {}}}, 'relationshipProperties': {}, 'creationTime': neo4j.time.DateTime(2022, 12, 18, 15, 30, 54, 686664698, tzinfo=<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>), 'validateRelationships': False, 'readConcurrency': 4, 'sudo': False, 'nodeProperties': {}}, 'schema': {'graphProperties': {}, 'relationships': {'KNOWS': {}}, 'nodes': {'Person': {}}}, 'memoryUsage': '6286 KiB'})

In [14]:
# Projection summary returned by gds.graph.project alongside the Graph object;
# converted to a DataFrame only for nicer display.
graph_infos.to_frame()

Unnamed: 0,0
nodeProjection,"{'Person': {'label': 'Person', 'properties': {}}}"
relationshipProjection,"{'KNOWS': {'orientation': 'UNDIRECTED', 'aggregation': 'DEFAULT', 'type': 'KNOWS', 'properties': {}}}"
graphName,persons
nodeCount,40948
relationshipCount,715412
projectMillis,43


In [15]:
# Graph object accessors: name, node count and relationship count of the projection.
(
    graph_object.name(),
    graph_object.node_count(),
    graph_object.relationship_count(),
)

('persons', 40948, 715412)

In [16]:
# list all projected graphs currently available in the graph catalog
gds.graph.list()

Unnamed: 0,degreeDistribution,graphName,database,memoryUsage,sizeInBytes,nodeCount,relationshipCount,configuration,density,creationTime,modificationTime,schema
0,"{'p99': 111, 'min': 0, 'max': 446, 'mean': 17.47123180619322, 'p90': 36, 'p50': 10, 'p999': 200, 'p95': 52, 'p75': 19}",persons,neo4j,6286 KiB,6437360,40948,715412,"{'relationshipProjection': {'KNOWS': {'orientation': 'UNDIRECTED', 'aggregation': 'DEFAULT', 'type': 'KNOWS', 'properties': {}}}, 'jobId': '6c97c72d-2dfc-4a4c-9c4a-6d66e3eb069a', 'nodeProjection': {'Person': {'label': 'Person', 'properties': {}}}, 'relationshipProperties': {}, 'creationTime': 2022-12-18T15:30:54.686664698+01:00, 'validateRelationships': False, 'readConcurrency': 4, 'sudo': False, 'nodeProperties': {}}",0.000427,2022-12-18T15:30:54.686664698+01:00,2022-12-18T15:30:54.720689525+01:00,"{'graphProperties': {}, 'relationships': {'KNOWS': {}}, 'nodes': {'Person': {}}}"


In [11]:
# Get a Graph object for an already projected graph, looked up by its name.
G = gds.graph.get("persons")
# Show the name of the retrieved graph.
G.name()

'persons'

In [12]:
# Drop the projected graph referenced by G from the graph catalog (uncomment to run):
# gds.graph.drop(G)
# WARNING: will fail if G.name() is not present in the graph catalog