#### This Version Creates the Graph in Neo4J then runs the Queries.

In [162]:
from neo4j import GraphDatabase
import os
import pandas as pd
import shutil
import subprocess

This copies the nodes and relationships files from data/current_working_graph into the DBMS import folder (the originals are left in place).

At the moment this uses local paths, but the rest should be automated.

In [163]:
# Define the source directory
# (the relative path is resolved against the current working directory)
source_dir = os.path.abspath('../../data/current_working_graph')

# Define the destination directory
# This should be the import directory of your graph database
# NOTE(review): this is a machine-specific absolute path; change it to match your own DBMS
destination_dir = '/home/eddie/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-e4c1e17b-4cb7-4b84-a9fb-3fde3105cb8a/import/'

# Specify the filenames to copy
filenames = ['nodes.csv', 'relationships.csv']

for filename in filenames:
    source_path = os.path.join(source_dir, filename)
    destination_path = os.path.join(destination_dir, filename)
    
    # Copy each file (shutil.copy leaves the source file in place)
    shutil.copy(source_path, destination_path)

Then run the admin import command.

In [164]:
# Define the working directory: the DBMS folder that contains bin/neo4j-admin
# and the import/ folder referenced by the relative paths in the command below
working_dir = '/home/eddie/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-e4c1e17b-4cb7-4b84-a9fb-3fde3105cb8a'

# Construct the command (all paths are relative to working_dir)
command = [
    './bin/neo4j-admin', 'database', 'import', 'full',
    '--nodes=import/nodes.csv',
    '--relationships=import/relationships.csv',
    '--overwrite-destination', 'neo4j'
]

# Execute the command, capturing both output streams as bytes
result = subprocess.run(command, cwd=working_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# Check if the command was successful
if result.returncode == 0:
    print("Import successful")
    print(result.stdout.decode())
else:
    print("Error during import:")
    # Show both streams: the tool may report part of its diagnostics on stdout,
    # so printing stderr alone can hide the actual cause of the failure.
    print(result.stdout.decode())
    print(result.stderr.decode())
    # Raise CalledProcessError so that "Run All" stops here instead of letting
    # the following cells query a stale or partially imported database.
    result.check_returncode()

Import successful
Neo4j version: 5.12.0
Importing the contents of these files into /home/eddie/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-e4c1e17b-4cb7-4b84-a9fb-3fde3105cb8a/data/databases/neo4j:
Nodes:
  /home/eddie/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-e4c1e17b-4cb7-4b84-a9fb-3fde3105cb8a/import/nodes.csv

Relationships:
  /home/eddie/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-e4c1e17b-4cb7-4b84-a9fb-3fde3105cb8a/import/relationships.csv


Available resources:
  Total machine memory: 13.49GiB
  Free machine memory: 148.6MiB
  Max heap memory : 910.5MiB
  Max worker threads: 12
  Configured max memory: 77.07MiB
  High parallel IO: true

Cypher type normalization is enabled (disable with --normalize-types=false):
  Property type of 'Weight' normalized from 'float' --> 'double' in /home/eddie/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-e4c1e17b-4cb7-4b84-a9fb-3fde3105cb8a/import/relationships.csv

Import starting 2024-03-10 05

Run and connect to the Neo4j Database

In [165]:
# NOTE(review): GraphDatabase is already imported in the first cell of the notebook
from neo4j import GraphDatabase

uri = "neo4j://localhost:7687"
username = "neo4j"              # Neo4J username
# The password is read from the NEO4J_Password environment variable;
# os.environ[...] raises KeyError if that variable is not set
password = os.environ['NEO4J_Password']           # Neo4J password

# Create a driver instance
driver = GraphDatabase.driver(uri, auth=(username, password))

# Ensure you close the driver connection when your program ends
def close_driver() -> None:
    """Close the notebook-level ``driver`` created in this cell."""
    driver.close()

**First create the gds Graph Projection**

In [166]:
def project_graph(tx):
    """Create the in-memory GDS projection 'myGraph', replacing any existing one.

    The projection contains the nodes labelled 'Normal', 'Hyperglycemia' and
    'Hypoglycemia', plus the LINK relationships, projected as UNDIRECTED and
    carrying their 'Weight' property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        None.
    """
    # A projection left over from an earlier (e.g. interrupted) run would make
    # gds.graph.project fail with "graph already exists". The second argument is
    # failIfMissing: false turns the drop into a no-op when nothing is there.
    tx.run("CALL gds.graph.drop('myGraph', false)")

    query = """
    CALL gds.graph.project(
      'myGraph',
      ['Normal', 'Hyperglycemia', 'Hypoglycemia'],
      {
        LINK: {
          orientation: 'UNDIRECTED',
          properties: 'Weight'
        }
      }
    )
    """
    tx.run(query)

# Use a session to execute the graph projection
# (project_graph is passed as a callback and issues the gds.graph.project call)
with driver.session() as session:
    session.execute_write(project_graph)

Define the graph algorithms.

In [167]:
def run_pagerank_centrality(tx):
    """Stream PageRank scores for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, score)`` tuples in the order produced by the
        query: score descending, then SpecID ascending.
    """
    cypher = """
    CALL gds.pageRank.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    ranked = []
    for row in tx.run(cypher):
        ranked.append((row["name"], row["score"]))
    return ranked

In [168]:
def run_degree_centrality(tx):
    """Stream degree centrality for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, score)`` tuples in the order produced by the
        query: score descending, then SpecID ascending.
    """
    cypher = """
    CALL gds.degree.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    rows = tx.run(cypher)
    return list((row["name"], row["score"]) for row in rows)

In [169]:
def run_eigenvector_centrality(tx):
    """Stream eigenvector centrality for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, score)`` tuples in the order produced by the
        query: score descending, then SpecID ascending.
    """
    cypher = """
    CALL gds.eigenvector.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    scored = []
    for row in tx.run(cypher):
        pair = (row["name"], row["score"])
        scored.append(pair)
    return scored

In [170]:
def run_articlerank_centrality(tx):
    """Stream ArticleRank scores for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, score)`` tuples in the order produced by the
        query: score descending, then SpecID ascending.
    """
    cypher = """
    CALL gds.articleRank.stream('myGraph', {
        relationshipWeightProperty: 'Weight'
    })
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).SpecID AS name, score
    ORDER BY score DESC, name ASC
    """
    return [
        (row["name"], row["score"])
        for row in tx.run(cypher)
    ]

In [171]:
def run_label_propagation_algorithm(tx):
    """Stream Label Propagation community ids for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, community id)`` tuples in the order produced by
        the query: community id first, then SpecID.
    """
    cypher = """
    CALL gds.labelPropagation.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    assignments = []
    for row in tx.run(cypher):
        assignments.append((row["name"], row["Community"]))
    return assignments

In [172]:
def run_leiden_algorithm(tx):
    """Stream Leiden community ids for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, community id)`` tuples in the order produced by
        the query: community id first, then SpecID.
    """
    cypher = """
    CALL gds.leiden.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    rows = tx.run(cypher)
    return list((row["name"], row["Community"]) for row in rows)

In [173]:
def run_louvain_algorithm(tx):
    """Stream Louvain community ids for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, community id)`` tuples in the order produced by
        the query: community id first, then SpecID.
    """
    cypher = """
    CALL gds.louvain.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, communityId AS Community
    RETURN gds.util.asNode(nodeId).SpecID AS name, Community
    ORDER BY Community, name
    """
    return [
        (row["name"], row["Community"])
        for row in tx.run(cypher)
    ]

In [174]:
def run_node2vec_algorithm(tx):
    """Stream node2vec embeddings for the 'myGraph' projection.

    The algorithm is weighted by the 'Weight' relationship property. The
    query has no ORDER BY, so rows come back in whatever order the
    procedure yields them.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, embedding)`` tuples.
    """
    cypher = """
    CALL gds.node2vec.stream('myGraph', { relationshipWeightProperty: 'Weight' })
    YIELD nodeId, embedding
    RETURN gds.util.asNode(nodeId).SpecID AS name, embedding
    """
    vectors = []
    for row in tx.run(cypher):
        vectors.append((row["name"], row["embedding"]))
    return vectors


In [175]:
def run_fastRP_algorithm(tx):
    """Stream FastRP embeddings for the 'myGraph' projection.

    The call is weighted by the 'Weight' relationship property, uses a fixed
    randomSeed of 1234 and requests an embeddingDimension of 128. The query
    has no ORDER BY, so rows come back in whatever order the procedure
    yields them.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        A list of ``(SpecID, embedding)`` tuples.
    """
    cypher = """
    CALL gds.fastRP.stream('myGraph',
        { relationshipWeightProperty: 'Weight',
         randomSeed:1234,
         embeddingDimension: 128
        }
    )
    YIELD nodeId, embedding
    RETURN gds.util.asNode(nodeId).SpecID AS name, embedding
    """
    rows = tx.run(cypher)
    return list((row["name"], row["embedding"]) for row in rows)


Execute the algorithms and store the results in DataFrames.

In [176]:
# Use a session to execute the queries and retrieve the results
with driver.session() as session:
    pagerank_results = session.execute_read(run_pagerank_centrality)
    degree_results = session.execute_read(run_degree_centrality)
    eigenvector_results = session.execute_read(run_eigenvector_centrality)
    articlerank_results = session.execute_read(run_articlerank_centrality)
    label_propagation_results = session.execute_read(run_label_propagation_algorithm)
    leiden_results = session.execute_read(run_leiden_algorithm)
    louvain_results = session.execute_read(run_louvain_algorithm)

In [177]:
# Build one two-column frame per algorithm: 'SpecID' plus a column named after
# the metric, so the frames can later be merged on 'SpecID'.
pagerank_df = pd.DataFrame(pagerank_results, columns=['SpecID', 'PageRank'])
degree_df = pd.DataFrame(degree_results, columns=['SpecID', 'DegreeCentrality'])
eigenvector_df = pd.DataFrame(eigenvector_results, columns=['SpecID', 'EigenvectorCentrality'])
articlerank_df = pd.DataFrame(articlerank_results, columns=['SpecID', 'ArticleRank'])
label_propagation_df = pd.DataFrame(label_propagation_results, columns=['SpecID', 'LabelPropagation'])
leiden_df = pd.DataFrame(leiden_results, columns=['SpecID', 'Leiden'])
louvain_df = pd.DataFrame(louvain_results, columns=['SpecID', 'Louvain'])

In [178]:
# Start from the PageRank frame and left-join every other metric onto it by
# SpecID, so the result keeps one row per PageRank row.
merged_df = pagerank_df
for df in (degree_df, eigenvector_df, articlerank_df, label_propagation_df, leiden_df, louvain_df):
    merged_df = merged_df.merge(df, on='SpecID', how='left')

In [179]:
# Alias the merged metrics frame as `df` (the name the final to_csv cell uses)
# and display it as the cell output.
df = merged_df
df

Unnamed: 0,SpecID,PageRank,DegreeCentrality,EigenvectorCentrality,ArticleRank,LabelPropagation,Leiden,Louvain
0,210510-1-49,1.248106,1901.864817,0.023495,0.224113,1,2500,1525
1,210225-2-12,1.246243,1897.696638,0.023444,0.223970,1,1,2289
2,210510-1-40,1.245336,1895.460696,0.023416,0.223874,1,127,988
3,210217-2-16,1.243198,1901.231876,0.023487,0.224026,1,2500,1525
4,210318-2-47,1.242143,1897.127150,0.023437,0.223874,1,2500,1525
...,...,...,...,...,...,...,...,...
3040,210211-2-09,0.229397,117.481426,0.001451,0.154657,1,127,988
3041,210211-2-11,0.221572,104.152999,0.001287,0.154133,1,127,988
3042,210211-1-37,0.216859,104.459756,0.001290,0.154120,1,127,988
3043,210211-2-03,0.208822,83.987125,0.001038,0.153340,1,127,988


##### **Node2Vec**

In [180]:
# with driver.session() as session:
#     node2vec_results = session.execute_read(run_node2vec_algorithm)

# node2vec_df = pd.DataFrame(node2vec_results, columns=['SpecID', 'embeddings'])

# # Expand the embeddings list into separate columns
# embeddings_df = pd.DataFrame(node2vec_df['embeddings'].tolist(), index=node2vec_df.index)

# # Optionally, rename the new columns
# embeddings_df.columns = [f'embedding_{i}' for i in range(embeddings_df.shape[1])]

# # Join the new embeddings columns to the original DataFrame
# node2vec_df = pd.concat([node2vec_df.drop(['embeddings'], axis=1), embeddings_df], axis=1)
# node2vec_df.to_csv('../../data/node2vec_embeddings.csv', index=False)
# node2vec_df.head()

#### **FastRP**

In [181]:
# Stream the FastRP embeddings: one (SpecID, embedding list) pair per node
with driver.session() as session:
    fastRP_results = session.execute_read(run_fastRP_algorithm)

fastRP_df = pd.DataFrame(fastRP_results, columns=['SpecID', 'embeddings'])

# Spread each embedding list over its own columns; the positional column
# labels 0, 1, 2, ... become embedding_0, embedding_1, embedding_2, ...
embeddings_df = pd.DataFrame(
    fastRP_df['embeddings'].tolist(),
    index=fastRP_df.index,
).add_prefix('embedding_')

# Replace the list-valued column with the expanded embedding columns
fastRP_df = pd.concat(
    [fastRP_df.drop(columns=['embeddings']), embeddings_df],
    axis=1,
)
fastRP_df.to_csv('../../data/fastRP_embeddings.csv', index=False)
fastRP_df.head()

Unnamed: 0,SpecID,embedding_0,embedding_1,embedding_2,embedding_3,embedding_4,embedding_5,embedding_6,embedding_7,embedding_8,...,embedding_118,embedding_119,embedding_120,embedding_121,embedding_122,embedding_123,embedding_124,embedding_125,embedding_126,embedding_127
0,201210-1-00,-0.080063,0.134346,0.311409,0.068268,0.220651,0.020809,-0.157989,-0.023693,0.139395,...,-0.078375,0.062691,-0.112076,0.184696,0.327179,-0.07512,0.091235,-0.209512,0.040763,-0.094208
1,201210-1-01,-0.080101,0.134585,0.310871,0.067332,0.220452,0.02023,-0.15886,-0.023553,0.139669,...,-0.078032,0.063693,-0.111742,0.18601,0.326752,-0.076596,0.09126,-0.211565,0.040145,-0.094633
2,201210-1-02,-0.079054,0.138156,0.306293,0.055854,0.212508,0.01701,-0.174445,-0.019908,0.143086,...,-0.070208,0.065156,-0.112998,0.196053,0.32823,-0.083976,0.093691,-0.220491,0.040576,-0.086324
3,201210-1-03,-0.07992,0.136971,0.307921,0.059693,0.213916,0.01867,-0.169574,-0.020787,0.141887,...,-0.072528,0.063974,-0.112868,0.192851,0.328275,-0.082046,0.0933,-0.217178,0.04093,-0.088214
4,201210-1-04,-0.079154,0.133878,0.31225,0.070663,0.223848,0.020361,-0.154181,-0.024949,0.138894,...,-0.080197,0.062828,-0.111242,0.182163,0.32609,-0.07242,0.090532,-0.207949,0.039952,-0.096966


Delete the projection

In [182]:
def delete_projection(tx):
    """Drop the GDS projection named 'myGraph'.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Returns:
        None.
    """
    drop_query = """
    CALL gds.graph.drop('myGraph')
    """
    tx.run(drop_query)

# Use a session to drop the 'myGraph' projection
with driver.session() as session:
    session.execute_write(delete_projection)

In [183]:
# Close the Neo4j driver connection
close_driver()

In [184]:
# Save the merged metrics frame (`df`) to CSV without the index column
df.to_csv("../../data/current_graph_metrics.csv", index=False)