In [1]:
import json
import pandas as pd
import numpy as np
from numpy.linalg import norm
from neo4j import GraphDatabase

In [2]:
# Connection settings are read from a local JSON file so that no credentials
# are hard-coded in the notebook; the file must provide the keys
# 'uri', 'user' and 'password'.
with open('config.json', 'r') as f:
    credentials = json.load(f)

uri = credentials['uri']
user = credentials['user']
password = credentials['password']
# A single driver is shared by the whole notebook; each helper function
# opens its own session from it, so no module-level session is kept.
driver = GraphDatabase.driver(uri, auth=(user, password))

In [3]:
def clear_nodes(driver):
    """Delete every ``Patient`` node together with its relationships.

    Args:
        driver: Neo4j driver object; a single session is opened from it and
            closed again before the function returns.
    """
    query = """
        MATCH (p:Patient)
        DETACH DELETE p
    """
    # Only one session is opened, and the ``with`` block guarantees it is
    # closed even if the query or the commit raises.
    with driver.session() as session:
        tx = session.begin_transaction()
        tx.run(query)
        tx.commit()

In [4]:
def get_data(driver):
    """Read user / disease / symptom data from the graph as a list of dicts.

    The query returns one row per distinct ``(u.user, d.disease)`` pair with
    the collected symptom names and the collected integer-converted symptom
    weights. Each row becomes a dict with the keys ``"user"``, ``"disease"``,
    ``"symptoms"`` and ``"weights"``; user and disease are whitespace-stripped.

    Rows that cannot be converted are printed together with the exception and
    skipped, so one malformed row does not abort the whole read.

    Args:
        driver: Neo4j driver object used to open the session.

    Returns:
        list[dict]: one dict per successfully converted row.
    """
    cypher = """
            MATCH (d:Disease)-[]-(u:User)-[r]-(s:Symptom) 
            RETURN u.user AS user, d.disease AS disease, collect(s.symptom) AS symptoms, collect(toIntegerOrNull(s.weight)) AS weights
        """
    collected = []
    with driver.session() as db_session:
        transaction = db_session.begin_transaction()
        for record in transaction.run(cypher):
            try:
                if not record:
                    # Empty record: nothing to convert.
                    continue
                collected.append({
                    "user": record[0].strip(),
                    "disease": record[1].strip(),
                    "symptoms": record[2],
                    "weights": record[3],
                })
            except Exception as e:
                # Best effort: report the offending row and keep going.
                print(e, record)
        transaction.commit()
    return collected

In [5]:
def create_node(driver, node_list, batch_size=1000):
    """Merge one ``Patient`` node per entry of ``node_list``, committing in batches.

    Each entry is passed as the query parameters, so it must provide the keys
    ``user``, ``disease``, ``symptoms`` and ``weights``. The transaction is
    committed after every ``batch_size`` successful statements and once more
    at the end; a progress line is printed after each commit.

    Statements that raise are printed together with the offending entry and
    skipped (best effort).
    NOTE(review): after a failed statement the driver may refuse further work
    on the same transaction - confirm whether a rollback/restart is wanted.

    Args:
        driver: Neo4j driver object; a single session is opened from it and
            closed again before the function returns.
        node_list: iterable of parameter dicts, one per node.
        batch_size: number of successful statements per commit (default 1000).

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    query = """
            MERGE (p:Patient {patient: $user, disease: $disease, symptoms: $symptoms, weights: $weights})
        """
    i = 0
    # The session is opened exactly once and every transaction is started on
    # it, so the ``with`` block closes everything that was opened.
    with driver.session() as session:
        tx = session.begin_transaction()
        for node in node_list:
            try:
                tx.run(query, node)
                i += 1
                if i % batch_size == 0:
                    tx.commit()
                    print(f"{i} lines processed")
                    tx = session.begin_transaction()
            except Exception as e:
                print(e, node)
        # Flush the last, possibly partial, batch.
        tx.commit()
        print(f"{i} lines processed")

In [6]:
# Rebuild the Patient nodes: read the user/disease/symptom rows first,
# remove any Patient nodes left from an earlier run, then merge fresh ones.
node_list = get_data(driver)
clear_nodes(driver)
create_node(driver, node_list)

1000 lines processed
2000 lines processed
3000 lines processed
4000 lines processed
4553 lines processed


In [7]:
def create_gds_graph_project(driver):
    """Project the ``Patient`` nodes into a GDS in-memory graph.

    The projection is named ``'diseaseGraph'``, carries the ``weights`` node
    property and includes all relationship types (``'*'``).
    NOTE(review): the projection call is presumably rejected when a graph with
    this name already exists (e.g. on a notebook re-run) - confirm and drop
    the old projection first if re-runs are expected.

    Args:
        driver: Neo4j driver object; a single session is opened from it and
            closed again before the function returns.
    """
    query = """
        CALL gds.graph.project(
            'diseaseGraph',
            {
                Patient: {
                    properties: ['weights']
                }
            },
            '*'
        );
    """
    # Only one session is opened, and the ``with`` block closes it.
    with driver.session() as session:
        tx = session.begin_transaction()
        tx.run(query)
        tx.commit()

In [8]:
def estimate_knn_write(driver):
    """Print the memory estimate for writing kNN results on ``'diseaseGraph'``.

    Runs ``gds.knn.write.estimate`` with the ``weights`` node property and
    ``topK: 1`` and prints ``nodeCount``, ``bytesMin``, ``bytesMax`` and
    ``requiredMemory`` for every returned row.

    Args:
        driver: Neo4j driver object; a single session is opened from it and
            closed again before the function returns.
    """
    query = """
        CALL gds.knn.write.estimate('diseaseGraph', {
        nodeProperties: ['weights'],
        writeRelationshipType: 'SIMILAR',
        writeProperty: 'score',
        topK: 1
        })
        YIELD nodeCount, bytesMin, bytesMax, requiredMemory
    """
    # Only one session is opened, and the ``with`` block closes it.
    with driver.session() as session:
        tx = session.begin_transaction()
        for row in tx.run(query):
            print(f"{row['nodeCount']=}, {row['bytesMin']=}, {row['bytesMax']=}, {row['requiredMemory']=}")
        tx.commit()

In [9]:
# Project the Patient nodes into the in-memory graph, then print the memory
# estimate for the kNN write before running anything.
create_gds_graph_project(driver)
estimate_knn_write(driver)

row['nodeCount']=4553, row['bytesMin']=548584, row['bytesMax']=1859944, row['requiredMemory']='[535 KiB ... 1816 KiB]'


In [10]:
def get_stream_knn(driver):
    """Stream kNN similarities on ``'diseaseGraph'`` and return them as a DataFrame.

    Runs ``gds.knn.stream`` on the ``weights`` node property with ``topK: 1``
    and fixed ``randomSeed`` / ``concurrency`` / ``sampleRate`` /
    ``deltaThreshold`` settings. The result is ordered by descending
    similarity, then by ``Person1`` and ``Person2``.

    Args:
        driver: Neo4j driver object; a single session is opened from it and
            closed again before the function returns.

    Returns:
        The value of the query result's ``to_df()``: a DataFrame with the
        columns ``Person1``, ``Person2`` and ``similarity``.
    """
    query = """
        CALL gds.knn.stream('diseaseGraph', {
            topK: 1,
            nodeProperties: ['weights'],
            randomSeed: 1337,
            concurrency: 1,
            sampleRate: 1.0,
            deltaThreshold: 0.0
        })
        YIELD node1, node2, similarity
        RETURN gds.util.asNode(node1).patient AS Person1, gds.util.asNode(node2).patient AS Person2, similarity
        ORDER BY similarity DESCENDING, Person1, Person2
    """
    # Only one session is opened, and the ``with`` block closes it.
    with driver.session() as session:
        tx = session.begin_transaction()
        # Materialise the result while the transaction is still open.
        dataframe = tx.run(query).to_df()
        tx.commit()
    return dataframe
    

In [11]:
def write_knn(driver):
    """Run kNN on ``'diseaseGraph'`` and write the result back to the database.

    Calls ``gds.knn.write`` on the ``weights`` node property with ``topK: 1``,
    writing relationships of type ``SIMILAR`` whose ``score`` property holds
    the similarity.

    Args:
        driver: Neo4j driver object; a single session is opened from it and
            closed again before the function returns.
    """
    query = """
        CALL gds.knn.write('diseaseGraph', {
            writeRelationshipType: 'SIMILAR',
            writeProperty: 'score',
            topK: 1,
            randomSeed: 42,
            concurrency: 1,
            nodeProperties: ['weights']
        })
        YIELD nodesCompared, relationshipsWritten
    """
    # Only one session is opened, and the ``with`` block closes it.
    with driver.session() as session:
        tx = session.begin_transaction()
        tx.run(query)
        tx.commit()
    

In [12]:
# Preview the kNN result without writing anything back: one row per
# streamed (Person1, Person2, similarity) triple.
dataframe = get_stream_knn(driver)
print(dataframe.shape)
dataframe.head(5)

(4553, 3)


Unnamed: 0,Person1,Person2,similarity
0,Aaron Munoz,Blythe Herrera,1.0
1,Abbot Carney,Sara Joyce,1.0
2,Abbot Daniels,Garrison Richard,1.0
3,Abbot Goff,Penelope Wooten,1.0
4,Abbot Kaufman,Irma Terry,1.0


In [13]:
# Persist the kNN result as SIMILAR relationships with a 'score' property.
write_knn(driver)