In [3]:
from py2neo import Graph
import pandas as pd

In [6]:
graph = Graph('bolt://localhost:11013', auth=('neo4j', 'maharastradataset'))

In [191]:
query ="""
   match(n) return(n)
"""
graph.run(query)

In [190]:
query ="""
   match(n) detach delete(n)
"""
graph.run(query)

In [179]:
def run(query):
    return(graph.run(query).stats())

In [180]:
def index_run(query_list):
    for i in query_list:
        graph.run(i).stats()

In [193]:
query1 =["CREATE INDEX ON :Encounter(id)",
         "CREATE INDEX ON :Patient (id)",          
         "CREATE INDEX ON :Procedure (code)"]

index_run(query1)

In [192]:
# query11 =["DROP INDEX ON :Encounter(id)",
#          "DROP INDEX ON :Patient (id)",          
#          "DROP INDEX ON :Procedure (code)"]

# index_run(query11)

In [194]:
graph.query("call db.schema.visualization")

nodes,relationships
"[(_-1:Encounter {constraints: [], indexes: ['id'], name: 'Encounter'}), (_-2:Patient {constraints: [], indexes: ['id'], name: 'Patient'}), (_-3:Procedure {constraints: [], indexes: ['code'], name: 'Procedure'})]",[]


In [195]:
q1 = '''
      load csv with headers from "file:/p_csv/Patient.csv" as row
      MERGE (p:Patient {pid:row.Patient_Id})
          SET
               p.gender=row.Gender,            
               p.age=toInteger(row.Age)
        '''
run(q1)

{'labels_added': 512, 'nodes_created': 512, 'properties_set': 1536}

In [196]:
query2 = '''
        load csv with headers from "file:/p_csv/Encounters.csv" as row
        MERGE (e:Encounter {id:row.Encounter_Id})
          SET 
            e.date_order = row.Date_Order,
            e.patient_id= row.Patient_Id,
            e.weights= toInteger(row.Weights)

        MERGE (p:Patient{pid:row.Patient_Id})
        
        MERGE (p)-[:HAS_ENCOUNTER]->(e)
        
        '''
run(query2)

{'labels_added': 1539,
 'relationships_created': 1539,
 'nodes_created': 1539,
 'properties_set': 6156}

In [197]:
query2 = '''
      load csv with headers from "file:/p_csv/Procedures.csv" as row
      MERGE (p:Patient {pid:row.Patient_Id})
      MERGE (r:Procedure {pname:row.Procedure})
          
          SET 
              r.date_order = row.Date_Order,
              r.start_date = row.Start_date
              
          MERGE (pe:Encounter {id:row.Encounter_Id})
          
          ON MATCH
            SET pe.date_order = row.Date_Order, pe.pname=row.Procedure, pe.Num_Order = toInteger(row.Num)

          MERGE (p)-[:HAS_ENCOUNTER]->(pe)
          MERGE (pe)-[:HAS_PROCEDURE]->(r)
        '''
run(query2)

{'labels_added': 9,
 'relationships_created': 1539,
 'nodes_created': 9,
 'properties_set': 6165}

In [198]:
query2 = '''

    CALL apoc.periodic.iterate(
    'MATCH (p:Patient) RETURN p',
    'MATCH (p)-[:HAS_ENCOUNTER]->(e)
    WITH e
    ORDER BY e.date_order
    WITH collect(e) AS encounters
    WITH encounters, encounters[1..] as nextEncounters
    UNWIND range(0,size(nextEncounters)-1,1) as index
    WITH encounters[index] as first, nextEncounters[index] as second
    MERGE (first)-[:NEXT]->(second)',{iterateList:false});
    
    '''
run(query2)

{}

In [199]:
query5 = '''
            MATCH (p:Patient)-[:HAS_ENCOUNTER]->(e:Encounter)-[:HAS_PROCEDURE]->(pr:Procedure)
            with p.pnumseq as numsequence,collect(e.Num_Order) as sequence1,p
            set p.pnumseq = sequence1
            return p.pid
            '''

run(query5)

{'properties_set': 512}

In [200]:
# Removing property

# query6 = '''
#             MATCH (p:Patient)
#             remove p.pnumseq
# '''

# run(query6)

In [201]:
# res4.to_csv("C:/Users/Mahesh/Desktop/Patient.csv")

## Node similarity


Creating In memory graph

In [141]:
query1= '''CALL gds.graph.create(
              'sequence1',
              ['Patient','Encounter','Procedure'],
              {
                HAS_ENCOUNTER: {
                  orientation: 'NATURAL'
                },
                HAS_PROCEDURE:{
                    orientation: 'NATURAL'
                },
                NEXT:{
                    orientation: 'NATURAL'
                }
              }
            )

        '''
run(query1)


{}

FastRP embedding

In [142]:
query1=  '''CALL gds.fastRP.mutate('sequence1',
            {
                embeddingDimension: 16,
                randomSeed: 42,
                mutateProperty: 'embedding',
                // relationshipWeightProperty: 'amount',
                iterationWeights: [0.8, 1, 1, 1]
            }
            )
            YIELD nodePropertiesWritten

        '''
df5 = pd.DataFrame(graph.query(query1).to_data_frame())
df5


Unnamed: 0,nodePropertiesWritten
0,2060


Using KNN similarity

In [143]:

query1= '''
            CALL gds.beta.knn.write('sequence1', {
                topK: 8,
                nodeWeightProperty: 'embedding',
                randomSeed: 42,
                concurrency: 1,
                sampleRate: 1.0,
                deltaThreshold: 0.0,
                writeRelationshipType: "SIMILAR",
                writeProperty: "score"
            })
            YIELD nodesCompared, relationshipsWritten, similarityDistribution
            RETURN nodesCompared, relationshipsWritten, similarityDistribution.mean as meanSimilarity
        '''

run(query1)


{}

Getting similarity scores

In [146]:
query = '''
            MATCH (n:Patient)-[r:SIMILAR]->(m:Patient)
            RETURN distinct n.pid as Patient1, m.pid as Patient2, r.score as Similarity,n.pnumseq as P1_Sequence,m.pnumseq as P2_Sequence
            ORDER BY Similarity DESCENDING
            '''
df3 = pd.DataFrame(graph.query(query).to_data_frame())
df3.head(50)

Unnamed: 0,Patient1,Patient2,Similarity,P1_Sequence,P2_Sequence
0,4599476,4842921,0.950378,"[1, 3, 2]","[1, 3, 2]"
1,4842921,4599476,0.950378,"[1, 3, 2]","[1, 3, 2]"
2,5117374,4584908,0.947648,"[1, 3, 2]","[1, 3, 2]"
3,4584908,5117374,0.947648,"[1, 3, 2]","[1, 3, 2]"
4,4954905,5151996,0.942102,"[1, 3, 2]","[1, 3, 2]"
5,5151996,4954905,0.942102,"[1, 3, 2]","[1, 3, 2]"
6,5512621,4948560,0.941249,"[1, 3, 2]","[1, 3, 2]"
7,4948560,5512621,0.941249,"[1, 3, 2]","[1, 3, 2]"
8,4366483,4995818,0.941228,"[1, 3, 2]","[1, 3, 2]"
9,4995818,4366483,0.941228,"[1, 3, 2]","[1, 3, 2]"


Removing relation

In [139]:
query1= '''
            match()-[r:SIMILAR]-()
            delete r
        '''
run(query1)

{}

Dropping In-memory graph

In [140]:
query1= '''CALL gds.graph.drop('sequence1')
        '''
run(query1)


{}