#### Importing libraries

In [1]:
from py2neo import Graph
import pandas as pd

#### Integrating with neo4j

In [2]:
graph = Graph('bolt://localhost:7687', auth=('neo4j', 'md1'))

In [3]:
query ="""
   match(n) return(n)
"""
graph.run(query)

n
"(_0:Patient {age: 67, gender: 'Female', pid: '4779549', pnumseq: [1, 3, 6]})"
"(_1:Patient {age: 73, gender: 'Female', pid: '5430539', pnumseq: [1, 3, 4, 2]})"
"(_2:Patient {age: 69, gender: 'Male', pid: '5055817', pnumseq: [1, 3, 2]})"


In [4]:
query ="""
   match(n) detach delete(n)
"""
graph.run(query)

In [4]:
def run(query):
    return(graph.run(query).stats())

In [5]:
def index_run(query_list):
    for i in query_list:
        graph.run(i).stats()

In [8]:
query1 =["CREATE INDEX ON :Encounter(id)",
         "CREATE INDEX ON :Patient (id)",          
         "CREATE INDEX ON :Procedure (code)"]

index_run(query1)

In [9]:
# query11 =["DROP INDEX ON :Encounter(id)",
#          "DROP INDEX ON :Patient (id)",          
#          "DROP INDEX ON :Procedure (code)"]

# index_run(query11)

In [6]:
graph.query("call db.schema.visualization")

nodes,relationships
"[(_-1:Encounter {constraints: [], indexes: ['id'], name: 'Encounter'}), (_-2:Patient {constraints: [], indexes: ['id'], name: 'Patient'}), (_-3:Procedure {constraints: [], indexes: ['code'], name: 'Procedure'})]","[(_-2)-[:HAS_ENCOUNTER {}]->(_-1), (_-1)-[:HAS_PROCEDURE {}]->(_-3), (_-1)-[:NEXT {}]->(_-1)]"


## Loading CSV Files

In [12]:
q1 = '''
      load csv with headers from "file:/p_csv/Patient.csv" as row
      MERGE (p:Patient {pid:row.Patient_Id})
          SET
               p.gender=row.Gender,            
               p.age=toInteger(row.Age)
        '''
run(q1)

{'labels_added': 512, 'nodes_created': 512, 'properties_set': 1536}

In [13]:
query2 = '''
        load csv with headers from "file:/p_csv/Encounters.csv" as row
        MERGE (e:Encounter {id:row.Encounter_Id})
          SET 
            e.date_order = row.Date_Order,
            e.patient_id= row.Patient_Id,
            e.weights= toInteger(row.Weights)

        MERGE (p:Patient{pid:row.Patient_Id})
        
        MERGE (p)-[:HAS_ENCOUNTER]->(e)
        
        '''
run(query2)

{'labels_added': 1539,
 'relationships_created': 1539,
 'nodes_created': 1539,
 'properties_set': 6156}

In [14]:
query2 = '''
      load csv with headers from "file:/p_csv/Procedures.csv" as row
      MERGE (p:Patient {pid:row.Patient_Id})
      MERGE (r:Procedure {pname:row.Procedure})
          
          SET 
              r.date_order = row.Date_Order,
              r.start_date = row.Start_date
              
          MERGE (pe:Encounter {id:row.Encounter_Id})
          
          ON MATCH
            SET pe.date_order = row.Date_Order, pe.pname=row.Procedure, pe.Num_Order = toInteger(row.Num)

          MERGE (p)-[:HAS_ENCOUNTER]->(pe)
          MERGE (pe)-[:HAS_PROCEDURE]->(r)
        '''
run(query2)

{'labels_added': 9,
 'relationships_created': 1539,
 'nodes_created': 9,
 'properties_set': 6165}

In [17]:
query2 = '''

    CALL apoc.periodic.iterate(
    'MATCH (p:Patient) RETURN p',
    'MATCH (p)-[:HAS_ENCOUNTER]->(e)
    WITH e
    ORDER BY e.date_order
    WITH collect(e) AS encounters
    WITH encounters, encounters[1..] as nextEncounters
    UNWIND range(0,size(nextEncounters)-1,1) as index
    WITH encounters[index] as first, nextEncounters[index] as second
    MERGE (first)-[:NEXT]->(second)',{iterateList:false});
    
    '''
run(query2)

{}

In [18]:
query5 = '''
            MATCH (p:Patient)-[:HAS_ENCOUNTER]->(e:Encounter)-[:HAS_PROCEDURE]->(pr:Procedure)
            with p.pnumseq as numsequence,collect(e.Num_Order) as sequence1,p
            set p.pnumseq = sequence1
            return p.pid
            '''

run(query5)

{'properties_set': 512}

In [200]:
# #Removing property
# query6 = '''
#             MATCH (p:Patient)
#             remove p.pnumseq
# '''
# run(query6)

In [201]:
# res4.to_csv("C:/Users/Mahesh/Desktop/Patient.csv")

In [26]:
query = '''
        MATCH p=(e:Encounter)-[:NEXT]->(e1)
        // WHERE exists((e1)-[:NEXT]->())
        UNWIND nodes(p) as node
        WITH e,p, collect(node.pname) as names
        with distinct names, count(*)  as count
        UNWIND range(0, size(names) - 2) as index
        WITH names[index] as Procedure1, names[index+1] as Procedure2,count
        return Procedure1,Procedure2, count 

        //create (a:Node{Proc:node1})-[t:TO]->(b:Node{Proc:node2})
'''
df4 = pd.DataFrame(graph.query(query).to_data_frame())
df4.to_csv("scl.csv", index = False)
df4


Unnamed: 0,Procedure1,Procedure2,count
0,P1,P3,444
1,P3,P6,8
2,P1,P4,49
3,P4,P3,2
4,P3,P2,418
5,P4,P2,46
6,P2,P1,26
7,P3,P5,10
8,P3,P1,2
9,P1,P2,7


In [1]:
# df4.to_csv("Procedure_count.csv")

## Node similarity


Creating In memory graph

In [7]:
query1= '''CALL gds.graph.create(
              'sequence1',
              ['Patient','Encounter','Procedure'],
              {
                HAS_ENCOUNTER: {
                  orientation: 'NATURAL'
                },
                HAS_PROCEDURE:{
                    orientation: 'NATURAL'
                },
                NEXT:{
                    orientation: 'NATURAL'
                }
              }
            )

        '''
run(query1)


{}

FastRP embedding

In [8]:
query1=  '''CALL gds.fastRP.mutate('sequence1',
            {
                embeddingDimension: 16,
                randomSeed: 42,
                mutateProperty: 'embedding',
                // relationshipWeightProperty: 'amount',
                iterationWeights: [0.8, 1, 1, 1]
            }
            )
            YIELD nodePropertiesWritten

        '''
df5 = pd.DataFrame(graph.query(query1).to_data_frame())
df5


Unnamed: 0,nodePropertiesWritten
0,2060


Using KNN similarity

In [9]:

query1= '''
            CALL gds.beta.knn.write('sequence1', {
                topK: 8,
                nodeWeightProperty: 'embedding',
                randomSeed: 42,
                concurrency: 1,
                sampleRate: 1.0,
                deltaThreshold: 0.0,
                writeRelationshipType: "SIMILAR",
                writeProperty: "score"
            })
            YIELD nodesCompared, relationshipsWritten, similarityDistribution
            RETURN nodesCompared, relationshipsWritten, similarityDistribution.mean as meanSimilarity
        '''

run(query1)


{}

Getting similarity scores

In [10]:
query = '''
    MATCH (n:Patient)-[r:SIMILAR]->(m:Patient)
RETURN distinct n.pid as Patient1, m.pid as Patient2, r.score as Similarity,n.pnumseq as P1_Sequence,m.pnumseq as P2_Sequence
ORDER BY Similarity DESCENDING
'''
df3 = pd.DataFrame(graph.query(query).to_data_frame())
df3.head(5)

Unnamed: 0,Patient1,Patient2,Similarity,P1_Sequence,P2_Sequence
0,4371980,5181471,0.953167,"[1, 3, 2]","[1, 3, 2]"
1,5181471,4371980,0.953167,"[1, 3, 2]","[1, 3, 2]"
2,5140487,5135739,0.952284,"[1, 3, 2]","[1, 3, 2]"
3,5135739,5140487,0.952284,"[1, 3, 2]","[1, 3, 2]"
4,5139875,5226663,0.947877,"[1, 3, 2]","[1, 4, 2]"


Removing relation

In [139]:
query1= '''
            match()-[r:SIMILAR]-()
            delete r
        '''
run(query1)

{}

Dropping In-memory graph

In [140]:
query1= '''CALL gds.graph.drop('sequence1')
        '''
run(query1)


{}

# Support Confidence Lift

In [5]:


query = '''match (c:Patient)
            CALL{
                    with c 
                    return count(c) as mycount
            }
            with sum(mycount)as count1
             MATCH p=(e:Encounter)-[:NEXT*0..]->()
            // WHERE exists((e1)-[:NEXT]->())
            UNWIND nodes(p) as node
            WITH e,p, collect(node.pname) as Procedures,count1
            with Procedures , round(toFloat(count(*))/count1,5) as Support,count(*) as count
            order by Support desc
            return  Procedures,Support, count       
'''

support1 = pd.DataFrame(graph.query(query).to_data_frame())
support1.head()

Unnamed: 0,Procedures,Support,count
0,[P1],0.99805,511
1,[P2],0.96094,492
2,[P3],0.90234,462
3,"[P1, P3]",0.86719,444
4,"[P3, P2]",0.81641,418


In [6]:
df7 = support1
df7["Confidence"]=0
df7['Lift']=0
df7.drop('count',axis=1,inplace=True)
df7.head()

Unnamed: 0,Procedures,Support,Confidence,Lift
0,[P1],0.99805,0,0
1,[P2],0.96094,0,0
2,[P3],0.90234,0,0
3,"[P1, P3]",0.86719,0,0
4,"[P3, P2]",0.81641,0,0


## Function for confidence

In [7]:
for i in range(0,df7.shape[0]):
    if(len(df7.iloc[i,0])==1):
        df7.iloc[i,2]= 1

In [8]:
seq=""
for i in range(0,df7.shape[0]):
    if(len(df7.iloc[i,0])>1):
        seq= df7.iloc[i,0].copy()
        support1= df7.iloc[i,1]
        seq.pop()
        for j in range(0,i):
            if(seq==df7.iloc[j,0]):
                support2= df7.iloc[j,1]
        df7.iloc[i,2]= round((support1/support2),5)

In [9]:
df7.head()

Unnamed: 0,Procedures,Support,Confidence,Lift
0,[P1],0.99805,1.0,0
1,[P2],0.96094,1.0,0
2,[P3],0.90234,1.0,0
3,"[P1, P3]",0.86719,0.86888,0
4,"[P3, P2]",0.81641,0.90477,0


## Function for Lift

In [10]:
seq=""
support1= suppor2= support3=1
for i in range(0,df7.shape[0]):
    if(len(df7.iloc[i,0])>1):
        seq= df7.iloc[i,0].copy()
        support1= df7.iloc[i,1]
        popped= seq.pop()
        for k in range(0,i):
            if(list(popped.split(" "))==df7.iloc[k,0]):
                support3= df7.iloc[k,1]
        for j in range(0,i):
            if(seq==df7.iloc[j,0]):
                support2= df7.iloc[j,1]
        df7.iloc[i,3]= round(support1/(support2*support3),5)

In [11]:
df7.head()

Unnamed: 0,Procedures,Support,Confidence,Lift
0,[P1],0.99805,1.0,0.0
1,[P2],0.96094,1.0,0.0
2,[P3],0.90234,1.0,0.0
3,"[P1, P3]",0.86719,0.86888,0.96292
4,"[P3, P2]",0.81641,0.90477,0.94155


In [12]:
val = 'P2'
indices =  []

for i in range(df7.shape[0]):
    seq1= df7.iloc[i,0].copy()
    popped= seq1.pop()
    #print(popped)   
    if(list(popped.split(" "))==list(val.split(" "))):
        indices.append(i)
                
newdf = df7.iloc[indices, :]
newdf


Unnamed: 0,Procedures,Support,Confidence,Lift
1,[P2],0.96094,1.0,0.0
4,"[P3, P2]",0.81641,0.90477,0.94155
5,"[P1, P3, P2]",0.79883,0.92117,0.95861
8,"[P4, P2]",0.08984,0.90192,0.93858
9,"[P1, P4, P2]",0.08789,0.91839,0.95572
18,"[P1, P2]",0.01367,0.0137,0.01425
23,"[P1, P4, P3, P2]",0.00391,1.0,1.04065
25,"[P4, P3, P2]",0.00391,1.0,1.04065
28,"[P3, P1, P2]",0.00391,1.0,1.04065
30,"[P1, P7, P2]",0.00391,1.0,1.04065
