#### Importing Libraries

In [3]:
from py2neo import Graph
import pandas as pd

#### Integrating with neo4j

In [4]:
graph = Graph('bolt://localhost:7687', auth=('neo4j', 'nextproject'))

In [25]:
query ="""
   match(n) return(n)
"""
graph.run(query)

n
"(_0:Encounter {id: '20220001', operation_code: 'CAGM', operation_date: datetime('2019-10-11T00:00:00.000000000+00:00'), patient_id: 'XYZ00001005', procedure_weight: 4})"
"(_1:Encounter {id: '20220002', operation_code: 'CAGM', operation_date: datetime('2019-06-07T00:00:00.000000000+00:00'), patient_id: 'XYZ00005126', procedure_weight: 5})"
"(_2:Encounter {id: '20220003', operation_code: 'CAGM', operation_date: datetime('2019-05-22T00:00:00.000000000+00:00'), patient_id: 'XYZ00005235', procedure_weight: 4})"


In [27]:
def run(query):
    return(graph.run(query).stats())

In [28]:
def index_run(query_list):
    for i in query_list:
        graph.run(i).stats()

In [6]:
# query1 =["CREATE INDEX ON :Encounter(id)",
#          "CREATE INDEX ON :Patient (id)",         
#          "CREATE INDEX ON :Condition (code)",  
#          "CREATE INDEX ON :Procedure (code)"]

# index_run(query1)

In [7]:
graph.query("call db.schema.visualization")

nodes,relationships
"[(_-7:Condition {constraints: [], indexes: ['code'], name: 'Condition'}), (_-5:Encounter {constraints: [], indexes: ['id'], name: 'Encounter'}), (_-6:Patient {constraints: [], indexes: ['id'], name: 'Patient'}), (_-8:Procedure {constraints: [], indexes: ['code'], name: 'Procedure'})]","[(_-6)-[:HAS_ENCOUNTER {}]->(_-5), (_-5)-[:HAS_PROCEDURE {}]->(_-8), (_-5)-[:NEXT {}]->(_-5), (_-5)-[:HAS_CONDITION {}]->(_-7), (_-8)-[:SIMILAR {}]->(_-6), (_-8)-[:SIMILAR {}]->(_-5), (_-8)-[:SIMILAR {}]->(_-8)]"


In [None]:
# query = 'DROP INDEX ON : Condition (code)'
# graph.run(query)

## Loading CSV Files


In [63]:
query2 = '''
        load csv with headers from "file:/csv/Encounter.csv" as row
        MERGE (e:Encounter {id:row.ENCOUNTER_ID})
          SET 
            e.operation_date=date(row.OPER_DT),
            e.patient_id=row.PATIENT_ID

        MERGE (p:Patient {id:row.PATIENT_ID})
        
        MERGE (p)-[:HAS_ENCOUNTER]->(e)
        
        '''
run(query2)

{'labels_added': 15501,
 'relationships_created': 8694,
 'nodes_created': 15501,
 'properties_set': 32889}

In [64]:
query3 = '''
      load csv with headers from "file:/csv/Patients.csv" as row
      MERGE (p:Patient {id:row.PATIENT_ID})
          SET
               p.Name=row.PATIENT_NAME, 
               p.gender=row.GENDER,            
               p.weight=row.WEIGHT,
               p.height=row.HEIGHT

        '''
run(query3)

{'properties_set': 21917}

In [65]:
query4 = '''
      load csv with headers from "file:/csv/Condition.csv" as row
      MATCH (p:Patient {id:row.PATIENT_ID})
        MERGE (c:Condition {code:row.ENCOUNTER_ID})
        SET 
            c.description=row.DIAGNOSIS_ICD_TEXT,
            c.diagnosis = row.DIAGNOSIS,
            c.operation_date = datetime(row.OPER_DT)
        
        MERGE (cs:Encounter {id:row.ENCOUNTER_ID})
        ON CREATE
          SET cs.date=datetime(row.OPER_DT) 
          
        MERGE (p)-[:HAS_ENCOUNTER]->(cs)
        MERGE (cs)-[:HAS_CONDITION]->(c)

        '''
run(query4)

{'labels_added': 8694,
 'relationships_created': 8694,
 'nodes_created': 8694,
 'properties_set': 28224}

In [66]:
query2 = '''
      load csv with headers from "file:/csv/Procedures.csv" as row
      MERGE (p:Patient {id:row.PATIENT_ID})
      MERGE (r:Procedure {code:row.OPERCODE})
          
          SET 
              r.description=row.OPER_NAME,
              r.operation_date=datetime(row.OPER_DT),
              r.cost = row.COST
          MERGE (pe:Encounter {id:row.ENCOUNTER_ID})
          
          ON MATCH
            SET pe.operation_date=datetime(row.OPER_DT), pe.operation_code=row.OPERCODE

          MERGE (p)-[:HAS_ENCOUNTER]->(pe)
          MERGE (pe)-[:HAS_PROCEDURE]->(r)
        '''
run(query2)

{'labels_added': 367,
 'relationships_created': 8694,
 'nodes_created': 367,
 'properties_set': 43837}

In [67]:
query2 = '''

    CALL apoc.periodic.iterate(
    'MATCH (p:Patient) RETURN p',
    'MATCH (p)-[:HAS_ENCOUNTER]->(e)
    WITH e
    ORDER BY e.operation_date
    WITH collect(e) AS encounters
    WITH encounters, encounters[1..] as nextEncounters
    UNWIND range(0,size(nextEncounters)-1,1) as index
    WITH encounters[index] as first, nextEncounters[index] as second
    CREATE (first)-[:NEXT{days:duration.inDays(date(first.operation_date), date(second.operation_date))}]->(second)',{iterateList:false});
    
    '''
run(query2)

{}

In [69]:
query2 = '''
       CALL apoc.periodic.iterate('MATCH (c:Condition) RETURN c',
      'MATCH (c)<-[:HAS_CONDITION]-(:Encounter)<-[:HAS_ENCOUNTER]-(p:Patient)
      WITH c,count(p) AS NUM
      SET c.num=NUM', {}
      );
    '''
run(query2)

{}

# Insights

In [6]:
# Query to get patients and procedures

query = '''
    MATCH (p:Patient)-[:HAS_ENCOUNTER]->(e:Encounter)-[:HAS_PROCEDURE]->(pr:Procedure)
    return p.id as Patient_id,p.Name as Patient_name,collect(pr.code) as sequence
'''
df3 = pd.DataFrame(graph.query(query).to_data_frame())
df3

Unnamed: 0,Patient_id,Patient_name,sequence
0,ABC00579733,RENJITH R PAI,"[CAGM, EPS, RFA]"
1,ABC00564991,AJAY SAH,"[CAGM, PAGM, PTAAB]"
2,ABC00338680,NIRANJAN DEY,"[CAGM, A203]"
3,ABC00591287,RUDRARAJU ADITYA VARMA,"[CAGM, ICDI]"
4,ABC00587272,S MAIMUNNISA BEGUM,"[CAGM, PTCAP]"
...,...,...,...
6802,ABC00579426,BHOLANATH DHAL,[UROPER7]
6803,ABC00583567,KABITA SARKAR,[UROPER17]
6804,ABC00589884,MST MURSHIDA KHATUN,[UROPER17]
6805,ABC00598605,SURESH HALDAR,[UROPER4]


In [16]:
# # Getting patients who followed CAGM, PTCAP

# df3['sequence'] = df3['sequence'].astype(str)
# df_new1= df3[df3['sequence'] == "['CAGM', 'P1703']"]
# df_new1


In [7]:
# Query to get patients and procedures

query = '''
    MATCH (p:Patient)-[:HAS_ENCOUNTER]->(e:Encounter)-[:HAS_PROCEDURE]->(pr:Procedure{code:"AVBDL"})
 match (e)-[:NEXT]->(e1:Encounter)-[:HAS_PROCEDURE]->(pr1:Procedure{code:"AVBDL"})
 return p.id,p.Name,pr.code,pr1.code,count(*)
'''
df1 = pd.DataFrame(graph.query(query).to_data_frame())
# patient = pd.DataFrame(graph.query(query).to_data_frame())
# type(result_df)
df1

Unnamed: 0,p.id,p.Name,pr.code,pr1.code,count(*)
0,ABC00556697,FATHIMA SAFEEDA V T,AVBDL,AVBDL,1


In [9]:
db = df3[df3['Patient_name']=='LEELA T S']
db

Unnamed: 0,Patient_id,Patient_name,sequence
821,ABC00174021,LEELA T S,"[CAGM, PTMCM, P1703]"


In [54]:
type(df3['sequence'][0])

list

In [56]:
# procedure = df3["sequence"].apply(pd.Series)
# procedure.head()

In [213]:
# result = pd.concat([patient,procedure], axis=1, join='inner')
# result = result.fillna(0)
# result = pd.DataFrame(result)
# result.columns = ['Patient_Name','1st','2nd','3rd','4th','5th','6th','7th','8th','9th','10th']
# result.head(10)

In [7]:
# Counting no of nodes in our graph

query1 = """
Match(n) return count(n) as ct
"""
result = graph.query(query1)
print(result)

    ct 
-------
 24562 



In [15]:
# Returning Patients names who followed CAGM after P1703
params = {'p1':'CAGM',
          'p2':'PTCAP'}

query2 = """
        match(n:Patient)-[:HAS_ENCOUNTER]->(e:Encounter{operation_code:$p1})-[:NEXT]->(e1:Encounter{operation_code:$p2})
        //where e1.operation_code CONTAINS $contains 
        return n.id as id
         """

result_df1 = pd.DataFrame(graph.query(query2,parameters = params).to_data_frame())
len(result_df1.id.unique())

145

In [10]:
# Counting indegress for different procedures 
# Also we can know here that which procedure is followed by most of the patients.

query3 = """
    match(p:Procedure)
    return distinct p.code, SIZE(()-[]->(p)) AS inDegree ORDER BY inDegree DESC
         """
result1 = graph.query(query3).to_data_frame()
result1


Unnamed: 0,p.code,inDegree
0,CAGM,933
1,PTCAP,924
2,P103,335
3,EPS,296
4,PTMCM,288
...,...,...
362,ORTHOPER14,1
363,UROPER23,1
364,UROPER12,1
365,UROPER4,1


## Assosciation rule mining


In [None]:
# # Query to get patients and procedures

# query = '''
#     MATCH (p:Patient)-[:HAS_ENCOUNTER]->(e:Encounter)-[:HAS_PROCEDURE]->(pr:Procedure)
#     return p.id as Patient_id,collect(pr.code) as sequence
# '''
# df3 = pd.DataFrame(graph.query(query).to_data_frame())
# # patient = pd.DataFrame(graph.query(query).to_data_frame())
# # type(result_df)
# df3

## Support ,Confidence and Lift

### Function for support

In [10]:
query = """
            match (c:Patient)
            CALL   {
                with c 
                return count(c) as mycount
                }
                with sum(mycount)as count1
                MATCH p=(e:Encounter)-[:NEXT*0..]->()
                UNWIND nodes(p) as node
                WITH e,p, collect(node.operation_code) as Procedures,count1
                with Procedures , round(toFloat(count(*))/count1,5) as Support,count(*) as count
                order by count desc
                return  Procedures,Support, count
        
        """

df4 = pd.DataFrame(graph.query(query).to_data_frame())

In [11]:
df4

Unnamed: 0,Procedures,Support,count
0,[CAGM],0.13706,933
1,[PTCAP],0.13574,924
2,[P103],0.04921,335
3,[EPS],0.04348,296
4,[PTMCM],0.04231,288
...,...,...,...
1444,"[UROPER7, UROPER3]",0.00015,1
1445,"[UROPER7, UROPER3, UROPER2]",0.00015,1
1446,[UROPER18],0.00015,1
1447,"[UROPER1, OBGOPER27]",0.00015,1


In [12]:
df7 = df4
df7["Confidence"]=0
df7['Lift']=0
df7.drop('count',axis=1,inplace=True)
df7.head()

Unnamed: 0,Procedures,Support,Confidence,Lift
0,[CAGM],0.13706,0,0
1,[PTCAP],0.13574,0,0
2,[P103],0.04921,0,0
3,[EPS],0.04348,0,0
4,[PTMCM],0.04231,0,0


### Function for Confidence

In [13]:
for i in range(0,df7.shape[0]):
    if(len(df7.iloc[i,0])==1):
        df7.iloc[i,2]= 1

In [14]:
seq=""
for i in range(0,df7.shape[0]):
    if(len(df7.iloc[i,0])>1):
        seq= df7.iloc[i,0].copy()
        support1= df7.iloc[i,1]
        seq.pop()
        for j in range(0,i):
            if(seq==df7.iloc[j,0]):
                support2= df7.iloc[j,1]
        df7.iloc[i,2]= round((support1/support2),5)

In [15]:
df7.tail()

Unnamed: 0,Procedures,Support,Confidence,Lift
1444,"[UROPER7, UROPER3]",0.00015,0.34091,0
1445,"[UROPER7, UROPER3, UROPER2]",0.00015,1.0,0
1446,[UROPER18],0.00015,1.0,0
1447,"[UROPER1, OBGOPER27]",0.00015,0.20548,0
1448,"[UROPER6, UROPER6]",0.00015,0.09259,0


In [21]:
895+367


1262

### Function for Lift

In [16]:
seq=""
support1= suppor2= support3=1
for i in range(0,df7.shape[0]):
    if(len(df7.iloc[i,0])>1):
        seq= df7.iloc[i,0].copy()
        support1= df7.iloc[i,1]
        popped= seq.pop()
        for k in range(0,i):
            if(list(popped.split(" "))==df7.iloc[k,0]):
                support3= df7.iloc[k,1]
        for j in range(0,i):
            if(seq==df7.iloc[j,0]):
                support2= df7.iloc[j,1]
        df7.iloc[i,3]= round(support1/(support2*support3),5)

In [20]:
df7


Unnamed: 0,Procedures,Support,Confidence,Lift
0,[CAGM],0.13706,1.00000,0.00000
1,[PTCAP],0.13574,1.00000,0.00000
2,[P103],0.04921,1.00000,0.00000
3,[EPS],0.04348,1.00000,0.00000
4,[PTMCM],0.04231,1.00000,0.00000
...,...,...,...,...
1444,"[UROPER7, UROPER3]",0.00015,0.34091,288.90601
1445,"[UROPER7, UROPER3, UROPER2]",0.00015,1.00000,295.85799
1446,[UROPER18],0.00015,1.00000,0.00000
1447,"[UROPER1, OBGOPER27]",0.00015,0.20548,35.86029


In [19]:
# query = '''
#         MATCH (p:Patient)-[:HAS_ENCOUNTER]->(e:Encounter)
#         WITH p,e
#         ORDER BY e.operation_date
#         with p.id as Patient_Id, p.Name as Patient_Name,collect(e.operation_code) as sequence
#         return sequence, count(Patient_Id) as count
#         order by count(Patient_Id) descending
# '''
# df7 = pd.DataFrame(graph.query(query).to_data_frame())
# # display(df7.to_string())
# df7.to_csv('895.csv')

In [26]:
def get_indices(df7,x):
        val = x
        indices =  []

        for i in range(df7.shape[0]):
            seq1= df7.iloc[i,0].copy()
            popped= seq1.pop()
            #print(popped)   
            if(list(popped.split(" "))==list(val.split(" "))):
                indices.append(i)

        newdf = df7.iloc[indices, :]
        return(newdf)


In [40]:
def get_SCL(df7,x):
    newdf = get_indices(df7,x)
    # Create an empty list
    Row_list =[] 
    Row_list1 = []
    Row_list2 =[]
    my_list2 = []
    # Iterate over each row
    for index, rows in newdf.iterrows():
    # Create list for the current row
        if rows.Lift > 0.95 :
#         my_list =[rows.Procedures, rows.Support]
#         my_list1 = [rows.Procedures,rows.Confidence]
            my_list2 = [rows.Procedures,rows.Lift]

    # append the list to the final list
#         Row_list.append(my_list)
#         Row_list1.append(my_list1)
        Row_list2.append(my_list2)

    # Print the list
#     print(Row_list) 
#     print("\n")
#     print(Row_list1)
#     print("\n")
    print(Row_list2)


In [42]:
get_SCL(df7,'PAGM')

[[], [['CAGM', 'PAGM'], 1.43316], [['CAGM', 'PAGM'], 1.43316], [['PTASU', 'PAGM'], 55.80357], [['LRHCATH', 'PAGM'], 1.52649], [['PTCAP', 'CAGM', 'PAGM'], 1.56282], [['PTAIL', 'PAGM'], 54.85775], [['PTPA', 'PAGM'], 24.51975], [['CAGM', 'PAGM', 'PAGM'], 9.51197], [['PAGM', 'PAGM'], 1.86842], [['CAGM', 'A204', 'PTAIL', 'PAGM'], 111.60714], [['CAGM', 'CAGM', 'PAGM'], 5.69424], [['PAGM', 'CAGM', 'CAGM', 'PAGM'], 111.60714], [['CAGM', 'PTPA', 'PAGM'], 111.60714], [['PAGM', 'PTACA', 'CAGM', 'PTPA', 'PAGM'], 111.60714], [['PTACA', 'CAGM', 'PTPA', 'PAGM'], 111.60714], [['PTASU', 'PAGM', 'PTACA', 'CAGM', 'PTPA', 'PAGM'], 111.60714], [['PTASU', 'PAGM', 'PTACA', 'CAGM', 'PTPA', 'PAGM'], 111.60714], [['CAGM', 'P2437', 'PAGM'], 16.25347], [['PAGM', 'CAGM', 'P2437', 'PAGM'], 111.60714], [['PTAAB', 'PAGM'], 38.04789], [['RSOVDV', 'LRHCATH', 'PAGM'], 111.60714], [['PAGM', 'CAGM', 'PAGM'], 7.12386], [['A204', 'PTAIL', 'PAGM'], 111.60714], [['P2437', 'PAGM'], 1.46083]]


In [23]:
query = '''
        //single procedure and involving count
        MATCH (p:Patient)-[:HAS_ENCOUNTER]->(e:Encounter)
        WITH p,e
        ORDER BY e.operation_date
        with p.id as Patient_Id, p.Name as Patient_Name,collect(e.operation_code) as Single_Procedure,e
        return Single_Procedure,count(Patient_Id) as Count
        order by Count descending
'''
df10 = pd.DataFrame(graph.query(query).to_data_frame())
df10

Unnamed: 0,Single_Procedure,Count
0,[CAGM],933
1,[PTCAP],924
2,[P103],335
3,[EPS],296
4,[PTMCM],288
...,...,...
362,[CRAATEL],1
363,[UROPER4],1
364,[CRABFSFD],1
365,[CRARSOB],1


In [22]:
df10["Support"]=df10['Confidence']=df10['Lift']=0

In [35]:
df10.head(80)

Unnamed: 0,Single_Procedure,Count
0,[CAGM],933
1,[PTCAP],924
2,[P103],335
3,[EPS],296
4,[PTMCM],288
...,...,...
75,[EMS],19
76,[AVBDL],18
77,[OPHTOPER10],18
78,[ORTHOPER18],18


In [64]:
query = '''
        MATCH p=(e:Encounter)-[:NEXT]->(e1)
        // WHERE exists((e1)-[:NEXT]->())
        UNWIND nodes(p) as node
        WITH e,p, collect(node.operation_code) as Procedures
        with distinct Procedures, count(*)  as count
        //UNWIND range(0, size(names) - 2) as index
        //WITH names[index] as Procedure1, names[index+1] as Procedure2,count
        //return Procedure1,Procedure2, count 
        return Procedures, count
        //create (a:Node{Proc:node1})-[t:TO]->(b:Node{Proc:node2})


'''
df4 = pd.DataFrame(graph.query(query).to_data_frame())
# df4.to_csv("sssihms_scl.csv", index = False)
# df4.to_csv('count.csv')
df4

Unnamed: 0,Procedures,count
0,"[PAGM, PTAIL]",1
1,"[PTMCM, CAGM]",11
2,"[EPS, RFA]",146
3,"[CAGM, P1703]",36
4,"[LRHCATH, P2437]",5
...,...,...
584,"[UROPER35, UROPER33]",1
585,"[UROPER31, UROPER31]",1
586,"[UROPER5, UROPER7]",1
587,"[UROPER7, UROPER3]",1


In [69]:
newdf3  = pd.DataFrame(df4['Procedures'].tolist(),columns=['Procedure1','Procedure2'],index = df4.index)
df4 = pd.concat([df4,newdf3],axis = 1)
df4

Unnamed: 0,Procedures,count,Procedure1,Procedure2
0,"[PAGM, PTAIL]",1,PAGM,PTAIL
1,"[PTMCM, CAGM]",11,PTMCM,CAGM
2,"[EPS, RFA]",146,EPS,RFA
3,"[CAGM, P1703]",36,CAGM,P1703
4,"[LRHCATH, P2437]",5,LRHCATH,P2437
...,...,...,...,...
584,"[UROPER35, UROPER33]",1,UROPER35,UROPER33
585,"[UROPER31, UROPER31]",1,UROPER31,UROPER31
586,"[UROPER5, UROPER7]",1,UROPER5,UROPER7
587,"[UROPER7, UROPER3]",1,UROPER7,UROPER3


In [68]:
list1 = []
for i in range(df7.shape[0]):
    if(len(df7.iloc[i][0]) == 2):
        list1.append(i)
        
newdf1 = df7.iloc[list1,:]

newdf1


Unnamed: 0,Procedures,Support,Confidence,Lift
11,"[EPS, RFA]",0.02145,0.49333,12.67224
12,"[CAGM, PTCAP]",0.02130,0.15541,1.14488
14,"[PTCAP, CAGM]",0.02071,0.15257,1.11317
16,"[RFA, EPS]",0.01851,0.47547,10.93534
34,"[CAGM, A203]",0.00926,0.06756,2.77006
...,...,...,...,...
1436,"[UROPER29, OBGOPER27]",0.00015,0.12712,22.18475
1441,"[UROPER5, UROPER7]",0.00015,0.34091,774.79339
1444,"[UROPER7, UROPER3]",0.00015,0.34091,288.90601
1447,"[UROPER1, OBGOPER27]",0.00015,0.20548,35.86029


In [72]:
newdf1.merge(df4,how = 'left', left_on = ['Procedure1','Procedure2'],right_on = ['Procedure1','Procedure2'])

KeyError: 'Procedure1'

In [73]:
newdf2  = pd.DataFrame(newdf1['Procedures'].tolist(),columns=['Procedure1','Procedure2'],index = newdf1.index)
newdf1 = pd.concat([newdf1,newdf2],axis = 1)
newdf1

Unnamed: 0,Procedures,Support,Confidence,Lift,Procedure1,Procedure2
11,"[EPS, RFA]",0.02145,0.49333,12.67224,EPS,RFA
12,"[CAGM, PTCAP]",0.02130,0.15541,1.14488,CAGM,PTCAP
14,"[PTCAP, CAGM]",0.02071,0.15257,1.11317,PTCAP,CAGM
16,"[RFA, EPS]",0.01851,0.47547,10.93534,RFA,EPS
34,"[CAGM, A203]",0.00926,0.06756,2.77006,CAGM,A203
...,...,...,...,...,...,...
1436,"[UROPER29, OBGOPER27]",0.00015,0.12712,22.18475,UROPER29,OBGOPER27
1441,"[UROPER5, UROPER7]",0.00015,0.34091,774.79339,UROPER5,UROPER7
1444,"[UROPER7, UROPER3]",0.00015,0.34091,288.90601,UROPER7,UROPER3
1447,"[UROPER1, OBGOPER27]",0.00015,0.20548,35.86029,UROPER1,OBGOPER27


In [57]:
newdf1.to_csv('sssihms_scl.csv')

In [83]:
newdf4 = newdf1.merge(df4,how = 'left', left_on = ['Procedure1','Procedure2'],right_on = ['Procedure1','Procedure2'])

In [87]:
newdf4 = newdf4.drop('Procedures_y',axis=1)

In [88]:
newdf4

Unnamed: 0,Procedures_x,Support,Confidence,Lift,Procedure1,Procedure2,count
0,"[EPS, RFA]",0.02145,0.49333,12.67224,EPS,RFA,146
1,"[CAGM, PTCAP]",0.02130,0.15541,1.14488,CAGM,PTCAP,145
2,"[PTCAP, CAGM]",0.02071,0.15257,1.11317,PTCAP,CAGM,141
3,"[RFA, EPS]",0.01851,0.47547,10.93534,RFA,EPS,126
4,"[CAGM, A203]",0.00926,0.06756,2.77006,CAGM,A203,63
...,...,...,...,...,...,...,...
584,"[UROPER29, OBGOPER27]",0.00015,0.12712,22.18475,UROPER29,OBGOPER27,1
585,"[UROPER5, UROPER7]",0.00015,0.34091,774.79339,UROPER5,UROPER7,1
586,"[UROPER7, UROPER3]",0.00015,0.34091,288.90601,UROPER7,UROPER3,1
587,"[UROPER1, OBGOPER27]",0.00015,0.20548,35.86029,UROPER1,OBGOPER27,1


In [89]:
newdf4.rename(columns={'Procedures_x':'Procedures'},inplace = True)

In [92]:
newdf4.to_csv('sssihms_scl.csv')

## Node Similarity

In [29]:
query1= '''CALL gds.graph.create(
              'sequence1',
              ['Patient','Encounter','Procedure'],
              {
                HAS_ENCOUNTER: {
                  orientation: 'NATURAL'
                },
                HAS_PROCEDURE:{
                    orientation: 'NATURAL'
                },
                NEXT:{
                    orientation: 'NATURAL'
                }
              }
            )

        '''
run(query1)


{}

In [30]:
query1=  '''CALL gds.fastRP.mutate('sequence1',
            {
                embeddingDimension: 16,
                randomSeed: 42,
                mutateProperty: 'embedding',
                // relationshipWeightProperty: 'amount',
                iterationWeights: [0.8, 1, 1, 1]
            }
            )
            YIELD nodePropertiesWritten

        '''
df5 = pd.DataFrame(graph.query(query1).to_data_frame())
df5


Unnamed: 0,nodePropertiesWritten
0,15868


In [31]:

query1= '''
            CALL gds.beta.knn.write('sequence1', {
                topK: 8,
                nodeWeightProperty: 'embedding',
                randomSeed: 42,
                concurrency: 1,
                sampleRate: 1.0,
                deltaThreshold: 0.0,
                writeRelationshipType: "SIMILAR",
                writeProperty: "score"
            })
            YIELD nodesCompared, relationshipsWritten, similarityDistribution
            RETURN nodesCompared, relationshipsWritten, similarityDistribution.mean as meanSimilarity
        '''

run(query1)


{}

In [33]:
query = '''
            MATCH (n:Patient)-[r:SIMILAR]->(m:Patient)
            RETURN distinct n.Name as Patient1, m.Name as Patient2, r.score as Similarity
            ORDER BY Similarity DESCENDING
            '''
df3 = pd.DataFrame(graph.query(query).to_data_frame())
df3

Unnamed: 0,Patient1,Patient2,Similarity
0,SUSANTA KUMAR DAS,SALMA BEGAM,1.000000
1,SALMA BEGAM,SUSANTA KUMAR DAS,1.000000
2,JOSEPH,SUMITRA CHALAK,0.973823
3,SUMITRA CHALAK,JOSEPH,0.973823
4,SHANTHALA R,SUKHEN BAGDI,0.971920
...,...,...,...
20495,PRIYANGSHU KUMAR,USMAN KHAN,0.534594
20496,PRIYANGSHU KUMAR,BARSHA KARMAKAR,0.532918
20497,KHOKU MANI ADHIKARY,MATHIVANAN S,0.530098
20498,SHASHIKALA.T.G,PRASNNA KUMAR MV,0.529070
