#### Importing libraries

In [1]:
from py2neo import Graph
import pandas as pd

#### Integrating with neo4j

In [3]:
graph = Graph('bolt://localhost:7687', auth=('neo4j','md2'))

In [35]:
query ="""
   match(n) return(n)
"""
graph.run(query)

In [34]:
query ="""
   match(n) detach delete(n)
"""
graph.run(query)

In [6]:
def run(query):
    
    return(graph.run(query).stats())

In [7]:
def index_run(query_list):
    for i in query_list:
        graph.run(i).stats()

In [21]:
query1 ="""CREATE INDEX ON :SCL(id)"""

run(query1)

{'indexes_added': 1}

In [23]:
# query = """CREATE CONSTRAINT UniqueCharacterId ON (c:pname) ASSERT c.id IS UNIQUE"""
# run(query)

In [20]:
# query11 ='''DROP INDEX ON :SCL(id)'''
# run(query11)

{'indexes_removed': 1}

In [36]:
graph.query("call db.schema.visualization")

nodes,relationships
"[(_-4:pname {constraints: [""Constraint( id=5, name='UniqueCharacterId', type='UNIQUENESS', schema=(:pname {id}), ownedIndex=4 )""], indexes: [], name: 'pname'}), (_-3:SCL {constraints: [], indexes: ['id'], name: 'SCL'})]",[]


## Load CSV Files

In [37]:
q1 = '''
      load csv with headers from "file:/SCL.csv" as row
      MERGE (r:pname{Name:row.Procedures})
          
          SET 
              r.Support = row.Support,
              r.Confidence = row.Confidence,
              r.Lift = row.Lift
              
        '''
run(q1)

{'labels_added': 9, 'nodes_created': 9, 'properties_set': 36}

In [38]:
q2 = '''
        LOAD CSV WITH HEADERS FROM "file:/Procedure_Count.csv" as row
        MATCH (s:pname{Name:row.Procedure1})
        MATCH (t:pname{Name:row.Procedure2})
        merge (s)-[i:NEXT{Count:toInteger(row.count),Support:row.Support,Confidence:row.Confidence,Lift:row.Lift,Sequence:row.Sequence}]->(t)
        
     '''
run(q2)

{'relationships_created': 17, 'properties_set': 85}

## Page Rank

 Creating In-memory graph

In [9]:
query3 = """CALL gds.graph.create(
              'myGraph',
              'pname',
              'NEXT',
              {
                relationshipProperties: 'Count'
              }
)"""

run(query3)

{}

In [10]:
query4 = '''CALL gds.pageRank.write.estimate('myGraph', {
          writeProperty: 'pageRank',
          maxIterations: 20,
          dampingFactor: 0.85
        })
        YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory'''

run(query4)


{}

In [11]:
query5 = '''CALL gds.pageRank.stream('myGraph')
        YIELD nodeId, score
        with gds.util.asNode(nodeId).Name as Procedure, score
        //Match(a:pname) where id(a) = nodeId
        //set a.Rank =score
        return Procedure,score
        ORDER BY score DESC
        '''

df3 = pd.DataFrame(graph.query(query5).to_data_frame()) 
print("\nPAGE RANKING ALGORITHM")
display(df3)


PAGE RANKING ALGORITHM


Unnamed: 0,Procedure,score
0,P3,1.136873
1,P2,0.83122
2,P1,0.744551
3,P5,0.391465
4,P6,0.391465
5,P4,0.276501
6,P7,0.276501
7,P8,0.276501
8,P9,0.228313


In [8]:
query1= '''CALL gds.graph.drop('myGraph')
        '''
run(query1)


ClientError: [Procedure.ProcedureCallFailed] Failed to invoke procedure `gds.graph.drop`: Caused by: java.util.NoSuchElementException: Graph with name `myGraph` does not exist on database `neo4j`. It might exist on another database.

In [39]:
query5 = '''CALL gds.degree.stream('myGraph')
            YIELD nodeId, score
            RETURN gds.util.asNode(nodeId).Name AS name, score AS followers
            ORDER BY followers DESC, name DESC
        '''

df3 = pd.DataFrame(graph.query(query5).to_data_frame())
df3

ClientError: [Procedure.ProcedureCallFailed] Failed to invoke procedure `gds.degree.stream`: Caused by: java.util.NoSuchElementException: Graph with name `myGraph` does not exist on database `neo4j`. It might exist on another database.

#### Eigen vector centrality

In [47]:
query6 = '''CALL gds.eigenvector.stream('myGraph')
            YIELD nodeId, score
            RETURN gds.util.asNode(nodeId).Name AS name, score
            ORDER BY score DESC, name ASC'''

df7 = pd.DataFrame(graph.query(query6).to_data_frame())
df7

Unnamed: 0,name,score
0,P3,0.561088
1,P2,0.540998
2,P1,0.440465
3,P5,0.224247
4,P6,0.224247
5,P4,0.176038
6,P7,0.176038
7,P8,0.176038
8,P9,0.070356


In [50]:
query7 = '''CALL gds.degree.stream('myGraph')
        YIELD nodeId, score
        RETURN gds.util.asNode(nodeId).Name AS Name, score AS Followers
        ORDER BY Followers DESC, Name DESC'''

df8 = pd.DataFrame(graph.query(query7).to_data_frame())
df8

Unnamed: 0,Name,Followers
0,P1,5.0
1,P3,4.0
2,P4,3.0
3,P2,2.0
4,P9,1.0
5,P8,1.0
6,P7,1.0
7,P6,0.0
8,P5,0.0


### InfluenceMaximization

##### Using Greedy

In [52]:
query8 = '''CALL gds.alpha.influenceMaximization.greedy.stream('myGraph', {seedSetSize: 4, concurrency: 4})
            YIELD nodeId, spread
            RETURN gds.util.asNode(nodeId).Name AS Name, spread
            ORDER BY spread ASC'''
df9 = pd.DataFrame(graph.query(query8).to_data_frame())
df9

Unnamed: 0,Name,spread
0,P1,1.728
1,P3,2.91
2,P4,4.0
3,P2,5.0


##### Using CELF

In [53]:
query9 = '''CALL gds.alpha.influenceMaximization.celf.stream('myGraph', {seedSetSize: 3, concurrency: 4})
            YIELD nodeId, spread
            RETURN gds.util.asNode(nodeId).Name AS Name, spread
            ORDER BY spread ASC'''
df10 = pd.DataFrame(graph.query(query9).to_data_frame())
df10

Unnamed: 0,Name,spread
0,P1,1.728
1,P3,2.91
2,P4,4.0


### Closeness centrality

Closeness centrality is a way of detecting nodes that are able to spread information very efficiently through a graph.
Nodes with a high closeness score have the shortest distances to all other nodes.

In [58]:
query11 = '''CALL gds.alpha.closeness.harmonic.stream('myGraph', {})
            YIELD nodeId, centrality
            RETURN gds.util.asNode(nodeId).Name AS Procedure, centrality as Centrality
            ORDER BY Centrality DESC
            '''
df11 =  pd.DataFrame(graph.query(query11).to_data_frame())
df11

Unnamed: 0,Procedure,Centrality
0,P3,0.6875
1,P2,0.625
2,P1,0.5
3,P5,0.479167
4,P6,0.479167
5,P4,0.375
6,P7,0.375
7,P8,0.375
8,P9,0.333333


### HITS

In [68]:
query12 = '''CALL gds.alpha.hits.stream('myGraph', {hitsIterations: 20})
            YIELD nodeId, values
            RETURN gds.util.asNode(nodeId).Name AS Name, values.auth AS Auth, values.hub as Hub
            ORDER BY Hub DESC
            '''
df12 =  pd.DataFrame(graph.query(query12).to_data_frame())
print("\nProcedures ordered based on \nHub scores")
display(df12)


Procedures ordered based on 
Hub scores


Unnamed: 0,Name,Auth,Hub
0,P1,0.226684,0.647751
1,P4,0.222607,0.474258
2,P3,0.638641,0.362235
3,P2,0.578388,0.297379
4,P8,0.222607,0.219477
5,P9,0.162984,0.219477
6,P7,0.222607,0.19877
7,P5,0.124486,0.0
8,P6,0.124486,0.0


In [12]:
query12 = '''CALL gds.alpha.hits.stream('myGraph', {hitsIterations: 20})
            YIELD nodeId, values
            RETURN gds.util.asNode(nodeId).Name AS Name, values.auth AS Auth, values.hub as Hub
            ORDER BY Auth DESC
            '''
df12 =  pd.DataFrame(graph.query(query12).to_data_frame())
print("\nProcedures ordered based on \nAuthorities scores")
display(df12)


Procedures ordered based on 
Authorities scores


Unnamed: 0,Name,Auth,Hub
0,P3,0.638641,0.362235
1,P2,0.578388,0.297379
2,P1,0.226684,0.647751
3,P4,0.222607,0.474258
4,P7,0.222607,0.19877
5,P8,0.222607,0.219477
6,P9,0.162984,0.219477
7,P5,0.124486,0.0
8,P6,0.124486,0.0
