<h2>Page Rank</h2>

In [8]:
from neo4j import GraphDatabase
import pandas as pd

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 100)

In [2]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [3]:
# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"
URI = "neo4j://localhost:7687"
AUTH = ("neo4j", "12345678")

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

conn = Neo4jConnection(uri=URI, user=AUTH[0], pwd=AUTH[1])

<h2>Page Rank</h2>

In [4]:
query = '''CALL gds.graph.drop(
  'pageRankGraph'
)'''

conn.query(query)

query = '''CALL gds.graph.project(
  'pageRankGraph',
  'Paper',
  'CITES'
)'''

conn.query(query)

[<Record nodeProjection={'Paper': {'label': 'Paper', 'properties': {}}} relationshipProjection={'CITES': {'orientation': 'NATURAL', 'indexInverse': False, 'aggregation': 'DEFAULT', 'type': 'CITES', 'properties': {}}} graphName='pageRankGraph' nodeCount=988 relationshipCount=10030 projectMillis=18>]

In [8]:
query = '''CALL gds.pageRank.write.estimate('pageRankGraph', {
  writeProperty: 'pageRank',
  maxIterations: 20,
  dampingFactor: 0.85
})
YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory'''
res = conn.query(query=query)
res

[<Record nodeCount=988 relationshipCount=10030 bytesMin=24664 bytesMax=24664 requiredMemory='24 KiB'>]

In [None]:
query =''' CALL gds.pageRank.stream('pageRankGraph', {
           maxIterations: 20,
           dampingFactor: 0.85
           })
           YIELD nodeId, score
           RETURN gds.util.asNode(nodeId).title AS title, score
           ORDER BY score DESC, title ASC
            '''
res = conn.query(query=query)
df = pd.DataFrame(res, columns=["Title", "Score"])
df

<h2>Node Similarity</h2>

In [6]:
query = '''CALL gds.graph.drop(
  'similarityGraph'
)'''

conn.query(query)

query = '''CALL gds.graph.project(
  'similarityGraph',
  ['Author', 'Paper', 'Community', 'Affiliation'],
  ['WROTE', 'GURU_OF', 'REVIEWS', 'AFFILIATED_TO']
)'''

conn.query(query)

Query failed: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `gds.graph.drop`: Caused by: java.util.NoSuchElementException: Graph with name `similarityGraph` does not exist on database `neo4j`. It might exist on another database.}


[<Record nodeProjection={'Author': {'label': 'Author', 'properties': {}}, 'Paper': {'label': 'Paper', 'properties': {}}, 'Community': {'label': 'Community', 'properties': {}}, 'Affiliation': {'label': 'Affiliation', 'properties': {}}} relationshipProjection={'REVIEWS': {'orientation': 'NATURAL', 'indexInverse': False, 'aggregation': 'DEFAULT', 'type': 'REVIEWS', 'properties': {}}, 'AFFILIATED_TO': {'orientation': 'NATURAL', 'indexInverse': False, 'aggregation': 'DEFAULT', 'type': 'AFFILIATED_TO', 'properties': {}}, 'GURU_OF': {'orientation': 'NATURAL', 'indexInverse': False, 'aggregation': 'DEFAULT', 'type': 'GURU_OF', 'properties': {}}, 'WROTE': {'orientation': 'NATURAL', 'indexInverse': False, 'aggregation': 'DEFAULT', 'type': 'WROTE', 'properties': {}}} graphName='similarityGraph' nodeCount=2308 relationshipCount=7242 projectMillis=65>]

In [9]:
query =''' CALL gds.nodeSimilarity.stream('similarityGraph')
           YIELD node1, node2, similarity
           RETURN gds.util.asNode(node1).name AS Author1, gds.util.asNode(node2).name AS Author2, similarity
           ORDER BY similarity DESCENDING, Author1, Author2
            '''
res = conn.query(query=query)
df = pd.DataFrame(res, columns=["Author 1", "Author 2", "Score"])
df

Unnamed: 0,Author 1,Author 2,Score
0,Angie A. Kehagia,Gary H. Glover,1.000000
1,Gary H. Glover,Angie A. Kehagia,1.000000
2,Jake Greenblum,Ryan Hubbard,0.666667
3,Ryan Hubbard,Jake Greenblum,0.666667
4,Anderson Rocha 0001,Andrea Owe,0.500000
...,...,...,...
12765,Dieter Baum,Jordan Gergov,0.035294
12766,Peter Gritzmann,Alexander Kaplan,0.033708
12767,Peter Gritzmann,Dalal M. Al Tamimi,0.033333
12768,Dieter Baum,Peter Gritzmann,0.031250


In [4]:
query =''' CALL gds.nodeSimilarity.stream('similarityGraph')
           YIELD node1, node2, similarity
           RETURN gds.util.asNode(node1).name AS Author1, gds.util.asNode(node2).name AS Author2, similarity
           ORDER BY similarity DESCENDING, Author1, Author2
            '''
res = conn.query(query=query)
df = pd.DataFrame(res, columns=["Author 1", "Author 2", "Score"])
df

Unnamed: 0,Author 1,Author 2,Score
0,Ahmed Fouad Bouras,Hadjar Toumi,1.0
1,Amarjeet Singh 0004,Neelam Dehal,1.0
2,Amy Huang,Min Li,1.0
3,Angie A. Kehagia,Gary H. Glover,1.0
4,Bhaskaran Unnikrishnan,Vaman Kulkarni,1.0
5,Camille Nebeker,Nadir Weibel,1.0
6,Camille Nebeker,Vincent Chan,1.0
7,David Buitenweg,Ivo Maathuis,1.0
8,Frank Ole Flemisch,Jochen Nelles,1.0
9,Gary H. Glover,Angie A. Kehagia,1.0
