# PageRank Algorithm

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from neo4j import GraphDatabase
import matplotlib.pyplot as plt

#### Connecting to the DBMS
If there is any authentification error after you run any queries, then your username and password are wrong

In [7]:
# Seen from :server status
uri = "bolt://localhost:7687"

# default user for graph database is neo4j
# auth = ("neo4j", "password")
auth = ("neo4j","abc")

driver = GraphDatabase.driver(uri = uri, auth = auth)
driver.verify_connectivity()

  driver.verify_connectivity()


'Neo4j/4.4.5'

#### Creating Graph Projection Mapping
Graph Data Science Library plugin needs to be installed for this

In [8]:
graph_projection_mapping =  """
							CALL gds.graph.project(
								'Wikipedia',
								["Page"],{
									LINKED: {orientation: "Natural"}
								}
							)
							"""
driver.session().run(graph_projection_mapping)

<neo4j.work.result.Result at 0x203755fba60>

#### Implementing PageRank:

In [9]:
class pagerank:
    def __init__(self, driver) -> None:
        """
        Args:
            driver is GraphDatabase.driver
        """
        self.driver = driver

    def close(self) -> None:
        self.driver.close()

    #estimation for memory
    @classmethod
    def write(cls, tx) -> any:
        """
        @param cls is the class
        @param tx is the transaction
        @returns the data for the memory
        """
        query = ("""
                    CALL gds.pageRank.write('Wikipedia', {
                    maxIterations: 20,
                    dampingFactor: 0.85,
                    writeProperty: 'pagerank'
                    })
                    YIELD nodePropertiesWritten, ranIterations
                """)
        result = tx.run(query)
        #returns dataframe
        return result.data()

    @classmethod
    def pagerank(cls, tx) -> any:
        """
        @param cls is the class
        @param tx is the transaction
        @return result.data() is the data of the pagerank
        """
        query = ("""
                    Call gds.pageRank.stream('Wikipedia') 
                    YIELD nodeId, score RETURN gds.util.asNode(nodeId).id AS id, score AS pagerank
                    ORDER BY score DESC, id ASC
                """)
        result = tx.run(query)
        #return a dataframe
        return result.data() 


    def write_pagerank(self) -> any:
        """
        @param self
        @return result is the result of the memory estimation
        """
        result = self.driver.session().write_transaction(self.write)
        return pd.DataFrame(result)


    def run_pagerank(self) -> any:
        """
        @param self
        @return result is the dataframe from the pagerank
        """
        result = self.driver.session().write_transaction(self.pagerank)
        return pd.DataFrame(result)

In [12]:
hello = pagerank(driver)
data = hello.run_pagerank()
data.head()


Unnamed: 0,id,pagerank
0,United_States,42.074772
1,France,28.178751
2,Europe,27.807074
3,United_Kingdom,27.385292
4,English_language,21.327554


In [13]:
data.tail()

Unnamed: 0,id,pagerank
4599,Yellowhammer,0.15
4600,Yotsuya_Kaidan,0.15
4601,You%27re_Still_the_One,0.15
4602,Yungay%2C_Peru,0.15
4603,Zara_Yaqob,0.15
