In [1]:
import pandas as pd
import numpy as np

import os

import  csv

from time import sleep
from timeit import default_timer as timer

# custom general helper functions for this project
import custom_utils as cu
import importlib

In [2]:
# reload imports as needed
importlib.reload(cu);

In [3]:
# Pandas display configuration: cap the number of rendered columns/rows
# and print floats with two decimals.
pd.options.display.max_columns = 25
pd.options.display.max_rows = 100
pd.options.display.precision = 3
pd.set_option('display.float_format', '{:.2f}'.format)

In [4]:
from py2neo import authenticate, Graph, Node, Relationship

In [5]:
# To avoid typing neo4j password into the notebook each time,
# I'm saving it in a separate file and reading it in with the helper function below.
def read_n4jpass():
    """Reads neo4j connection credentials from .n4jpass file in current folder.

    Expects one key=value pair per line. Blank lines and lines whose first
    non-blank character is '#' are ignored, e.g.:
    # comments here
    user=neo4j
    password=secretStuff123

    Only the first '=' on a line separates the key from the value, so a value
    (for example a password) may itself contain '='.

    Returns:
        dict mapping each key to its value (both str).

    Raises:
        ValueError: if a non-blank, non-comment line contains no '='.
        OSError: if the .n4jpass file cannot be opened.
    """
    path = os.path.join(os.getcwd(), '.n4jpass')

    credentials = {}
    with open(path, 'r') as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            # skip blanks and comments (also comments preceded by whitespace)
            if not line or line.startswith('#'):
                continue

            key, sep, value = line.partition('=')
            if not sep:
                # the offending line is not echoed: it may hold a secret
                raise ValueError(
                    "line %d of .n4jpass is not in key=value form" % line_no)
            credentials[key] = value

    return credentials

In [6]:
n4j_cred = read_n4jpass()

In [7]:
# set up authentication parameters
authenticate("localhost:7474", n4j_cred["user"], n4j_cred["password"])

In [8]:
# connect to authenticated graph database
graph = Graph("http://localhost:7474/db/data/")

In [9]:
# test query
r = graph.data('CALL db.indexes;')
pd.DataFrame(r)

Unnamed: 0,description,failureMessage,id,indexName,progress,properties,provider,state,tokenNames,type
0,INDEX ON :Article(title),,1,index_1,100.0,[title],"{'key': 'native-btree', 'version': '1.0'}",ONLINE,[Article],node_unique_property


### Network descriptive statistics

In [None]:
# number of nodes (cypher query)
match (a:Article)
 return count(a)
# output: 5185699

In [10]:
# number of Article nodes, queried through the py2neo connection
q = """
    match (a:Article)
    return count(a);
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,count(a)
0,5185699


In [12]:
# number of directional edges
q = """
    match ()-[r]->()
    return count(r);
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,count(r)
0,18361180


In [18]:
# total traffic volume in the graph
q = """
    match ()-[r]->()
    return sum(r.traffic);
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,sum(r.traffic)
0,1737027807


In [13]:
# number of directional LINK_TO edges
q = """
    match ()-[r:LINK_TO]->()
    return count(r);
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,count(r)
0,17851501


In [19]:
# link traffic volume in the graph
q = """
    match ()-[r:LINK_TO]->()
    return sum(r.traffic);
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,sum(r.traffic)
0,1718277936


In [14]:
# number of directional SEARCH_FOR edges
q = """
    match ()-[r:SEARCH_FOR]->()
    return count(r);
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,count(r)
0,509679


In [20]:
# search traffic volume in the graph
q = """
    match ()-[r:SEARCH_FOR]->()
    return sum(r.traffic);
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,sum(r.traffic)
0,18749871


In [16]:
# number of reciprocal LINK_TO/SEARCH_FOR relationships
q = """
    match p=(a)-[:LINK_TO]->(b)-[:SEARCH_FOR]->(a)
    return count(p);
 """
r = graph.data(q)
pd.DataFrame(r)


Unnamed: 0,count(p)
0,85226


In [21]:
# number of reciprocal LINK_TO relationships
# The pattern matches every mutually linked pair twice (once starting from each
# of the two articles), so the path count is halved to get the number of pairs.
q = """
    match p=(a)-[r1:LINK_TO]->(b)-[r2:LINK_TO]->(a)
    return count(p)/2;
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,count(p)/2
0,2959959


In [22]:
# number of reciprocal SEARCH_FOR relationships
# The pattern matches every mutual pair twice (once starting from each of the
# two articles), so the path count is halved to get the number of pairs.
q = """
    match p=(a)-[r1:SEARCH_FOR]->(b)-[r2:SEARCH_FOR]->(a)
    return count(p)/2;
 """
r = graph.data(q)
pd.DataFrame(r)

Unnamed: 0,count(p)/2
0,33236


### Calculate degrees

In [None]:
# cypher-shell query to calculate degrees and save them on nodes
MATCH (a:Article)
SET a.search_in_degree = size((a)<-[:SEARCH_FOR]-()),
    a.search_out_degree = size((a)-[:SEARCH_FOR]->()),
    a.link_in_degree = size((a)<-[:LINK_TO]-()),
    a.link_out_degree = size((a)-[:LINK_TO]->())
;
# 0 rows available after 75292 ms, consumed after another 0 ms
# Set 20742796 properties


In [None]:
# cypher-shell queries to calculate traffic volumes per node and save them on nodes

# in-search traffic
MATCH (a:Article)<-[si:SEARCH_FOR]-()
WITH a, SUM(si.traffic) AS s
SET a.search_in_traffic = s
;
# 0 rows available after 12231 ms, consumed after another 0 ms
# Set 249765 properties


# out-search traffic
MATCH (a:Article)-[so:SEARCH_FOR]->()
WITH a, SUM(so.traffic) AS s
SET a.search_out_traffic = s
;
# 0 rows available after 9980 ms, consumed after another 0 ms
# Set 237126 properties


# in-link traffic
MATCH (a:Article)<-[li:LINK_TO]-()
WITH a, SUM(li.traffic) AS s
SET a.link_in_traffic = s
;
# 0 rows available after 61065 ms, consumed after another 0 ms
# Set 2598301 properties



# out-link traffic
MATCH (a:Article)-[lo:LINK_TO]->()
WITH a, SUM(lo.traffic) AS s
SET a.link_out_traffic = s
;
# 0 rows available after 59575 ms, consumed after another 0 ms
# Set 1703704 properties



#### Install Apoc procedures  
Instructions: https://neo4j-contrib.github.io/neo4j-apoc-procedures/#_installation_in_neo4j_server_docker  
**Note:** At the time of writing, those instructions link to APOC 3.4.x, which is not compatible with the latest Neo4j version (3.5.x). Instead, check the [version compatibility matrix](https://github.com/neo4j-contrib/neo4j-apoc-procedures#version-compatibility-matrix) and download the matching APOC version, or go to https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/ to get the latest APOC release (listed at the top).

#### Install Algo procedures  
Instructions: https://neo4j.com/docs/graph-algorithms/current/introduction/#_installation 

In [11]:
# get a listing of all labels and properties
q = """call apoc.meta.data();"""
r = graph.data(q)
graph_metadata = pd.DataFrame(r)

In [12]:
graph_metadata = graph_metadata[["label", "elementType", "index", "property", "type", "unique"]]

In [13]:
graph_metadata

Unnamed: 0,label,elementType,index,property,type,unique
0,LINK_TO,relationship,False,Article,RELATIONSHIP,False
1,LINK_TO,relationship,False,traffic,INTEGER,False
2,SEARCH_FOR,relationship,False,Article,RELATIONSHIP,False
3,SEARCH_FOR,relationship,False,traffic,INTEGER,False
4,Article,node,False,LINK_TO,RELATIONSHIP,False
5,Article,node,False,search_traffic_to_hyphen_minus,INTEGER,False
6,Article,node,False,search_traffic_to_main_page,INTEGER,False
7,Article,node,False,external_website_traffic,INTEGER,False
8,Article,node,False,search_in_traffic,INTEGER,False
9,Article,node,False,search_out_traffic,INTEGER,False


Many of the calculated and imported numerical properties on the nodes are either null or positive. The null values would not be counted in aggregate stats, so let's replace them with 0s.

In [32]:
# Integer-typed Article properties whose name does not end in "degree":
# these are the properties whose null values get replaced with 0.
is_article = graph_metadata.label == "Article"
is_integer = graph_metadata["type"] == "INTEGER"
not_degree = graph_metadata["property"].str[-6:] != "degree"

article_properties_to_update = graph_metadata.loc[
    is_article & is_integer & not_degree, "property"
].values

article_properties_to_update

array(['search_traffic_to_hyphen_minus', 'search_traffic_to_main_page',
       'external_website_traffic', 'search_in_traffic',
       'search_out_traffic', 'search_traffic_from_main_page',
       'link_in_traffic', 'link_out_traffic', 'unknown_external_traffic',
       'external_search_traffic', 'empty_referer_traffic',
       'other_wikimedia_traffic'], dtype=object)

In [53]:
# Replace null values of the selected Article properties with 0,
# running one Cypher query per property.
# The template is spelled out line by line so its whitespace is explicit.
fill_nulls_template = (
    "\n"
    "    MATCH (a:Article) \n"
    "    WHERE a.%s is null\n"
    "    SET a.%s = 0;\n"
    "    "
)

start_time = timer()
for p in article_properties_to_update:
    q = fill_nulls_template % (p, p)
    r = graph.run(q)

    print("Ran query:", q)
    # cumulative time since the loop started, in minutes
    print("Elapsed time:", round((timer() - start_time)/60, 2), "min\n")

cu.printRunTime(start_time)

Ran query: 
    MATCH (a:Article) 
    WHERE a.link_in_traffic is null
    SET a.link_in_traffic = 0;
    
Elapsed time: 0.9 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.link_out_traffic is null
    SET a.link_out_traffic = 0;
    
Elapsed time: 1.95 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.external_search_traffic is null
    SET a.external_search_traffic = 0;
    
Elapsed time: 2.56 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.empty_referer_traffic is null
    SET a.empty_referer_traffic = 0;
    
Elapsed time: 2.66 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.other_wikimedia_traffic is null
    SET a.other_wikimedia_traffic = 0;
    
Elapsed time: 3.82 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.unknown_external_traffic is null
    SET a.unknown_external_traffic = 0;
    
Elapsed time: 5.4 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.external_website_traffic is null
    SET a.external_website_traffic = 0;
    
Elapsed time: 6.84 min

Ran

Runtime: 16.01 min



In [33]:
# Undo the null -> 0 replacement: drop each selected property from every
# Article node where its value is 0 (one Cypher query per property).
# NOTE: the filter is "= 0", so it cannot tell a filled-in 0 from a value
# that was already 0 before the replacement.
remove_zeros_template = (
    "\n"
    "    MATCH (a:Article) \n"
    "    WHERE a.{prop} = 0\n"
    "    REMOVE a.{prop};\n"
    "    "
)

start_time = timer()
for p in article_properties_to_update:
    q = remove_zeros_template.format(prop=p)
    r = graph.run(q)

    print("Ran query:", q)
    # cumulative time since the loop started, in minutes
    print("Elapsed time:", round((timer() - start_time)/60, 2), "min\n")

cu.printRunTime(start_time)

Ran query: 
    MATCH (a:Article) 
    WHERE a.search_traffic_to_hyphen_minus = 0
    REMOVE a.search_traffic_to_hyphen_minus;
    
Elapsed time: 1.47 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.search_traffic_to_main_page = 0
    REMOVE a.search_traffic_to_main_page;
    
Elapsed time: 3.74 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.external_website_traffic = 0
    REMOVE a.external_website_traffic;
    
Elapsed time: 5.44 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.search_in_traffic = 0
    REMOVE a.search_in_traffic;
    
Elapsed time: 6.83 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.search_out_traffic = 0
    REMOVE a.search_out_traffic;
    
Elapsed time: 8.65 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.search_traffic_from_main_page = 0
    REMOVE a.search_traffic_from_main_page;
    
Elapsed time: 10.71 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.link_in_traffic = 0
    REMOVE a.link_in_traffic;
    
Elapsed time: 11.47 min

Ran qu

Runtime: 16.15 min



In [39]:
import sys
sys.version_info

sys.version_info(major=3, minor=5, micro=2, releaselevel='final', serial=0)

#### Calculate descriptive stats for numeric node properties

In [14]:
# All integer-typed Article properties (traffic volumes and degrees alike).
numeric_article_rows = (graph_metadata.label == "Article") & (graph_metadata["type"] == "INTEGER")
article_num_properties = graph_metadata.loc[numeric_article_rows, "property"].values

article_num_properties

array(['search_traffic_to_hyphen_minus', 'search_traffic_to_main_page',
       'external_website_traffic', 'search_in_traffic',
       'search_out_traffic', 'search_traffic_from_main_page',
       'link_out_degree', 'link_in_traffic', 'link_out_traffic',
       'unknown_external_traffic', 'external_search_traffic',
       'search_in_degree', 'search_out_degree', 'link_in_degree',
       'empty_referer_traffic', 'other_wikimedia_traffic'], dtype=object)

In [34]:
# Descriptive statistics for every numeric Article property, one Cypher query
# per property. The result has one row per property, indexed by property name.
stats_query_template = """
    MATCH (a:Article) 
    RETURN 
        count(a.{param}) as count,
        avg(a.{param}) as avg,
        min(a.{param}) as min,
        percentileDisc(a.{param}, 0.25) as quartile_1,
        percentileDisc(a.{param}, 0.5) as median,
        percentileDisc(a.{param}, 0.75) as quartile_3,
        max(a.{param}) as max,
        sum(a.{param}) as sum
        ;
    """

start_time = timer()

# Single-row frames are collected here and concatenated once after the loop,
# instead of re-copying a growing DataFrame on every iteration.
stats_rows = []
for p in article_num_properties:
    q = stats_query_template.format(param=p)

    r = graph.data(q)
    row_df = pd.DataFrame(r)
    row_df["article_property"] = p
    stats_rows.append(row_df.set_index("article_property"))

    print("Ran query:", q)
    # cumulative time since the loop started, in minutes
    print("Elapsed time:", round((timer() - start_time)/60, 2), "min\n")

cu.printRunTime(start_time)

article_prop_descriptive_stats = pd.concat(stats_rows)
article_prop_descriptive_stats

Ran query: 
    MATCH (a:Article) 
    RETURN 
        count(a.search_traffic_to_hyphen_minus) as count,
        avg(a.search_traffic_to_hyphen_minus) as avg,
        min(a.search_traffic_to_hyphen_minus) as min,
        percentileDisc(a.search_traffic_to_hyphen_minus, 0.25) as quartile_1,
        percentileDisc(a.search_traffic_to_hyphen_minus, 0.5) as median,
        percentileDisc(a.search_traffic_to_hyphen_minus, 0.75) as quartile_3,
        max(a.search_traffic_to_hyphen_minus) as max,
        sum(a.search_traffic_to_hyphen_minus) as sum
        ;
    
Elapsed time: 0.49 min

Ran query: 
    MATCH (a:Article) 
    RETURN 
        count(a.search_traffic_to_main_page) as count,
        avg(a.search_traffic_to_main_page) as avg,
        min(a.search_traffic_to_main_page) as min,
        percentileDisc(a.search_traffic_to_main_page, 0.25) as quartile_1,
        percentileDisc(a.search_traffic_to_main_page, 0.5) as median,
        percentileDisc(a.search_traffic_to_main_page, 0.75) as 

Runtime: 9.03 min



Unnamed: 0_level_0,avg,count,max,median,min,quartile_1,quartile_3,sum
article_property,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
search_traffic_to_hyphen_minus,37.03,127457,29570,19,10,13,34,4719628
search_traffic_to_main_page,36.49,110009,40893,18,10,12,32,4013760
external_website_traffic,148.27,787311,17676430,27,10,15,66,116732857
search_in_traffic,75.07,249765,79452,24,10,13,53,18749871
search_out_traffic,79.07,237126,83036,23,10,13,53,18749871
search_traffic_from_main_page,125.98,257794,2838090,24,10,14,56,32475706
link_out_degree,3.44,5185699,2224,0,0,0,1,17851501
link_in_traffic,661.31,2598301,1217004,79,10,26,302,1718277936
link_out_traffic,1008.55,1703704,4466936,86,10,26,359,1718277936
unknown_external_traffic,98.61,374921,182358,27,10,15,66,36972380


In [38]:
# Names of the numeric Article properties that end in "degree".
article_degree_properties = list(
    filter(lambda name: name.endswith("degree"), article_num_properties)
)

article_degree_properties

['link_out_degree', 'search_in_degree', 'search_out_degree', 'link_in_degree']

In [39]:
# Same descriptive statistics as for all numeric properties, restricted to the
# nodes whose degree is positive (result columns carry a "_pos" suffix).

article_degree_properties = [s for s in article_num_properties if s[-6:] == "degree" ]

nonzero_stats_query_template = """
    MATCH (a:Article) 
    WHERE a.{param} > 0
    RETURN 
        count(a.{param}) as count_pos,
        avg(a.{param}) as avg_pos,
        min(a.{param}) as min_pos,
        percentileDisc(a.{param}, 0.25) as quartile_1_pos,
        percentileDisc(a.{param}, 0.5) as median_pos,
        percentileDisc(a.{param}, 0.75) as quartile_3_pos,
        max(a.{param}) as max_pos,
        sum(a.{param}) as sum_pos
        ;
    """

start_time = timer()

# Single-row frames are collected here and concatenated once after the loop,
# instead of re-copying a growing DataFrame on every iteration.
nonzero_stats_rows = []
for p in article_degree_properties:
    q = nonzero_stats_query_template.format(param=p)

    r = graph.data(q)
    row_df = pd.DataFrame(r)
    row_df["article_property"] = p
    nonzero_stats_rows.append(row_df.set_index("article_property"))

    print("Ran query:", q)
    # cumulative time since the loop started, in minutes
    print("Elapsed time:", round((timer() - start_time)/60, 2), "min\n")

cu.printRunTime(start_time)

descriptive_stats_nonzero_degree_props = pd.concat(nonzero_stats_rows)
descriptive_stats_nonzero_degree_props

Ran query: 
    MATCH (a:Article) 
    WHERE a.link_out_degree > 0
    RETURN 
        count(a.link_out_degree) as count_pos,
        avg(a.link_out_degree) as avg_pos,
        min(a.link_out_degree) as min_pos,
        percentileDisc(a.link_out_degree, 0.25) as quartile_1_pos,
        percentileDisc(a.link_out_degree, 0.5) as median_pos,
        percentileDisc(a.link_out_degree, 0.75) as quartile_3_pos,
        max(a.link_out_degree) as max_pos,
        sum(a.link_out_degree) as sum_pos
        ;
    
Elapsed time: 0.26 min

Ran query: 
    MATCH (a:Article) 
    WHERE a.search_in_degree > 0
    RETURN 
        count(a.search_in_degree) as count_pos,
        avg(a.search_in_degree) as avg_pos,
        min(a.search_in_degree) as min_pos,
        percentileDisc(a.search_in_degree, 0.25) as quartile_1_pos,
        percentileDisc(a.search_in_degree, 0.5) as median_pos,
        percentileDisc(a.search_in_degree, 0.75) as quartile_3_pos,
        max(a.search_in_degree) as max_pos,
        s

Runtime: 0.8 min



Unnamed: 0_level_0,avg_pos,count_pos,max_pos,median_pos,min_pos,quartile_1_pos,quartile_3_pos,sum_pos
article_property,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
link_out_degree,10.48,1703704,2224,3,1,1,9,17851501
search_in_degree,2.04,249765,710,1,1,1,2,509679
search_out_degree,2.15,237126,3775,1,1,1,2,509679
link_in_degree,6.87,2598301,6368,2,1,1,6,17851501


### Connected Components / Union Find

In [41]:
!pwd

/home/arinai/Wikipedia_clickstream_project


In [58]:
# Union-find (connected components) call over a Cypher projection of all
# Article-to-Article relationships, saved to cc.cypher.
# The script is assembled line by line so its exact whitespace is visible.
s = (
    "CALL algo.unionFind(\n"
    "    'MATCH (a:Article) RETURN id(a) as id', \n"
    "    'MATCH (a1:Article)-->(a2:Article) RETURN id(a1) as source, id(a2) as target', \n"
    "    {partitionProperty: 'cc_partition', graph: 'cypher'})\n"
    "YIELD loadMillis, computeMillis, writeMillis, nodes, communityCount, "
    "p1, p5, p10, p25, p50, p75, p90, p95, p99, p100;\n"
)

with open("cc.cypher", "w") as f:
    f.write(s)

In [62]:
# run in terminal with nohup
# nohup has to wrap cypher-shell itself: in `nohup cat file | cypher-shell ...`
# only `cat` is shielded from hangup, not the long-running cypher-shell.
# The password is quoted so that special characters survive word splitting.
passwd=$(bash get_n4j_pass.sh)
nohup cypher-shell -u neo4j -p "$passwd" --format plain < cc.cypher &> cc.out
# runtime: 2.5 min


In [65]:
!head cc.out

loadMillis, computeMillis, writeMillis, nodes, communityCount, p1, p5, p10, p25, p50, p75, p90, p95, p99, p100
94000, 619, 58132, 5185699, 2432266, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2736127


In [None]:
MATCH (a:Article)
RETURN count(distinct a.cc_partition);
# 2432266

In [None]:
MATCH (a:Article)
RETURN a.cc_partition, count(a) as count
ORDER BY count desc;

### Strongly connected components

In [None]:
# run in cypher-shell
CALL algo.scc('Article', 'LINK_TO', {partitionProperty:'link_scc_partition'})
YIELD loadMillis, computeMillis, writeMillis, setCount, maxSetSize, minSetSize;
# 
+-------------------------------------------------------------------------------+
| loadMillis | computeMillis | writeMillis | setCount | maxSetSize | minSetSize |
+-------------------------------------------------------------------------------+
| 21938      | 1300387       | 676         | 3759767  | 1409023    | 1          |
+-------------------------------------------------------------------------------+

# 1 row available after 1380941 ms, consumed after another 0 ms

In [None]:
MATCH (a:Article)
RETURN a.link_scc_partition, count(a) as count
ORDER BY count desc
LIMIT 20;


match (a:Article)
where a.link_scc_partition = 1882533
return a;

In [None]:
// Summary of the 20 link_scc_partition groups that contain the most articles
// (one row per partition value).
// total_visits adds up eight traffic properties over the member articles.
// The *_edges_count columns sum the members' in-degrees, so they count edges
// that end inside the partition, wherever those edges start.
MATCH (a:Article)
RETURN 
    a.link_scc_partition, 
    count(a) as articles_count,
    sum(a.external_website_traffic) 
        + sum(a.search_in_traffic) 
        + sum(a.search_traffic_from_main_page) 
        + sum(a.link_in_traffic) 
        + sum(a.unknown_external_traffic)
        + sum(a.external_search_traffic)
        + sum(a.empty_referer_traffic)
        + sum(a.other_wikimedia_traffic)
        as total_visits,
    sum(a.link_in_degree) as link_edges_count,
    sum(a.link_in_traffic) as link_traffic,
    sum(a.search_in_degree) as search_edges_count,
    sum(a.search_in_traffic) as search_traffic
ORDER BY articles_count desc
LIMIT 20;

In [None]:
'external_website_traffic', 'search_in_traffic',
'search_traffic_from_main_page',
'link_in_traffic',
'unknown_external_traffic', 'external_search_traffic',
'empty_referer_traffic', 'other_wikimedia_traffic'

In [1]:
!pwd

/home/arinai/Wikipedia_clickstream_project


In [12]:
# Strongly connected components call over a Cypher projection of all
# Article-to-Article relationships (any relationship type), saved to scc.cypher.
s = """CALL algo.scc(
    'MATCH (a:Article) RETURN id(a) as id', 
    'MATCH (a1:Article)-->(a2:Article) RETURN id(a1) as source, id(a2) as target', 
    {partitionProperty: 'scc_partition', graph: 'cypher'})
YIELD loadMillis, computeMillis, writeMillis, setCount, maxSetSize, minSetSize;
"""

# The context manager closes (and therefore flushes) the file, so the script
# is completely on disk before anything else reads it.
with open("scc.cypher", "w") as f:
    f.write(s)

283

In [13]:
!head scc.cypher


CALL algo.scc(
    'MATCH (a:Article) RETURN id(a) as id', 
    'MATCH (a1:Article)-->(a2:Article) RETURN id(a1) as source, id(a2) as target', 
    {partitionProperty: 'scc_partition', graph: 'cypher'})
YIELD loadMillis, computeMillis, writeMillis, setCount, maxSetSize, minSetSize;


In [None]:
# run in terminal with nohup
# nohup has to wrap cypher-shell itself: in `nohup cat file | cypher-shell ...`
# only `cat` is shielded from hangup, not the long-running cypher-shell.
# The password is quoted so that special characters survive word splitting.
passwd=$(bash get_n4j_pass.sh)
nohup cypher-shell -u neo4j -p "$passwd" --format plain < scc.cypher &> scc.out
# runtime: about 32 min

In [39]:
! nohup echo "test2" >> test.out 2>&1

In [40]:
!head test.out

test2
test2
nohup: ignoring input
test2
nohup: ignoring input
test2


### Centrality

In [None]:
# Running these in cypher-shell, since they might take a while


# PageRank

CALL algo.pageRank('Article', 'LINK_TO', {writeProperty:"link_pagerank", weightProperty: "traffic"});
# 1 row available after 147133 ms, consumed after another 0 ms


CALL algo.pageRank('Article', 'SEARCH_FOR', {writeProperty:"search_pagerank", weightProperty: "traffic"});
# 1 row available after 68307 ms, consumed after another 0 ms


CALL algo.pageRank(
  'MATCH (a:Article) RETURN id(a) as id',
  'MATCH (a1:Article)-->(a2:Article) RETURN id(a1) as source, id(a2) as target',
  {graph:'cypher', writeProperty:"pagerank", weightProperty: "traffic"}
);
# 1 row available after 283603 ms, consumed after another 0 ms






# ArticleRank

CALL algo.articleRank('Article', 'LINK_TO', {writeProperty:"link_artrank", weightProperty: "traffic"});
# 1 row available after 75048 ms, consumed after another 0 ms


CALL algo.articleRank('Article', 'SEARCH_FOR', {writeProperty:"search_artrank", weightProperty: "traffic"});
# 1 row available after 71285 ms, consumed after another 0 ms


CALL algo.articleRank(
  'MATCH (a:Article) RETURN id(a) as id',
  'MATCH (a1:Article)-->(a2:Article) RETURN id(a1) as source, id(a2) as target',
  {graph:'cypher', writeProperty:"artrank", weightProperty: "traffic"}
);
# 1 row available after 134921 ms, consumed after another 1 ms



# Betweenness: RA-Brandes algo


# the below took too long to run
CALL algo.betweenness.sampled('Article', 'LINK_TO',
  {direction:'out', strategy:'degree', writeProperty:'link_out_betweenness'})
YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;


# the unsampled betweenness below took too long to run

CALL algo.betweenness('Article', 'LINK_TO',
  {direction:'out', writeProperty:'link_out_betweenness'})
YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;


CALL algo.betweenness('Article', 'LINK_TO',
  {direction:'in', writeProperty:'link_in_betweenness'})
YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;


CALL algo.betweenness('Article', 'LINK_TO',
  {direction:'both', writeProperty:'link_betweenness'})
YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;





# Harmonic centrality (variant of closeness)

CALL algo.closeness.harmonic('Article', 'LINK_TO', {writeProperty:'link_harmonic_centrality'})
YIELD nodes, loadMillis, computeMillis, writeMillis;








### Top node listings

### Community detection