# Centralities (PageRank, Harmonic, Closeness) behaviour on Spotify Graph and SGC model

### Configuration steps

In [None]:
import logging
import sys
import os
from featgraph.jwebgraph import start_jvm

# JVM location handed to start_jvm below; None is forwarded unchanged.
jvm_path = None

logging.basicConfig(
  level=logging.INFO,
  format='%(asctime)s %(name)-12s %(levelname)-8s: %(message)s',
  datefmt='%Y-%m-%d %H:%M:%S',
)


# Set to False to skip (re)installing the package and the notebook requirements.
do_install = True
package_root = ".."

if do_install:
  # Local import: only the installation step needs subprocess.
  import subprocess

  # Commands are argument lists executed without a shell, so an interpreter
  # or package path that contains spaces reaches pip intact.
  pip_commands = (
    [
      sys.executable, "-m", "pip", "install", "-U", package_root,
      "--use-feature=in-tree-build",
    ],
    [
      sys.executable, "-m", "pip", "install", "-Ur",
      os.path.join(package_root, "notebooks-requirements.txt"),
    ],
  )
  for cmd in pip_commands:
    logging.info(" ".join(cmd))
    exit_status = subprocess.call(cmd)
    if exit_status != 0:
      # Best effort, as before: report the failure but keep going.
      logging.warning(
        "Command exited with status %d: %s", exit_status, " ".join(cmd)
      )


start_jvm(jvm_path=jvm_path)

### Loading the Spotify graph

In [None]:
# import Spotify graph
import featgraph.jwebgraph.utils
from featgraph import jwebgraph

# Basename of the Spotify graph files
spotify_basename = "graphs/spotify-2018"
graph = jwebgraph.utils.BVGraph(spotify_basename)
print(graph)

# Fail early if any of the files this notebook relies on is missing
for r in ("graph", "properties", "ids.txt"):
  if os.path.isfile(graph.path(r)):
    continue
  raise FileNotFoundError(graph.path(r))

graph.reconstruct_offsets()
from featgraph.misc import pretty_print_int

# Summary: basename plus right-aligned node and arc counts
summary_template = "Graph '{}' has\n{:>11} nodes\n{:>11} arcs"
n_nodes_str = pretty_print_int(graph.numNodes())
n_arcs_str = pretty_print_int(graph.numArcs())
print(summary_template.format(graph.basename, n_nodes_str, n_arcs_str))

### Creating an instance of the SGC model

In [None]:
## Create an SGC graph, convert it to BVGraph format and store it (uncomment to regenerate)
# from featgraph import sgc
# nx_basepath = "graph_SGC/SGCexampleNX"
# bv_basepath = "graph_SGC/SGCexampleBV"
# seed = 42
# model = sgc.SGCModel(
#     n_celeb=160,
#     n_leader=160,
#     n_masses=100000,
# )
# nxgraph = model(seed=seed)
# sgc.to_bv(nxgraph, bv_basepath)
# sgc_graph = jwebgraph.utils.BVGraph(bv_basepath)

### Loading the SGC graph

In [None]:
# loading the SGC graph
import featgraph.jwebgraph.utils
from featgraph import jwebgraph

# Basename of the SGC graph files
sgc_basename = "graph_SGC/SGCexampleBV"
sgc_graph = jwebgraph.utils.BVGraph(sgc_basename)
print(sgc_graph)

# Only the graph and properties files are required here ("ids.txt" is not)
required_files = ("graph", "properties")
for r in required_files:
  file_missing = not os.path.isfile(sgc_graph.path(r))
  if file_missing:
    raise FileNotFoundError(sgc_graph.path(r))

sgc_graph.reconstruct_offsets()
from featgraph.misc import pretty_print_int

# Summary: basename plus right-aligned node and arc counts
sgc_summary = "Graph '{}' has\n{:>11} nodes\n{:>11} arcs".format(
  sgc_graph.basename,
  *(pretty_print_int(count) for count in (sgc_graph.numNodes(), sgc_graph.numArcs())),
)
print(sgc_summary)

# Test on Spotify graph

### Generating subgraphs for different popularity thresholds

In [None]:
# Generating the subgraphs for different popularity thresholds
import numpy as np

# Popularity thresholds: 0, 5, ..., 80
thresh = np.arange(0, 81, 5)
type_filt = 'popularity'
dest_path = 'graphs/spotify-2018'
# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20
for t in thresh:  # one subgraph per threshold
    print(t)
    # Destination basename of the subgraph for this threshold
    subgraph_path = '{}.mapped-{}-{}'.format(dest_path, type_filt, t)
    # Boolean mask: keep the nodes whose popularity is strictly above t
    map_pop = [pop > t for pop in graph.popularity(missing_value)]
    subgraph = graph.transform_map(subgraph_path, map_pop, "spotify")

### Loading subgraphs for different popularity thresholds and computing mean centralities for supergenres

In [None]:
import itertools
import numpy as np
from tqdm import tqdm

# Popularity thresholds: 0, 5, ..., 80. One pre-generated subgraph named
# '<dest_path>.mapped-popularity-<t>' is loaded for each of them.
thresh = np.arange(0, 81, 5)
dest_path = 'graphs/spotify-2018'

# Mean centrality of each supergenre, one entry per threshold.
# Keys: 'hc' harmonic centrality, 'cc' closeness centrality, 'pr' PageRank.
hip_hop_cent = {'hc': [], 'cc': [], 'pr': []}
classical_cent = {'hc': [], 'cc': [], 'pr': []}
rock_cent = {'hc': [], 'cc': [], 'pr': []}

# Same means divided by the number of nodes of the subgraph.
# 'pra' is kept for a per-alpha PageRank aggregation that is not implemented;
# it is never filled by this cell.
hip_hop_cent_n = {'hc': [], 'cc': [], 'pr': [], 'pra': []}
classical_cent_n = {'hc': [], 'cc': [], 'pr': []}
rock_cent_n = {'hc': [], 'cc': [], 'pr': []}

# Unused by this cell; kept so that code relying on these names still works.
hiphop_hc = []
classical_hc = []
rock_hc = []

# (supergenre substring, raw means, node-count-normalised means)
genre_results = (
    ('hip-hop', hip_hop_cent, hip_hop_cent_n),
    ('classical', classical_cent, classical_cent_n),
    ('rock', rock_cent, rock_cent_n),
)

for t in thresh:
    # Load the subgraph generated for this threshold
    basename = dest_path + '.mapped-' + 'popularity' + '-' + str(t)
    subgraph = jwebgraph.utils.BVGraph(basename)
    subgraph.reconstruct_offsets()

    # Compute the transpose and the three centralities
    subgraph.compute_transpose()
    subgraph.compute_harmonicc()
    subgraph.compute_pagerank()
    subgraph.compute_closenessc()

    # Read every per-node attribute once per subgraph instead of once per
    # (genre, centrality) pair.
    n_nodes = subgraph.numNodes()
    supergenres = list(subgraph.supergenre())
    centralities = {
        'hc': list(subgraph.harmonicc()),
        'cc': list(subgraph.closenessc()),
        'pr': list(subgraph.pagerank()),
    }

    for genre, cent, cent_n in genre_results:
        # True for the nodes whose supergenre entry contains this genre
        genre_mask = [genre in node_genres for node_genres in supergenres]
        for key, values in centralities.items():
            selected = list(itertools.compress(values, genre_mask))
            # A genre may have no node left at a high threshold: store NaN so
            # every list stays aligned with `thresh` (matplotlib draws a gap)
            # instead of failing with ZeroDivisionError.
            mean = sum(selected) / len(selected) if selected else float('nan')
            cent[key].append(mean)
            cent_n[key].append(mean / n_nodes)

In [None]:
import matplotlib.pyplot as plt

# Mean harmonic centrality per popularity threshold, one curve per supergenre
for genre_label, genre_cent in (
    ('hip_hop', hip_hop_cent),
    ('classical', classical_cent),
    ('rock', rock_cent),
):
    plt.plot(thresh, genre_cent['hc'], label=genre_label)
plt.legend()
plt.show()

In [None]:
# Node-count-normalised mean harmonic centrality, one curve per supergenre
hc_n_curves = {
    'hip_hop': hip_hop_cent_n['hc'],
    'classical': classical_cent_n['hc'],
    'rock': rock_cent_n['hc'],
}
for curve_label, curve in hc_n_curves.items():
    plt.plot(thresh, curve, label=curve_label)
plt.legend()
plt.show()

In [None]:
# Mean closeness centrality per popularity threshold, one curve per supergenre
cc_curves = {
    'hip_hop': hip_hop_cent['cc'],
    'classical': classical_cent['cc'],
    'rock': rock_cent['cc'],
}
for curve_label, curve in cc_curves.items():
    plt.plot(np.arange(0, 81, 5), curve, label=curve_label)
plt.legend()
plt.show()

In [None]:
# Node-count-normalised mean closeness centrality, one curve per supergenre
for genre_label, genre_cent_n in (
    ('hip_hop', hip_hop_cent_n),
    ('classical', classical_cent_n),
    ('rock', rock_cent_n),
):
    plt.plot(np.arange(0, 81, 5), genre_cent_n['cc'], label=genre_label)
plt.legend()
plt.show()

In [None]:
# Mean PageRank per popularity threshold, one curve per supergenre
pr_curves = {
    'hip_hop': hip_hop_cent['pr'],
    'classical': classical_cent['pr'],
    'rock': rock_cent['pr'],
}
for curve_label, curve in pr_curves.items():
    plt.plot(np.arange(0, 81, 5), curve, label=curve_label)
plt.legend()
plt.show()

In [None]:
# Node-count-normalised mean PageRank, one curve per supergenre
for genre_label, genre_cent_n in (
    ('hip_hop', hip_hop_cent_n),
    ('classical', classical_cent_n),
    ('rock', rock_cent_n),
):
    plt.plot(np.arange(0, 81, 5), genre_cent_n['pr'], label=genre_label)
plt.legend()
plt.show()

# Generate subgraph from filtering by popularity 

### Filtering on SGC model

In [None]:
# Generating the subgraphs for different popularity thresholds
import numpy as np

# Popularity thresholds: 0, 5, ..., 80
thresh = np.arange(0, 81, 5)
type_filt = 'popularity'
dest_path = 'graph_SGC/SGCexampleBV'
# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20

for t in thresh:  # one subgraph per threshold
    # Destination basename of the subgraph for this threshold
    sgc_subgraph_path = dest_path + '.mapped-' + type_filt + '-' + str(t)
    # The mask has to be built from the SGC graph's own popularity: it is
    # applied to sgc_graph, so a mask read from the Spotify graph would be
    # indexed by the wrong set of nodes.
    map_pop = [pop > t for pop in sgc_graph.popularity(missing_value)]
    sgc_subgraph = sgc_graph.transform_map(sgc_subgraph_path, map_pop, "sgc")
    print("Subgraph generated")

In [None]:
import itertools
import numpy as np
from tqdm import tqdm

# Popularity thresholds: 0, 5, ..., 80. One pre-generated subgraph named
# '<dest_path>.mapped-popularity-<t>' is loaded for each of them.
thresh = np.arange(0, 81, 5)
dest_path = 'graph_SGC/SGCexampleBV'

# Mean centrality of each SGC node type, one entry per threshold at which the
# type still has nodes.
# Keys: 'hc' harmonic centrality, 'cc' closeness centrality, 'pr' PageRank.
masses_cent = {'hc': [], 'cc': [], 'pr': []}
celeb_cent = {'hc': [], 'cc': [], 'pr': []}
commlead_cent = {'hc': [], 'cc': [], 'pr': []}

# Same means divided by the number of nodes of the subgraph.
# 'pra' is never filled by this cell.
masses_cent_n = {'hc': [], 'cc': [], 'pr': [], 'pra': []}
celeb_cent_n = {'hc': [], 'cc': [], 'pr': []}
commlead_cent_n = {'hc': [], 'cc': [], 'pr': []}

# (node-type substring, raw means, node-count-normalised means)
type_results = (
    ('masses', masses_cent, masses_cent_n),
    ('celeb', celeb_cent, celeb_cent_n),
    ('community leaders', commlead_cent, commlead_cent_n),
)

for t in thresh:
    print(t)
    # Load the subgraph generated for this threshold
    basename = dest_path + '.mapped-' + 'popularity' + '-' + str(t)
    sgc_subgraph = jwebgraph.utils.BVGraph(basename)
    sgc_subgraph.reconstruct_offsets()

    # Compute the transpose and the three centralities
    sgc_subgraph.compute_transpose()
    sgc_subgraph.compute_harmonicc()
    sgc_subgraph.compute_pagerank()
    sgc_subgraph.compute_closenessc()

    # Read every per-node attribute once per subgraph
    n_nodes = sgc_subgraph.numNodes()
    node_types = list(sgc_subgraph.type_sgc())
    centralities = {
        'hc': list(sgc_subgraph.harmonicc()),
        'cc': list(sgc_subgraph.closenessc()),
        'pr': list(sgc_subgraph.pagerank()),
    }

    for type_name, cent, cent_n in type_results:
        # True for the nodes whose type entry contains this type name
        type_mask = [type_name in node_type for node_type in node_types]
        for key, values in centralities.items():
            # Every metric is averaged over its own values (the celebrity
            # PageRank mean used to be computed from harmonic centralities).
            selected = list(itertools.compress(values, type_mask))
            if not selected:
                # No node of this type at this threshold: nothing is appended,
                # so this list ends up shorter than `thresh`.
                continue
            mean = sum(selected) / len(selected)
            cent[key].append(mean)
            cent_n[key].append(mean / n_nodes)

In [None]:
import matplotlib.pyplot as plt

# Mean harmonic centrality per threshold for each SGC node type; all curves
# are truncated to the shortest list so they share the same x axis.
hc_series = (
    ('masses', masses_cent['hc']),
    ('celeb', celeb_cent['hc']),
    ('commlead', commlead_cent['hc']),
)
min_t = min(len(series) for _, series in hc_series)
new_thresh = np.arange(0, 5 * (min_t - 1) + 1, 5)
for series_label, series in hc_series:
    plt.plot(new_thresh, series[:min_t], label=series_label)
plt.legend()
plt.show()

In [None]:
# Normalised mean harmonic centrality per threshold for each SGC node type.
# The common length is taken from the raw (non-normalised) lists.
min_t = min(len(raw['hc']) for raw in (masses_cent, celeb_cent, commlead_cent))
new_thresh = np.arange(0, 5 * (min_t - 1) + 1, 5)
for series_label, cent_n in (
    ('masses', masses_cent_n),
    ('celeb', celeb_cent_n),
    ('commlead', commlead_cent_n),
):
    plt.plot(new_thresh, cent_n['hc'][:min_t], label=series_label)
plt.legend()
plt.show()

In [None]:
# Mean closeness centrality per threshold for each SGC node type; all curves
# are truncated to the shortest list so they share the same x axis.
cc_series = (
    ('masses', masses_cent['cc']),
    ('celeb', celeb_cent['cc']),
    ('commlead', commlead_cent['cc']),
)
min_t = min(len(series) for _, series in cc_series)
new_thresh = np.arange(0, 5 * (min_t - 1) + 1, 5)
for series_label, series in cc_series:
    plt.plot(new_thresh, series[:min_t], label=series_label)
plt.legend()
plt.show()

In [None]:
# Normalised mean closeness centrality per threshold for each SGC node type.
# The common length is taken from the raw (non-normalised) lists.
min_t = min(len(raw['cc']) for raw in (masses_cent, celeb_cent, commlead_cent))
new_thresh = np.arange(0, 5 * (min_t - 1) + 1, 5)
for series_label, cent_n in (
    ('masses', masses_cent_n),
    ('celeb', celeb_cent_n),
    ('commlead', commlead_cent_n),
):
    plt.plot(new_thresh, cent_n['cc'][:min_t], label=series_label)
plt.legend()
plt.show()

In [None]:
# Mean PageRank per threshold for each SGC node type; all curves are
# truncated to the shortest list so they share the same x axis.
pr_series = (
    ('masses', masses_cent['pr']),
    ('celeb', celeb_cent['pr']),
    ('commlead', commlead_cent['pr']),
)
min_t = min(len(series) for _, series in pr_series)
new_thresh = np.arange(0, 5 * (min_t - 1) + 1, 5)
for series_label, series in pr_series:
    plt.plot(new_thresh, series[:min_t], label=series_label)
plt.legend()
plt.show()

In [None]:
# Normalised mean PageRank per threshold for each SGC node type.
# The common length is taken from the raw (non-normalised) lists.
min_t = min(len(raw['pr']) for raw in (masses_cent, celeb_cent, commlead_cent))
new_thresh = np.arange(0, 5 * (min_t - 1) + 1, 5)
for series_label, cent_n in (
    ('masses', masses_cent_n),
    ('celeb', celeb_cent_n),
    ('commlead', commlead_cent_n),
):
    plt.plot(new_thresh, cent_n['pr'][:min_t], label=series_label)
plt.legend()
plt.show()

# below: OLD --------------------------------------------------------------------------

## Computing Centralities for threshold = 0

### PageRank

In [None]:
# The cells of this section refer to the SGC graph as `bv_sgc`, a name that
# is not defined anywhere in the notebook; alias it to the SGC graph loaded
# earlier (`sgc_graph`) so they no longer fail with a NameError.
bv_sgc = sgc_graph

# Compute the transposed graph for both the Spotify and the SGC graph
graph.compute_transpose()
bv_sgc.compute_transpose()

In [None]:
# Compute PageRank (default parameters) on the Spotify graph, then on the SGC graph
graph.compute_pagerank()
bv_sgc.compute_pagerank()

In [None]:
import featgraph

from matplotlib import pyplot as plt
import featgraph.plots

# Artists highlighted on the Spotify scatter plots
refnames = (
  "Ludwig van Beethoven",
  "Robert Farnon Orchestra",
  "Ed Sheeran",
  "Rick Ross",
  "Vulfpeck",
)
# Node index of each reference artist, in the same order as refnames
refidx = tuple([graph.artist(name=artist_name).index for artist_name in refnames])

def scatter_refs(x, y, **kwargs):
  """Plot the reference artists as individually labelled points.

  Args:
    x: Values for the x axis, indexable by node index, or a callable
      without arguments returning such values
    y: Values for the y axis, in the same form as ``x``
    kwargs: Keyword arguments forwarded to ``plt.scatter``
  """
  def _values_at_refs(source):
    # Resolve callables first, then pick the entries of the reference nodes
    values = source() if callable(source) else source
    return [values[idx] for idx in refidx]

  ref_xs = _values_at_refs(x)
  ref_ys = _values_at_refs(y)
  for ref_x, ref_y, ref_name in zip(ref_xs, ref_ys, refnames):
    plt.scatter(ref_x, ref_y, label=ref_name, **kwargs)
  plt.legend()

# In-degree vs PageRank for every node of the Spotify graph,
# with the reference artists drawn on top
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  graph.indegrees, graph.pagerank,
  **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.indegrees, graph.pagerank)
plt.show()

In [None]:
from matplotlib import pyplot as plt
import featgraph.plots

# Degrees must be computed before the in-degrees are plotted
bv_sgc.compute_degrees()

# In-degree vs PageRank for every node of the SGC graph
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  bv_sgc.indegrees, bv_sgc.pagerank,
  **point_style, label=bv_sgc.basename, **axis_labels,
)

plt.show()

### Harmonic centrality

In [None]:
graph.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality on the Spotify graph,
# with the reference artists drawn on top
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="harmonic centrality")
featgraph.plots.scatter(
  graph.pagerank, graph.harmonicc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.pagerank, graph.harmonicc)
plt.show()

In [None]:
bv_sgc.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality on the SGC graph
featgraph.plots.scatter(
  bv_sgc.pagerank, bv_sgc.harmonicc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="harmonic centrality",
)
plt.show()

### PageRank for different alphas

In [None]:
from tqdm.notebook import tqdm
import numpy as np

# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, one per alpha
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  graph.compute_pagerank(a)
  pagerank_a = graph.pagerank(a)
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(pagerank_a, graph.harmonicc)

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  graph.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

In [None]:
from tqdm.notebook import tqdm
import numpy as np

# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, one per alpha
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  bv_sgc.compute_pagerank(a)
  pagerank_a = bv_sgc.pagerank(a)
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(pagerank_a, bv_sgc.harmonicc)

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  bv_sgc.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

### Closeness centrality

In [None]:
graph.compute_closenessc()

# PageRank (log scale) vs closeness centrality on the Spotify graph,
# with the reference artists drawn on top
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="closeness centrality")
featgraph.plots.scatter(
  graph.pagerank, graph.closenessc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.pagerank, graph.closenessc)
plt.show()

In [None]:
bv_sgc.compute_closenessc()

# PageRank (log scale) vs closeness centrality on the SGC graph
featgraph.plots.scatter(
  bv_sgc.pagerank, bv_sgc.closenessc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="closeness centrality",
)
plt.show()

## Computing centralities for threshold = 46

### Filtering on Spotify graph

In [None]:
import itertools

# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20
# Boolean mask: True for the nodes whose popularity is strictly above 46
map_pop = [pop > 46 for pop in graph.popularity(missing_value)]
# Per-node metrics restricted to the selected nodes
metrics_filtered_hc = [v for v, keep in zip(graph.harmonicc(), map_pop) if keep]
metrics_filtered_pr = [v for v, keep in zip(graph.pagerank(), map_pop) if keep]
metrics_filtered_indegrees = [v for v, keep in zip(graph.indegrees(), map_pop) if keep]
# Closeness is computed right before it is read
graph.compute_closenessc()
metrics_filtered_cc = [v for v, keep in zip(graph.closenessc(), map_pop) if keep]

### Filtering on SGC model

In [None]:
import itertools

# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20
# Boolean mask: True for the SGC nodes whose popularity is strictly above 46
map_pop_sgc = [pop > 46 for pop in bv_sgc.popularity(missing_value)]
# Per-node metrics restricted to the selected nodes
metrics_filtered_hc_sgc = [v for v, keep in zip(bv_sgc.harmonicc(), map_pop_sgc) if keep]
metrics_filtered_pr_sgc = [v for v, keep in zip(bv_sgc.pagerank(), map_pop_sgc) if keep]
metrics_filtered_indegrees_sgc = [v for v, keep in zip(bv_sgc.indegrees(), map_pop_sgc) if keep]
# Closeness is computed right before it is read
bv_sgc.compute_closenessc()
metrics_filtered_cc_sgc = [v for v, keep in zip(bv_sgc.closenessc(), map_pop_sgc) if keep]

### PageRank for threshold=46 - Spotify graph

In [None]:
# In-degree vs PageRank of the Spotify nodes that passed the popularity filter.
# The reference artists are drawn from the unfiltered graph values.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  metrics_filtered_indegrees, metrics_filtered_pr,
  **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.indegrees, graph.pagerank)
plt.show()

### PageRank for threshold=46 - SGC graph

In [None]:
# In-degree vs PageRank of the SGC nodes that passed the popularity filter
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  metrics_filtered_indegrees_sgc, metrics_filtered_pr_sgc,
  **point_style, label=bv_sgc.basename, **axis_labels,
)

plt.show()

### Harmonic Centrality for threshold=46 - Spotify graph

In [None]:
graph.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality of the filtered Spotify nodes.
# The reference artists are drawn from the unfiltered graph values.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="harmonic centrality")
featgraph.plots.scatter(
  metrics_filtered_pr, metrics_filtered_hc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.pagerank, graph.harmonicc)
plt.show()

### Harmonic Centrality for threshold=46 - SGC graph

In [None]:
bv_sgc.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality of the filtered SGC nodes
featgraph.plots.scatter(
  metrics_filtered_pr_sgc, metrics_filtered_hc_sgc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="harmonic centrality",
)
plt.show()

### PageRank for different alphas threshold=46 - Spotify graph

In [None]:
from tqdm.notebook import tqdm
import numpy as np

# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, both restricted
# to the nodes selected by map_pop
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  graph.compute_pagerank(a)
  filtered_pagerank_a = list(itertools.compress(graph.pagerank(a), map_pop))
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(
    filtered_pagerank_a, metrics_filtered_hc
  )

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  graph.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

### PageRank for different alphas threshold=46 - SGC graph

In [None]:
# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, both restricted
# to the nodes selected by map_pop_sgc
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  bv_sgc.compute_pagerank(a)
  filtered_pagerank_a = list(itertools.compress(bv_sgc.pagerank(a), map_pop_sgc))
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(
    filtered_pagerank_a, metrics_filtered_hc_sgc
  )

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  bv_sgc.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

### Closeness centrality for threshold=46 - Spotify graph

In [None]:
# PageRank (log scale) vs closeness centrality of the filtered Spotify nodes.
# The reference artists are not overlaid on this plot.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="closeness centrality")
featgraph.plots.scatter(
  metrics_filtered_pr, metrics_filtered_cc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
plt.show()

### Closeness centrality for threshold=46 - SGC graph

In [None]:
# PageRank (log scale) vs closeness centrality of the filtered SGC nodes
featgraph.plots.scatter(
  metrics_filtered_pr_sgc, metrics_filtered_cc_sgc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="closeness centrality",
)
plt.show()

## Computing centralities for threshold = 47

### Filtering on Spotify graph

In [None]:
# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20
# Boolean mask: True for the nodes whose popularity is strictly above 47
map_pop = [pop > 47 for pop in graph.popularity(missing_value)]
# Per-node metrics restricted to the selected nodes
metrics_filtered_hc = [v for v, keep in zip(graph.harmonicc(), map_pop) if keep]
metrics_filtered_pr = [v for v, keep in zip(graph.pagerank(), map_pop) if keep]
metrics_filtered_indegrees = [v for v, keep in zip(graph.indegrees(), map_pop) if keep]
# Closeness is computed right before it is read
graph.compute_closenessc()
metrics_filtered_cc = [v for v, keep in zip(graph.closenessc(), map_pop) if keep]

### Filtering on SGC model

In [None]:
# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20
# Boolean mask: True for the SGC nodes whose popularity is strictly above 47
map_pop_sgc = [pop > 47 for pop in bv_sgc.popularity(missing_value)]
# Per-node metrics restricted to the selected nodes
metrics_filtered_hc_sgc = [v for v, keep in zip(bv_sgc.harmonicc(), map_pop_sgc) if keep]
metrics_filtered_pr_sgc = [v for v, keep in zip(bv_sgc.pagerank(), map_pop_sgc) if keep]
metrics_filtered_indegrees_sgc = [v for v, keep in zip(bv_sgc.indegrees(), map_pop_sgc) if keep]
# Closeness is computed right before it is read
bv_sgc.compute_closenessc()
metrics_filtered_cc_sgc = [v for v, keep in zip(bv_sgc.closenessc(), map_pop_sgc) if keep]

### PageRank for threshold=47 - Spotify graph

In [None]:
# In-degree vs PageRank of the Spotify nodes that passed the popularity filter.
# The reference artists are drawn from the unfiltered graph values.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  metrics_filtered_indegrees, metrics_filtered_pr,
  **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.indegrees, graph.pagerank)
plt.show()

### PageRank for threshold=47 - SGC graph

In [None]:
# In-degree vs PageRank of the SGC nodes that passed the popularity filter
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  metrics_filtered_indegrees_sgc, metrics_filtered_pr_sgc,
  **point_style, label=bv_sgc.basename, **axis_labels,
)

plt.show()

### Harmonic Centrality for threshold=47 - Spotify graph

In [None]:
graph.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality of the filtered Spotify nodes.
# The reference artists are drawn from the unfiltered graph values.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="harmonic centrality")
featgraph.plots.scatter(
  metrics_filtered_pr, metrics_filtered_hc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.pagerank, graph.harmonicc)
plt.show()

### Harmonic Centrality for threshold=47 - SGC graph

In [None]:
bv_sgc.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality of the filtered SGC nodes
featgraph.plots.scatter(
  metrics_filtered_pr_sgc, metrics_filtered_hc_sgc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="harmonic centrality",
)
plt.show()

### PageRank for different alphas threshold=47 - Spotify graph

In [None]:
from tqdm.notebook import tqdm
import numpy as np

# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, both restricted
# to the nodes selected by map_pop
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  graph.compute_pagerank(a)
  filtered_pagerank_a = list(itertools.compress(graph.pagerank(a), map_pop))
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(
    filtered_pagerank_a, metrics_filtered_hc
  )

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  graph.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

### PageRank for different alphas threshold=47 - SGC graph

In [None]:
# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, both restricted
# to the nodes selected by map_pop_sgc
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  bv_sgc.compute_pagerank(a)
  filtered_pagerank_a = list(itertools.compress(bv_sgc.pagerank(a), map_pop_sgc))
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(
    filtered_pagerank_a, metrics_filtered_hc_sgc
  )

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  bv_sgc.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

### Closeness centrality for threshold=47 - Spotify graph

In [None]:
# PageRank (log scale) vs closeness centrality of the filtered Spotify nodes.
# The reference artists are not overlaid on this plot.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="closeness centrality")
featgraph.plots.scatter(
  metrics_filtered_pr, metrics_filtered_cc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
plt.show()

### Closeness centrality for threshold=47 - SGC graph

In [None]:
# PageRank (log scale) vs closeness centrality of the filtered SGC nodes
featgraph.plots.scatter(
  metrics_filtered_pr_sgc, metrics_filtered_cc_sgc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="closeness centrality",
)
plt.show()

## Computing centralities for threshold = 60

### Filtering on Spotify graph

In [None]:
# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20
# Boolean mask: True for the nodes whose popularity is strictly above 60
map_pop = [pop > 60 for pop in graph.popularity(missing_value)]
# Per-node metrics restricted to the selected nodes
metrics_filtered_hc = [v for v, keep in zip(graph.harmonicc(), map_pop) if keep]
metrics_filtered_pr = [v for v, keep in zip(graph.pagerank(), map_pop) if keep]
metrics_filtered_indegrees = [v for v, keep in zip(graph.indegrees(), map_pop) if keep]
# Closeness is computed right before it is read
graph.compute_closenessc()
metrics_filtered_cc = [v for v, keep in zip(graph.closenessc(), map_pop) if keep]

### Filtering on SGC model

In [None]:
# Fill value handed to popularity(); presumably used for nodes lacking a score
missing_value = -20
# Boolean mask: True for the SGC nodes whose popularity is strictly above 60
map_pop_sgc = [pop > 60 for pop in bv_sgc.popularity(missing_value)]
# Per-node metrics restricted to the selected nodes
metrics_filtered_hc_sgc = [v for v, keep in zip(bv_sgc.harmonicc(), map_pop_sgc) if keep]
metrics_filtered_pr_sgc = [v for v, keep in zip(bv_sgc.pagerank(), map_pop_sgc) if keep]
metrics_filtered_indegrees_sgc = [v for v, keep in zip(bv_sgc.indegrees(), map_pop_sgc) if keep]
# Closeness is computed right before it is read
bv_sgc.compute_closenessc()
metrics_filtered_cc_sgc = [v for v, keep in zip(bv_sgc.closenessc(), map_pop_sgc) if keep]

### PageRank for threshold=60 - Spotify graph

In [None]:
# In-degree vs PageRank of the Spotify nodes that passed the popularity filter.
# The reference artists are drawn from the unfiltered graph values.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  metrics_filtered_indegrees, metrics_filtered_pr,
  **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.indegrees, graph.pagerank)
plt.show()

### PageRank for threshold=60 - SGC graph

In [None]:
# In-degree vs PageRank of the SGC nodes that passed the popularity filter
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="in-degree", ylabel="pagerank")
featgraph.plots.scatter(
  metrics_filtered_indegrees_sgc, metrics_filtered_pr_sgc,
  **point_style, label=bv_sgc.basename, **axis_labels,
)

plt.show()

### Harmonic Centrality for threshold=60 - Spotify graph

In [None]:
graph.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality of the filtered Spotify nodes.
# The reference artists are drawn from the unfiltered graph values.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="harmonic centrality")
featgraph.plots.scatter(
  metrics_filtered_pr, metrics_filtered_hc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
scatter_refs(graph.pagerank, graph.harmonicc)
plt.show()

### Harmonic Centrality for threshold=60 - SGC graph

In [None]:
bv_sgc.compute_harmonicc()

# PageRank (log scale) vs harmonic centrality of the filtered SGC nodes
featgraph.plots.scatter(
  metrics_filtered_pr_sgc, metrics_filtered_hc_sgc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="harmonic centrality",
)
plt.show()

### PageRank for different alphas threshold=60 - Spotify graph

In [None]:
from tqdm.notebook import tqdm
import numpy as np

# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, both restricted
# to the nodes selected by map_pop
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  graph.compute_pagerank(a)
  filtered_pagerank_a = list(itertools.compress(graph.pagerank(a), map_pop))
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(
    filtered_pagerank_a, metrics_filtered_hc
  )

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  graph.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

### PageRank for different alphas threshold=60 - SGC graph

In [None]:
# PageRank alpha grid: evenly spaced values starting at da, with 1 excluded
da = 0.1
n_alphas = int(1/da - 1)
alphas = np.linspace(da, 1, n_alphas, endpoint=False)

# Kendall tau between PageRank(alpha) and harmonic centrality, both restricted
# to the nodes selected by map_pop_sgc
kt_hc_ranks_a = np.zeros(alphas.shape[0])
for i, a in enumerate(tqdm(alphas)):
  bv_sgc.compute_pagerank(a)
  filtered_pagerank_a = list(itertools.compress(bv_sgc.pagerank(a), map_pop_sgc))
  kt_hc_ranks_a[i] = jwebgraph.utils.kendall_tau(
    filtered_pagerank_a, metrics_filtered_hc_sgc
  )

plt.plot(alphas, kt_hc_ranks_a, c="k")
plot_title = "{}\nCorrelation between Harmonic Centrality and PageRank".format(
  bv_sgc.basename
)
plt.title(plot_title)
plt.xlabel(r"PageRank $\alpha$")
plt.ylabel(r"Kendall $\tau$")
# Limit the x axis to the range spanned by the alpha grid
first_alpha, last_alpha = alphas[[0, -1]]
plt.xlim(first_alpha, last_alpha)
plt.show()

### Closeness centrality for threshold=60 - Spotify graph

In [None]:
# PageRank (log scale) vs closeness centrality of the filtered Spotify nodes.
# The reference artists are not overlaid on this plot.
point_style = dict(marker=".", c="k", alpha=2**(-5))
axis_labels = dict(xlabel="pagerank", ylabel="closeness centrality")
featgraph.plots.scatter(
  metrics_filtered_pr, metrics_filtered_cc,
  xscale="log", **point_style, label=graph.basename, **axis_labels,
)
plt.show()

### Closeness centrality for threshold=60 - SGC graph

In [None]:
# PageRank (log scale) vs closeness centrality of the filtered SGC nodes
featgraph.plots.scatter(
  metrics_filtered_pr_sgc, metrics_filtered_cc_sgc,
  xscale="log",
  marker=".", c="k", alpha=2**(-5),
  # The points come from the SGC graph, so they are labelled with its
  # basename (the Spotify basename was used here by mistake).
  label=bv_sgc.basename, xlabel="pagerank", ylabel="closeness centrality",
)
plt.show()