# Transitions in Centralities
Comparison of centrality transitions in the Spotify graph and in the SGC model

## Configure

Set the dataset base path

In [None]:
# Base path of the Spotify graph; passed to BVGraph when the graph is loaded below
spotify_basename = "graphs/spotify-2018"

Set the random graph base path

In [None]:
# Base path of the sampled SGC graph: used both to open it as a BVGraph and as
# the destination given to sgc.to_bv in the sampling cell
sgc_basename = "graphs/sgc"

Set the centralities CSV path

In [None]:
# NOTE(review): no cell visible in this notebook reads csv_path; the dataframe
# cell passes the literal "centralities_transitions.csv" instead -- confirm
# which file name is intended
csv_path = "./centralities_comparison.csv"

Set `jvm_path` to the full path of your Java virtual machine

In [None]:
# Forwarded unchanged to start_jvm(jvm_path=...) below.
# NOTE(review): None presumably lets featgraph locate a default JVM -- confirm
jvm_path = None

Set up logging

In [None]:
import logging
# Configure the root logger: INFO level, records prefixed with a timestamp and
# left-aligned, fixed-width logger-name and level columns
logging.basicConfig(
  level=logging.INFO,
  format='%(asctime)s %(name)-12s %(levelname)-8s: %(message)s',
  datefmt='%Y-%m-%d %H:%M:%S',
)

Start the JVM

In [None]:
from featgraph.jwebgraph import start_jvm
# Start the JVM with the path configured above.
# NOTE(review): presumably must run before any BVGraph is created -- confirm
start_jvm(jvm_path=jvm_path)

## Load the Spotify graph

In [None]:
import featgraph.jwebgraph.utils
from featgraph import jwebgraph
import os

graph = jwebgraph.utils.BVGraph(spotify_basename)
print(graph)

# Fail fast on the first required graph file that is not found on disk
for resource in ("graph", "properties", "ids.txt"):
  required_file = graph.path(resource)
  if not os.path.isfile(required_file):
    raise FileNotFoundError(required_file)

## Sample a graph from the SGC model

In [None]:
from featgraph import sgc, pathutils

seed = 42  # fixed seed handed to the model call below
sgc_model = sgc.SGCModel()
sgc_graph = jwebgraph.utils.BVGraph(sgc_basename)
# Sample and convert only when notisglob returns a truthy value.
# NOTE(review): presumably truthy when nothing matches sgc_graph.path("*"),
# with `msg` logged when matches are found -- confirm against pathutils
if pathutils.notisglob(sgc_graph.path("*"), msg="Found: %.40s... Skipping"):
  logging.info("Sampling SGC graph")
  sgc_nxgraph = sgc_model(seed=seed)
  logging.info("Converting nxgraph to BVGraph")
  # Write the sampled graph under the same base path that sgc_graph points to
  sgc.to_bv(sgc_nxgraph, sgc_basename)

## Compute subgraphs for different thresholds
Define data structure

In [None]:
from featgraph.sgc import ThresholdComparison

# Comparison object built from the sampled SGC graph and the Spotify graph,
# each wrapped by its dedicated ThresholdComparison helper
tc = ThresholdComparison(
  ThresholdComparison.sgc_graph(sgc_graph),
  ThresholdComparison.spotify_graph(graph),
)

# Report the thresholding attribute and the list of thresholds held by tc
print(f"Thresholding based on {tc.attribute} at thresholds:\n  {', '.join(map(str, tc.thresholds))}")

Perform thresholding

In [None]:
from tqdm.notebook import tqdm

# NOTE(review): the notebook tqdm is presumably used for progress bars -- confirm
tc.threshold_graphs(tqdm=tqdm)

Compute centralities

In [None]:
# Relies on `tqdm` imported in the thresholding cell above.
# NOTE(review): presumably requires tc.threshold_graphs to have run first -- confirm
tc.compute_centralities(tqdm=tqdm)

Build dataframe

In [None]:
# NOTE(review): the file name is hard-coded here and the csv_path set in the
# Configure section is not used -- confirm which one is intended
df = tc.dataframe("centralities_transitions.csv", tqdm=tqdm)
df  # last expression of the cell, so the notebook displays the dataframe

## Plot centrality transitions

In [None]:
from featgraph import plots
import seaborn as sns

sns.set()
# Plot the "Harmonic Centrality" column of df for both graphs
plots.plot_centrality_transitions(
  df, "Harmonic Centrality",
  norm="narcs",
  # logy=True,
  graph_names=(
    "spotify-2018",
    "sgc",
  ),
  # Color per class name; the two groups of three reuse the colors C0-C2.
  # NOTE(review): presumably the first three keys are SGC node classes and
  # the last three are Spotify genres -- confirm
  cmap={
    "celebrities":       "C0",
    "community leaders": "C1",
    "masses":            "C2",
    "hip-hop":           "C0",
    "classical":         "C1",
    "rock":              "C2",
  },
)

## Plot boxplots

In [None]:
# Box plot of "Harmonic Centrality" for the Spotify graph at a single threshold.
# NOTE(review): th=50 presumably has to be one of tc.thresholds -- confirm
plots.plot_centrality_boxes(
  tc,
  centrality="Harmonic Centrality",
  th=50,
  graph_name="spotify-2018",
)