# Transitions in Centralities
Comparison of centrality transitions in the Spotify graph and in the SGC model

## Configure

Set the dataset base path

In [None]:
spotify_basename = "graphs/spotify-2018"

Set the random graph base path

In [None]:
sgc_basename = "graphs/sgc"

Set the centralities CSV path

In [None]:
csv_path = "./centralities_comparison.csv"

Set `jvm_path` to the full path of your Java virtual machine

In [None]:
jvm_path = None

Set up logging

In [None]:
import logging

# Configure the root logger: INFO level, with records rendered as
# "<timestamp> <logger name> <level>: <message>"
logging.basicConfig(
  format='%(asctime)s %(name)-12s %(levelname)-8s: %(message)s',
  datefmt='%Y-%m-%d %H:%M:%S',
  level=logging.INFO,
)

Start the JVM

In [None]:
from featgraph.jwebgraph import start_jvm
# Start the JVM with the configured `jvm_path`; when it is left as None the
# choice of JVM is up to `start_jvm` (NOTE(review): confirm its fallback)
start_jvm(jvm_path=jvm_path)

## Load the Spotify graph

In [None]:
import featgraph.jwebgraph.utils
from featgraph import jwebgraph
import os

# Wrap the Spotify graph files that share the base path `spotify_basename`
graph = jwebgraph.utils.BVGraph(spotify_basename)
print(graph)

# Fail early if any of the expected graph files is missing
for r in ("graph", "properties", "ids.txt"):
  if os.path.isfile(graph.path(r)):
    continue
  raise FileNotFoundError(graph.path(r))

## Sample a graph from the SGC model

In [None]:
from featgraph import sgc, pathutils

seed = 42  # fixed seed forwarded to the model call below
sgc_model = sgc.SGCModel()
sgc_graph = jwebgraph.utils.BVGraph(sgc_basename)
# Sample only when `notisglob` is truthy for the pattern `sgc_graph.path("*")`
# -- presumably when no file of the SGC graph exists yet; `msg` looks like the
# template reported when files are found (NOTE(review): confirm in pathutils)
if pathutils.notisglob(sgc_graph.path("*"), msg="Found: %.40s... Skipping"):
  logging.info("Sampling SGC graph")
  sgc_nxgraph = sgc_model(seed=seed)
  logging.info("Converting nxgraph to BVGraph")
  # Convert the sampled graph, passing `sgc_basename` as the destination
  # (presumably the base path the BVGraph files are written to -- TODO confirm)
  sgc.to_bv(sgc_nxgraph, sgc_basename)

## Compute subgraphs for different thresholds
Define data structure

In [None]:
from featgraph.sgc import ThresholdComparison

# Compare the SGC sample and the Spotify graph at the integer thresholds 0..80
tc = ThresholdComparison(
  ThresholdComparison.sgc_graph(sgc_graph),
  ThresholdComparison.spotify_graph(graph),
  thresholds=tuple(range(0, 81)),
)

print(
  f"Thresholding based on {tc.attribute} at thresholds:\n"
  f"  {', '.join(str(t) for t in tc.thresholds)}"
)

Perform thresholding

In [None]:
from unittest.mock import patch
from tqdm.notebook import tqdm
import featgraph

# Temporarily replace `featgraph.logger.info` with a no-op so that featgraph's
# info-level messages are suppressed while the subgraphs are computed.
# The stand-in accepts positional arguments only.
with patch.object(featgraph.logger, "info", lambda *_: None):
  tc.threshold_graphs(tqdm=tqdm)

Compute centralities

In [None]:
# Temporarily replace `featgraph.logger.info` with a no-op so that featgraph's
# info-level messages are suppressed while the centralities are computed.
# The stand-in accepts positional arguments only.
with patch.object(featgraph.logger, "info", lambda *_: None):
  tc.compute_centralities(tqdm=tqdm)

Build dataframe

In [None]:
# NOTE(review): the CSV path is hard-coded here, while `csv_path` is configured
# at the top of the notebook and not used in the cells shown -- confirm which
# of the two is intended.
df = tc.dataframe("graphs/spotify-2018_centrality-transitions.csv", tqdm=tqdm)
# Bare expression: displays the dataframe as the cell output
df

## Plot centrality transitions

In [None]:
from featgraph import plots
import seaborn as sns

# Apply seaborn's default theme to the figures created below
sns.set()
plots.plot_centrality_transitions(
  df, "Harmonic Centrality",
  norm="nnodes",
  # logy=True,
  graph_names=(
    "spotify-2018",
    "sgc",
  ),
  # The classes of the two graphs share colours pairwise:
  # celebrities/hip-hop (C0), community leaders/classical (C1), masses/rock (C2)
  cmap={
    "celebrities":       "C0",
    "community leaders": "C1",
    "masses":            "C2",
    "hip-hop":           "C0",
    "classical":         "C1",
    "rock":              "C2",
  },
)

Switch point

In [None]:
centrality = "Harmonic Centrality"

# (graph name, pair of node classes whose centrality curves are compared)
graphs = (
  ("spotify-2018", ("classical", "hip-hop")),
  ("sgc", ("community leaders", "celebrities")),
)
for g, ks in graphs:
  # Threshold at which the two classes swap order for the chosen centrality
  th = featgraph.misc.switch_point(
    df, ks[0], ks[1], graph=g, centrality=centrality,
  )
  print(
    f"{g:<16}: '{ks[0]}' and '{ks[1]}' nodes switch "
    f"'{centrality.lower()}' at threshold = {th:.0f}"
  )

## Correlation between centrality transitions and SCC

Plot correlations

In [None]:
from matplotlib import pyplot as plt
from scipy import stats
import pandas as pd
import numpy as np
import itertools

# Correlate each centrality with the SCC size using Kendall's tau
df_cc, _, axs = plots.centrality_correlations(
  pd.concat(list(plots.preprocessed_additions(df))),
  "Strongly Connected Component Size" + plots.div_suffix,
  [
    "Indegree" + plots.div_suffix,
    "Pagerank" + plots.mul_suffix,
    "Harmonic Centrality" + plots.div_suffix,
    "Lin Centrality" + plots.div_suffix,
    "Closeness Centrality" + plots.div_suffix,
  ],
  cc_fn=stats.kendalltau,
  p_thresholds=(0.001,),
  plt_kws={"edgecolor": "none", "alpha": 0.5},
  subplot_kws={"sharey": "row"},
  # NOTE(review): the classes are paired as in the transitions plot, but
  # C1/C2 are assigned the other way round there -- confirm this is intended
  palette={
    "celebrities": "C0",
    "masses": "C1",
    "community leaders": "C2",
    "hip-hop": "C0",
    "rock": "C1",
    "classical": "C2",
  },
)
# Axes are visited in flattened order; every 4th axis in each group of 5
# (presumably the "Lin Centrality" column, given the five centralities above
# -- TODO confirm the grid layout) gets its legend in the upper right corner
for i, ax in enumerate(itertools.chain.from_iterable(axs)):
  sns.move_legend(ax, "upper right" if i % 5 == 3 else "lower right")
# Resize the current figure to 21 x 9 inches
plt.gcf().set_size_inches(np.array([1, 9 / 21]) * 21)

Display tests dataframe

In [None]:
# Show the test results ordered by ascending p-value; `reset_index()` moves
# the previous index into regular column(s)
df_cc.sort_values("p-value").reset_index()

In [None]:
# The bare name `switch_point` is only defined in a later cell, so calling it
# here fails when the notebook is run top to bottom. Use the implementation
# from `featgraph.misc`, as the harmonic-centrality cell above does.
(
  featgraph.misc.switch_point(df, "classical", "hip-hop", graph=graph.basename, centrality="Indegree"),
  featgraph.misc.switch_point(df, "community leaders", "celebrities", graph="sgc", centrality="Indegree"),
)

In [None]:
def switch_point(
  df: pd.DataFrame,
  k1,
  k2,
  x: str = "threshold",
  y: str = "mean",
  class_key: str = "type_value",
  **kwargs
):
  """Compute the switching point for a value between two groups

  The switching point is the smallest value of ``x`` at which the sign of
  ``y(k1) - y(k2)`` is opposite to its sign at the smallest ``x`` shared by
  both groups. A zero difference at that first ``x`` counts as non-negative.

  Args:
    df (DataFrame): The dataframe of values
    k1 (str): The class value for the first group
    k2 (str): The class value for the second group
    x (str): Column name for independent variable
    y (str): Column name for dependent variable
    class_key (str): Column name for classes
    **kwargs: Extra keyword arguments forwarded to
      ``featgraph.misc.dataframe_filter`` together with the class filter
      (e.g. ``graph``, ``centrality``)

  Returns:
    The smallest ``x`` at which the two groups have switched order. When no
    row satisfies the condition this is the minimum of an empty selection
    (NaN in pandas for numeric data)."""
  # One frame per group: keep only x and y, and rename y after the group so
  # that the two value columns stay distinct after the merge
  per_group = [
    featgraph.misc.dataframe_filter(df, **kwargs, **{class_key: k})[[x, y]].rename(columns={y: k})
    for k in (k1, k2)
  ]
  # Join on the independent variable. This used to be hard-coded to
  # "threshold", which broke every call passing a different `x`.
  dfs = pd.merge(*per_group, how="inner", on=x)
  x_0 = dfs[x].min()
  diffs = dfs[k1] - dfs[k2]
  # Reference sign of the difference at the first shared x
  sign_0 = 1 if diffs[dfs[x] == x_0].min() >= 0 else -1
  # First x at which the difference takes the opposite sign
  return dfs[diffs * sign_0 < 0][x].min()
# Indegree switching thresholds for the Spotify graph and for the SGC sample,
# displayed as a pair
(
  switch_point(df, "classical", "hip-hop", graph=graph.basename, centrality="Indegree"),
  switch_point(df, "community leaders", "celebrities", graph="sgc", centrality="Indegree"),
)