In [1]:
import time
import json
import numpy as np
import pandas as pd
import utils

In [2]:
# Short display names of the networks. The list is assigned positionally as the
# row labels of the statistics table, so its order must match the CSV row order.
graph_short_names = [
    "Karate",
    "Student",
    "Jazz",
    "FB",
    "FB1",
    "NetSci",
    "ER1",
    "ER2",
]

In [3]:
# Load the network statistics table, relabel its rows with the short graph
# names (positional: relies on the CSV row order matching graph_short_names),
# and leave out the two ER entries.
df = (
    pd.read_csv("./out/welfare_bounds.csv", index_col=0)
    .set_axis(graph_short_names, axis="index")
    .drop(index=["ER1", "ER2"])
)
df

Unnamed: 0,Nodes,density,<k>,k_max,spectral_radius,rank,LB,UB
Karate,34.0,0.139037,4.588235,17.0,6.725698,24.0,0.994801,1.0
Student,141.0,0.030091,4.212766,10.0,6.311929,141.0,0.995461,1.0
Jazz,198.0,0.140594,27.69697,100.0,40.027376,198.0,0.554541,1.0
FB,329.0,0.036215,11.878419,63.0,24.446709,324.0,0.895303,1.0
FB1,320.0,0.046415,14.80625,113.0,29.729055,320.0,0.821017,1.0
NetSci,379.0,0.01276,4.823219,34.0,10.375459,358.0,0.986598,1.0


In [4]:
def max_values_ci(arr: np.ndarray, ci: float = 0.95):
    """
    Reduce a 2D array to its per-row maxima and summarise their spread
    with a two-sided percentile interval.

    Parameters
    ----------
    arr : np.ndarray
        2D input, one sample per row: shape (sample_size, #nodes).
    ci : float, optional (default=0.95)
        Coverage of the interval as a fraction (0.95 -> 95%, 0.90 -> 90%).
        The remaining mass is split equally between the two tails.

    Returns
    -------
    maxima : np.ndarray
        Row-wise maxima, shape (sample_size,).
    interval : tuple[float, float]
        (lower, upper) percentile bounds of ``maxima``.
    """
    # One maximum per sample (row).
    row_peaks = arr.max(axis=1)

    # Percentage cut off in each tail, e.g. 2.5 for ci=0.95.
    tail_pct = (1 - ci) * 100 / 2
    bounds = np.percentile(row_peaks, (tail_pct, 100 - tail_pct))

    return row_peaks, (bounds[0], bounds[1])

In [5]:
# Statistical test for each network
#
# For every network in df: load the sampled spectra, map them through
# utils.eigenvalues_PinvN, and compare the observed LB / UB stored in df with
# the percentile interval of the sampled lower / upper bounds. The interval
# endpoints and the two within-interval flags are written back into df.
#
# NOTE(review): the within-interval checks below are exact float comparisons.
# A value such as 0.99999999 against an interval of [1.0, 1.0] is reported as
# outside; consider whether a numerical tolerance is appropriate before the
# flags are published.
ci = 0.95
ci_pct = int(ci*100)          # integer label used in messages and column names
alpha = (1 - ci) * 100 / 2    # percentage cut off in each tail (loop-invariant)
for index in df.index:
    print(f"Processing {index}...")
    LB = df.loc[index, "LB"]
    UB = df.loc[index, "UB"]

    # np.load returns a lazily-read NpzFile for .npz archives; the context
    # manager closes the underlying file once the needed array is in memory.
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading
    # untrusted files. Only "spectra" is read here — confirm it is a plain
    # numeric array and drop the flag if so.
    with np.load(f"data/{index}_spectra.npz", allow_pickle=True) as data:
        spectra = data["spectra"]          # shape: (sample-size, #nodes)
    print("Shape of spectra:", spectra.shape)

    welfare_ratios = utils.eigenvalues_PinvN(attenuation=0.01, alpha=spectra)
    # Last column: eigenvalue of PinvN, the value of mu(alpha) at alpha_1
    # (the largest eigenvalue of A).
    lower_bounds = welfare_ratios[:, -1]

    # Percentile interval of the sampled lower bounds
    lower_LB, upper_LB = np.percentile(lower_bounds, [alpha, 100 - alpha])
    print(f"observed LB: {LB}")
    print(f"{ci_pct}% confidence interval (LB):", lower_LB, upper_LB)

    # Percentile interval of the sampled upper bounds (row-wise maxima)
    upper_bounds, UB_ci95 = max_values_ci(welfare_ratios, ci=ci)
    lower_UB, upper_UB = UB_ci95
    print(f"observed UB: {UB}")
    print(f"{ci_pct}% confidence interval (UB):", lower_UB, upper_UB)

    # Record the interval endpoints and whether the observed value lies inside
    df.loc[index, f"LB_ci{ci_pct}_lower"] = lower_LB
    df.loc[index, f"LB_ci{ci_pct}_upper"] = upper_LB
    df.loc[index, f"UB_ci{ci_pct}_lower"] = lower_UB
    df.loc[index, f"UB_ci{ci_pct}_upper"] = upper_UB
    df.loc[index, "LB_within_CI"] = (LB >= lower_LB) and (LB <= upper_LB)
    df.loc[index, "UB_within_CI"] = (UB >= lower_UB) and (UB <= upper_UB)
    print(f"LB within {ci_pct}% CI: {df.loc[index, 'LB_within_CI']}, UB within {ci_pct}% CI: {df.loc[index, 'UB_within_CI']}")
    print("===========================================")
df

Processing Karate...
Shape of spectra: (10000, 34)
observed LB: 0.994800630624275
95% confidence interval (LB): 0.9923670807817834 0.9958321533107143
observed UB: 1.0
95% confidence interval (UB): 1.0 1.0
LB within 95% CI: True, UB within 95% CI: True
Processing Student...
Shape of spectra: (10000, 141)
observed LB: 0.9954610471908536
95% confidence interval (LB): 0.9961720037309824 0.9976616966007592
observed UB: 0.9999999900517424
95% confidence interval (UB): 1.0 1.0
LB within 95% CI: False, UB within 95% CI: False
Processing Jazz...
Shape of spectra: (10000, 198)
observed LB: 0.5545405912318706
95% confidence interval (LB): 0.559552474655509 0.6390800345587662
observed UB: 0.9999999823046752
95% confidence interval (UB): 0.9999999927918098 1.0
LB within 95% CI: False, UB within 95% CI: False
Processing FB...
Shape of spectra: (10000, 329)
observed LB: 0.8953030478539662
95% confidence interval (LB): 0.9023950744678555 0.9233867268622599
observed UB: 1.0
95% confidence interval (UB)

Unnamed: 0,Nodes,density,<k>,k_max,spectral_radius,rank,LB,UB,LB_ci95_lower,LB_ci95_upper,UB_ci95_lower,UB_ci95_upper,LB_within_CI,UB_within_CI
Karate,34.0,0.139037,4.588235,17.0,6.725698,24.0,0.994801,1.0,0.992367,0.995832,1.0,1.0,True,True
Student,141.0,0.030091,4.212766,10.0,6.311929,141.0,0.995461,1.0,0.996172,0.997662,1.0,1.0,False,False
Jazz,198.0,0.140594,27.69697,100.0,40.027376,198.0,0.554541,1.0,0.559552,0.63908,1.0,1.0,False,False
FB,329.0,0.036215,11.878419,63.0,24.446709,324.0,0.895303,1.0,0.902395,0.923387,1.0,1.0,False,True
FB1,320.0,0.046415,14.80625,113.0,29.729055,320.0,0.821017,1.0,0.825686,0.858522,1.0,1.0,False,False
NetSci,379.0,0.01276,4.823219,34.0,10.375459,358.0,0.986598,1.0,0.986948,0.991138,1.0,1.0,False,True


In [None]:
# for paper: keep the network statistics, the observed bounds and the
# within-CI flags; the numeric CI endpoint columns are left out.
df_paper = df.drop(
    columns=[
        f"{bound}_ci{int(ci*100)}_{side}"
        for bound in ("LB", "UB")
        for side in ("lower", "upper")
    ]
)
# df_paper.to_csv("out/welfare_bounds_CI.csv")
df_paper

Unnamed: 0,Nodes,density,<k>,k_max,spectral_radius,rank,LB,UB,LB_within_CI,UB_within_CI
Karate,34.0,0.139037,4.588235,17.0,6.725698,24.0,0.994801,1.0,True,True
Student,141.0,0.030091,4.212766,10.0,6.311929,141.0,0.995461,1.0,False,False
Jazz,198.0,0.140594,27.69697,100.0,40.027376,198.0,0.554541,1.0,False,False
FB,329.0,0.036215,11.878419,63.0,24.446709,324.0,0.895303,1.0,False,True
FB1,320.0,0.046415,14.80625,113.0,29.729055,320.0,0.821017,1.0,False,False
NetSci,379.0,0.01276,4.823219,34.0,10.375459,358.0,0.986598,1.0,False,True
