In [1]:
import pandas as pd

import torch

from torch_geometric.utils import to_scipy_sparse_matrix

from data import get_sbm
from stats import compute_statistics
from utils import get_results_path


In [None]:
# Dataset parameters
# Graph layout: `num_classes` blocks, each holding `num_nodes_per_class` nodes.
num_classes = 10
num_nodes_per_class = 100
num_nodes = num_classes * num_nodes_per_class
# One integer entry per class, every entry equal to `num_nodes_per_class`.
block_sizes = torch.full((num_classes,), num_nodes_per_class, dtype=torch.long)

# Node-feature settings; forwarded unchanged to `get_sbm` further down.
cluster_std = 0.4
n_features = 16


In [2]:
# Empty frame that the statistics loop fills in, one graph configuration at a time.
df = pd.DataFrame()
# CSV file (inside the project's results directory) the statistics are saved to.
stats_path = get_results_path() / "statistics_synthetic.csv"


In [4]:
# Get values for random graphs
#
# For each (edge_num_self, edge_num_other) pair, build a block edge-probability
# matrix, sample a graph with `get_sbm`, and append its statistics to `df`.
#   * diagonal entries:      edge_num_self / block size
#   * off-diagonal entries:  edge_num_other / (num_nodes - num_nodes_per_class)
# presumably `get_sbm` treats these as per-node-pair edge probabilities, which
# would make the two numbers the expected same-class / other-class neighbour
# counts of a node -- TODO confirm against `get_sbm`.

for edge_num_self, edge_num_other in [
    (6, 3),
    (6, 6),
    (3, 6),
    (1, 9),
]:
    # NOTE(review): the "/100" printed for p_ij does not match the probability
    # used below (the denominator is num_nodes - num_nodes_per_class). The label
    # is left untouched because it identifies the rows of the saved/displayed
    # results.
    name = f"p_ii={edge_num_self}/100, p_ij={edge_num_other}/100"

    # Start from a matrix filled with the inter-class probability ...
    prob_matrix = torch.full(
        (num_classes, num_classes),
        edge_num_other / (num_nodes - num_nodes_per_class),
    )
    # ... then zero its diagonal and put the intra-class probabilities there.
    edge_prob_self = edge_num_self / block_sizes
    edge_probs = (prob_matrix * (1 - torch.eye(num_classes))) + torch.diag(
        edge_prob_self
    )

    # Fixed seed: every configuration is sampled reproducibly.
    # NOTE(review): `centers` is num_classes x num_classes while
    # n_features=16 -- confirm `get_sbm` handles the width difference.
    data = get_sbm(
        block_sizes=block_sizes,
        edge_probs=edge_probs,
        centers=torch.eye(num_classes),
        cluster_std=cluster_std,
        n_features=n_features,
        seed=0,
    )

    print(f"Processing {name}...")

    # Pass the node count explicitly. Without it the matrix size is inferred
    # from the largest node index in `edge_index`, so isolated nodes at the end
    # of the index range would silently be dropped from the adjacency matrix.
    A = to_scipy_sparse_matrix(data.edge_index, num_nodes=num_nodes)

    df = compute_statistics(A, name, df)
    # Rewrite the CSV after every configuration so finished results are on disk
    # even if a later configuration fails.
    df.to_csv(stats_path)


In [6]:
# Show mean / std / expected value for the statistics prefixed "1_" and "2_".
df[[f"{order}_{stat}" for order in (1, 2) for stat in ("mean", "std", "expected")]]


Unnamed: 0_level_0,Unnamed: 1_level_0,1_mean,1_std,1_expected,2_mean,2_std,2_expected
Dataset,Self-loops,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"p_ii=6/100, p_ij=3/100",True,0.113076,0.046143,0.101329,0.095121,0.010718,0.092278
"p_ii=6/100, p_ij=3/100",False,0.0,0.0,0.0,0.10275,0.012392,0.101659
"p_ii=6/100, p_ij=6/100",True,0.081962,0.02487,0.075942,0.071819,0.005457,0.070614
"p_ii=6/100, p_ij=6/100",False,0.0,0.0,0.0,0.076469,0.006172,0.075979
"p_ii=3/100, p_ij=6/100",True,0.109682,0.036987,0.09999,0.094211,0.009105,0.091537
"p_ii=3/100, p_ij=6/100",False,0.0,0.0,0.0,0.102021,0.010417,0.10076
"p_ii=1/100, p_ij=9/100",True,0.100141,0.032885,0.091475,0.086232,0.008388,0.083918
"p_ii=1/100, p_ij=9/100",False,0.0,0.0,0.0,0.09278,0.009371,0.091605


In [7]:
# Show mean / std for the statistics prefixed "3_" through "6_".
df[[f"{order}_{stat}" for order in range(3, 7) for stat in ("mean", "std")]]


Unnamed: 0_level_0,Unnamed: 1_level_0,3_mean,3_std,4_mean,4_std,5_mean,5_std,6_mean,6_std
Dataset,Self-loops,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"p_ii=6/100, p_ij=3/100",True,0.026572,0.003723,0.018963,0.002708,0.00856,0.001732,0.006008,0.001524
"p_ii=6/100, p_ij=3/100",False,0.00248,0.002471,0.020318,0.003397,0.00223,0.001295,0.006021,0.001628
"p_ii=6/100, p_ij=6/100",True,0.015522,0.001438,0.010998,0.001345,0.004181,0.000745,0.002945,0.000675
"p_ii=6/100, p_ij=6/100",False,0.001288,0.001205,0.011642,0.001664,0.001206,0.00053,0.002965,0.000722
"p_ii=3/100, p_ij=6/100",True,0.024829,0.002786,0.017524,0.002066,0.00719,0.00111,0.004904,0.001051
"p_ii=3/100, p_ij=6/100",False,0.000788,0.001318,0.019002,0.00283,0.000956,0.000594,0.005092,0.00126
"p_ii=1/100, p_ij=9/100",True,0.021038,0.002454,0.014793,0.00178,0.005784,0.000926,0.003968,0.000881
"p_ii=1/100, p_ij=9/100",False,0.000746,0.001177,0.015925,0.002363,0.000923,0.000519,0.004103,0.001028
