In [1]:
import pandas as pd

import torch

from torch_geometric.utils import to_scipy_sparse_matrix

from data import get_sbm
from stats import compute_statistics
from utils import get_results_path


In [2]:
# Dataset parameters
# Graph layout: equally sized classes.
num_classes = 10
num_nodes_per_class = 100
num_nodes = num_classes * num_nodes_per_class

# Node-feature settings (forwarded to get_sbm in the sweep cell).
n_features = 16
cluster_std = 0.4

# One entry per class, each equal to num_nodes_per_class (int64 tensor).
block_sizes = torch.full((num_classes,), num_nodes_per_class, dtype=torch.long)


In [3]:
# Destination CSV for the statistics table, located under whatever
# get_results_path() returns (presumably a pathlib.Path -- confirm in utils).
stats_path = get_results_path() / "statistics_synthetic.csv"
# Empty accumulator: the sweep cell passes this frame to compute_statistics()
# and rebinds `df` to the returned frame on every iteration.
df = pd.DataFrame()


In [4]:
# Get values for random graphs
#
# Each setting is a pair (edge_num_self, edge_num_other). For every pair an
# SBM graph is generated, its adjacency matrix is handed to
# compute_statistics(), and the accumulated table is written to stats_path.
sbm_settings = [
    (6, 3),
    (6, 6),
    (3, 6),
    (1, 9),
]

for edge_num_self, edge_num_other in sbm_settings:
    # Row label used by compute_statistics().
    # NOTE(review): the label shows p_ij as edge_num_other/100, whereas the
    # probability used below is edge_num_other / (num_nodes - num_nodes_per_class)
    # -- confirm the label is intended shorthand.
    name = f"p_ii={edge_num_self}/100, p_ij={edge_num_other}/100"

    # Between-class probability, identical for every pair of classes.
    prob_matrix = torch.full(
        (num_classes, num_classes),
        edge_num_other / (num_nodes - num_nodes_per_class),
    )
    # Within-class probability, one value per block.
    edge_prob_self = edge_num_self / block_sizes

    # Keep prob_matrix off the diagonal and place the within-class
    # probabilities on the diagonal.
    off_diagonal = 1 - torch.eye(num_classes)
    edge_probs = torch.diag(edge_prob_self) + prob_matrix * off_diagonal

    data = get_sbm(
        block_sizes=block_sizes,
        edge_probs=edge_probs,
        centers=torch.eye(num_classes),
        cluster_std=cluster_std,
        n_features=n_features,
        seed=0,
    )

    print(f"Processing {name}...")

    # Adjacency matrix of the generated graph as a SciPy sparse matrix.
    A = to_scipy_sparse_matrix(data.edge_index)

    # Rebind df to the frame returned for this setting, then rewrite the CSV
    # so the file on disk is refreshed after every setting.
    df = compute_statistics(A, name, df)
    df.to_csv(stats_path)


Processing p_ii=6/100, p_ij=3/100...
Processing p_ii=6/100, p_ij=6/100...
Processing p_ii=3/100, p_ij=6/100...
Processing p_ii=1/100, p_ij=9/100...


In [5]:
# Show the mean / std / expected columns for the "1_" and "2_" statistics.
columns_1_2 = [
    "1_mean",
    "1_std",
    "1_expected",
    "2_mean",
    "2_std",
    "2_expected",
]
df[columns_1_2]


Unnamed: 0_level_0,Unnamed: 1_level_0,1_mean,1_std,1_expected,2_mean,2_std,2_expected
Dataset,Self-loops,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"p_ii=6/100, p_ij=3/100",True,0.113581,0.03907,0.103199,0.097186,0.009861,0.094278
"p_ii=6/100, p_ij=3/100",False,0.0,0.0,0.0,0.105467,0.011502,0.104092
"p_ii=6/100, p_ij=6/100",True,0.082454,0.027257,0.075988,0.071834,0.005354,0.070589
"p_ii=6/100, p_ij=6/100",False,0.0,0.0,0.0,0.076419,0.005828,0.075951
"p_ii=3/100, p_ij=6/100",True,0.111512,0.040614,0.101092,0.095247,0.009794,0.092469
"p_ii=3/100, p_ij=6/100",False,0.0,0.0,0.0,0.103145,0.011202,0.101891
"p_ii=1/100, p_ij=9/100",True,0.101172,0.037535,0.091291,0.085868,0.009595,0.083262
"p_ii=1/100, p_ij=9/100",False,0.0,0.0,0.0,0.092139,0.01015,0.090824


In [6]:
# Show the mean / std columns for the "3_" through "6_" statistics.
columns_3_to_6 = [
    "3_mean", "3_std",
    "4_mean", "4_std",
    "5_mean", "5_std",
    "6_mean", "6_std",
]
df[columns_3_to_6]


Unnamed: 0_level_0,Unnamed: 1_level_0,3_mean,3_std,4_mean,4_std,5_mean,5_std,6_mean,6_std
Dataset,Self-loops,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"p_ii=6/100, p_ij=3/100",True,0.02736,0.003468,0.019599,0.002653,0.008849,0.001741,0.006231,0.001572
"p_ii=6/100, p_ij=3/100",False,0.002184,0.002464,0.021124,0.003415,0.002124,0.001319,0.006307,0.001712
"p_ii=6/100, p_ij=6/100",True,0.015532,0.001409,0.010986,0.001377,0.00418,0.000759,0.00294,0.000678
"p_ii=6/100, p_ij=6/100",False,0.001318,0.001211,0.01162,0.001694,0.001212,0.000523,0.002956,0.000723
"p_ii=3/100, p_ij=6/100",True,0.025371,0.003072,0.017949,0.002173,0.007409,0.001138,0.005055,0.001064
"p_ii=3/100, p_ij=6/100",False,0.000786,0.001349,0.019494,0.002957,0.000953,0.000594,0.005258,0.001281
"p_ii=1/100, p_ij=9/100",True,0.020803,0.002736,0.014562,0.001799,0.005671,0.000922,0.003887,0.000878
"p_ii=1/100, p_ij=9/100",False,0.000724,0.001146,0.01565,0.002387,0.000924,0.000516,0.004012,0.001022
