In [1]:
import os
import json
import numpy as np

In [2]:
# Reference MMD numbers taken from other papers.
#
# Every 4-element list below follows the column order of `metric_list`.
# The Spectrum slot holds -1 in several rows; this looks like a placeholder
# for a value that is unavailable (TODO confirm against the papers).

metric_list = ["Degree", "Cluster", "Spectrum", "Orbit"]

# Raw MMD values per dataset and method (from SPECTRE paper)
benchmark_mmds = {
    "Community (small)": {
        "GraphRNN": [0.08, 0.12, -1, 0.04],
        "GRAN":     [0.06, 0.11, -1, 0.01],
        "MolGAN":   [0.06, 0.13, -1, 0.01],
        "SPECTRE":  [0.02, 0.21, -1, 0.01],
    },
    "Stochastic block models": {
        "GraphRNN": [0.0055, 0.0584, 0.0065, 0.0785],
        "GRAN":     [0.0113, 0.0553, 0.0054, 0.0540],
        "MolGAN":   [0.0235, 0.1161, 0.0117, 0.0712],
        "SPECTRE":  [0.0079, 0.0528, 0.0643, 0.0074],
    },
}

# MMD ratios per dataset and method (from DiGress paper)
benchmark_mmd_ratios = {
    "Community (small)": {
        "GraphRNN": [4.0, 1.7, -1, 4.0],
        "GRAN":     [3.0, 1.6, -1, 1.0],
        "SPECTRE":  [0.5, 2.7, -1, 2.0],
        "DiGress":  [1.0, 0.9, -1, 1.0],
    },
    "Stochastic block models": {
        "GraphRNN": [6.9, 1.7, -1, 3.1],
        "GRAN":     [14.1, 1.7, -1, 2.1],
        "SPECTRE":  [1.9, 1.6, -1, 1.6],
        "DiGress":  [1.6, 1.5, -1, 1.7],
    },
}

# Baseline MMD values per dataset (from SPECTRE paper)
benchmark_baselines = {
    # NOTE(review): the Spectrum baseline of 1 here looks like a stand-in that
    # pairs with the -1 Spectrum placeholders above - confirm.
    "Community (small)":       [0.02, 0.07, 1, 0.01],
    "Stochastic block models": [0.0008, 0.0332, 0.0063, 0.0255],
}

In [3]:
def get_best_mmds(run_dir):
    """
    Selects the run with the lowest final training loss under `run_dir` and
    returns its MMD metrics together with the matching baselines.

    Each entry of `run_dir` other than "_sources" is treated as a run folder
    that should contain a "metrics.json" file. Entries without that file
    (stray files, runs that never wrote metrics) and runs with no recorded
    "train_epoch_loss" values are skipped instead of aborting the scan.

    Arguments:
        `run_dir`: path to the directory holding one sub-directory per run
    Returns a pair of 4-element lists, `(mmds, baselines)`, each ordered as
    degree, clustering coefficient, spectrum, orbit. Every entry is the first
    recorded value of the corresponding metric.
    Raises `ValueError` if no run with a finite final training loss is found.
    Raises `KeyError` if the selected run lacks one of the MMD metrics.
    """
    # First, get the best run based on last loss
    best_loss, best_metrics = float("inf"), None
    # Sort so that ties in the final loss are resolved the same way on every
    # machine (os.listdir returns entries in arbitrary order)
    for run_num in sorted(os.listdir(run_dir)):
        if run_num == "_sources":
            continue
        metrics_path = os.path.join(run_dir, run_num, "metrics.json")
        if not os.path.isfile(metrics_path):
            continue  # Not a run folder, or the run never wrote metrics
        with open(metrics_path, "r") as f:
            metrics = json.load(f)
        loss_values = metrics.get("train_epoch_loss", {}).get("values")
        if not loss_values:
            continue  # Run stopped before logging any epoch loss
        last_loss = loss_values[-1]
        # A NaN loss never compares smaller, so diverged runs are ignored
        if last_loss < best_loss:
            best_loss, best_metrics = last_loss, metrics

    if best_metrics is None:
        raise ValueError(
            "No run with a finite final training loss found in %r" % (run_dir,)
        )

    # Now return the MMDs and baselines
    metric_keys = ["degree_mmd", "cluster_coef_mmd", "spectra_mmd", "orbit_mmd"]
    return (
        [best_metrics[key]["values"][0] for key in metric_keys],
        [best_metrics[key + "_baseline"]["values"][0] for key in metric_keys]
    )

In [4]:
# Import MMD and baseline values from training runs

base_path = "/gstore/home/tsenga5/discrete_graph_diffusion/models/trained_models/"

# For every dataset and every diffusion variant, load the (MMDs, baselines)
# pair of the best training run found in the corresponding model directory.
my_mmds_and_baselines = {
    dataset_name: {
        variant_name: get_best_mmds(os.path.join(base_path, model_dir))
        for variant_name, model_dir in variant_dirs
    }
    for dataset_name, variant_dirs in (
        ("Community (small)", (
            ("Edge-flip", "benchmark_community-small_edge-flip"),
            ("Edge-one", "benchmark_community-small_edge-addition"),
            ("Edge-zero", "benchmark_community-small_edge-deletion"),
        )),
        ("Stochastic block models", (
            ("Edge-flip", "benchmark_sbm_edge-flip"),
            ("Edge-one", "benchmark_sbm_edge-addition"),
            ("Edge-zero", "benchmark_sbm_edge-deletion"),
        )),
    )
}

# Split the loaded pairs into two dictionaries with the same nesting:
# element 0 of each pair is the MMD list, element 1 the baseline list.
my_mmds = {
    dataset_name: {
        variant_name: pair[0] for variant_name, pair in variants.items()
    }
    for dataset_name, variants in my_mmds_and_baselines.items()
}
my_baselines = {
    dataset_name: {
        variant_name: pair[1] for variant_name, pair in variants.items()
    }
    for dataset_name, variants in my_mmds_and_baselines.items()
}

In [5]:
# Print out results

def print_vals(key, vals):
    """
    Prints one table row of the form "key & degree & cluster & orbit", where
    each number is the square root of the corresponding entry of `vals`,
    formatted with two decimals.

    Arguments:
        `key`: label printed at the start of the row
        `vals`: sequence or array of at least 4 numbers, indexed as degree,
            cluster, spectrum, orbit
    """
    # Only entries 0, 1 and 3 are displayed. Entry 2 (spectrum) is dropped
    # before taking the root, so a negative placeholder in that slot no longer
    # triggers "RuntimeWarning: invalid value encountered in sqrt".
    shown = np.sqrt(np.asarray(vals, dtype=float)[[0, 1, 3]])
    print("%s & %.2f & %.2f & %.2f" % (key, shown[0], shown[1], shown[2]))

# One section per dataset; within a section, one row per method.
for d_key in my_mmds:
    print(d_key)

    # Published baseline of this dataset, used to normalize every raw MMD
    # below. (`my_baselines` holds the baselines measured in the trained runs
    # and could be swapped in here to normalize by those instead.)
    d_baseline = np.array(benchmark_baselines[d_key])

    # Rows for the methods with published raw MMDs
    for bm_key, bm_vals in benchmark_mmds[d_key].items():
        print_vals(bm_key, np.array(bm_vals) / d_baseline)

    # DiGress has no entry in `benchmark_mmds`; its row comes straight from
    # the ratio table and is therefore not divided by the baseline
    print_vals("DiGress", np.array(benchmark_mmd_ratios[d_key]["DiGress"]))

    # Rows for the trained runs
    for my_key, my_vals in my_mmds[d_key].items():
        print_vals(my_key, np.array(my_vals) / d_baseline)

    print("=========================")

Community (small)
GraphRNN & 2.00 & 1.31 & 2.00
GRAN & 1.73 & 1.25 & 1.00
MolGAN & 1.73 & 1.36 & 1.00
SPECTRE & 1.00 & 1.73 & 1.00
DiGress & 1.00 & 0.95 & 1.00
Edge-flip & 0.99 & 0.58 & 2.55
Edge-one & 1.21 & 0.62 & 1.83
Edge-zero & 1.87 & 1.02 & 4.69
Stochastic block models
GraphRNN & 2.62 & 1.33 & 1.75
GRAN & 3.76 & 1.29 & 1.46
MolGAN & 5.42 & 1.87 & 1.67
SPECTRE & 3.14 & 1.26 & 0.54
DiGress & 1.26 & 1.22 & 1.30
Edge-flip & 2.73 & 1.23 & 0.94
Edge-one & 1.00 & 1.21 & 0.81
Edge-zero & 1.31 & 1.19 & 0.80


  vals =  np.sqrt(vals)
