In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Experiment grid. The aggregation code below builds one result-file name per
# (dataset, train ratio, model, hidden width, seed) combination from these lists.
datasets = ['SBM-1', 'SBM-2', 'SBM-3', 'ER-4', 'ER-5', 'PROTEINS']
train_ratio_list = [0.7, 0.9]
GNN_list = [
    'GCN_mean', 'GCN_RW_mean', 'MPGNN_mean',
    'GCN_sum', 'GCN_RW_sum', 'MPGNN_sum',
]
hidden_list = [2 ** exponent for exponent in range(2, 9)]  # 4, 8, 16, ..., 256
seed_list = list(range(0, 100, 10))  # 0, 10, 20, ..., 90
alpha = 100  # interpolated into the result file names ("..._alpha100")
delta = 0.05  # NOTE(review): not referenced by the cells visible here - confirm it is still needed

In [None]:
# Summary tables: one row per (dataset, train ratio) pair, one column per GNN
# variant. Every cell starts as NaN so entries that are never filled in remain
# distinguishable from real values.
summary_shape = (len(datasets) * len(train_ratio_list), len(GNN_list))

GE_bound_results_mean = np.full(summary_shape, np.nan)
GE_bound_results_std = np.full(summary_shape, np.nan)

Rademacher_GE_bound_results_mean = np.full(summary_shape, np.nan)
Rademacher_GE_bound_results_std = np.full(summary_shape, np.nan)
def _load_final_value(path):
    """Return the last element of the array saved at ``path``.

    Returns NaN when the file does not exist, so a missing run leaves its slot
    marked as "no data" instead of aborting the aggregation.
    """
    try:
        return np.load(path)[-1]
    except FileNotFoundError:
        return np.nan


# For every GNN variant: collect the per-run results from ../result_arrays/,
# print a LaTeX table of the values read from the *_excess_risk.npy files
# (mean +- std over seeds, one column per hidden width), and store the
# seed-averaged bounds for the largest hidden width in the summary tables.
for model_ind, model_type in enumerate(GNN_list):
    print(model_type)
    # Cubes indexed by (dataset x train-ratio row, hidden width, seed); NaN = no file.
    cube_shape = (len(datasets) * len(train_ratio_list), len(hidden_list), len(seed_list))
    all_ge_results = np.full(cube_shape, np.nan)
    all_bound_results = np.full(cube_shape, np.nan)
    all_Rademacher_bound_results = np.full(cube_shape, np.nan)
    combo_list = []
    first_ind = 0
    for dataset in datasets:
        for train_ratio in train_ratio_list:
            for hidden_ind, hidden_dim in enumerate(hidden_list):
                for seed_ind, random_seed in enumerate(seed_list):
                    save_name_str = '{}_{}_seed_{}_{}_hidden{}_100train_ratio{}_alpha{}'.format(
                        dataset, 'w_consts', random_seed, model_type, hidden_dim,
                        int(100*train_ratio), alpha)
                    ge_file_name = '../result_arrays/' + save_name_str + '_excess_risk.npy'
                    bound_file_name = '../result_arrays/' + save_name_str + '_generalization_bound.npy'
                    bound_Rademacher_file_name = '../result_arrays/' + save_name_str + '_generalization_bound_Rademacher.npy'
                    # Each file is loaded independently: one missing file must not
                    # discard the other results that do exist for the same run.
                    # Only the last stored entry of each array is used
                    # (presumably the final training step - confirm against the writer).
                    all_ge_results[first_ind, hidden_ind, seed_ind] = abs(_load_final_value(ge_file_name))
                    all_bound_results[first_ind, hidden_ind, seed_ind] = _load_final_value(bound_file_name)
                    all_Rademacher_bound_results[first_ind, hidden_ind, seed_ind] = _load_final_value(bound_Rademacher_file_name)
            combo_list.append((dataset, train_ratio))
            first_ind += 1

    # first generalization error results
    all_ge_results = all_ge_results * 100000 # enlarge the values to avoid scientific notation
    if not np.isnan(all_ge_results).all():
        # nan-aware statistics over the seed axis; an all-NaN slice yields NaN
        # (numpy also emits a RuntimeWarning for it), which is printed as N/A below.
        results_mean = np.nanmean(all_ge_results, axis=2)
        results_std = np.nanstd(all_ge_results, axis=2)
        for i in range(len(combo_list)):
            if not np.isnan(results_mean[i]).all():
                print('{}&${}$'.format(combo_list[i][0], combo_list[i][1]), end='')
                for hidden_ind, hidden_dim in enumerate(hidden_list):
                    if np.isnan(results_mean[i, hidden_ind]):
                        print('&N/A', end='')
                    else:
                        # '\\pm' prints a literal "\pm"; the unescaped form is an invalid escape sequence.
                        print('&${:.3f}\\pm{:.3f}$'.format(results_mean[i, hidden_ind], results_std[i, hidden_ind]), end='')
                print('\\\\')
    else:
        print('N/A')

    # Bounds are summarised for the last entry of hidden_list only; slicing that
    # width first gives the same numbers as reducing the whole cube and then
    # taking column -1.
    GE_bound_results_mean[:, model_ind] = np.nanmean(all_bound_results[:, -1, :], axis=1)
    GE_bound_results_std[:, model_ind] = np.nanstd(all_bound_results[:, -1, :], axis=1)

    Rademacher_GE_bound_results_mean[:, model_ind] = np.nanmean(all_Rademacher_bound_results[:, -1, :], axis=1)
    Rademacher_GE_bound_results_std[:, model_ind] = np.nanstd(all_Rademacher_bound_results[:, -1, :], axis=1)

# then generalization bound results (latex)
def _print_latex_rows(mean_table, std_table):
    """Print the bound summary as LaTeX table rows.

    One row is printed per entry of ``combo_list`` whose row in ``mean_table``
    holds at least one non-NaN value; each row has one ``mean\\pm std`` cell per
    column (GNN variant), with ``N/A`` for NaN cells. Prints a single ``N/A``
    when the whole table is NaN.
    """
    if np.isnan(mean_table).all():
        print('N/A')
        return
    for row_ind, (dataset_name, ratio) in enumerate(combo_list):
        if np.isnan(mean_table[row_ind]).all():
            continue
        print('{}&${}$'.format(dataset_name, ratio), end='')
        for mean_val, std_val in zip(mean_table[row_ind], std_table[row_ind]):
            if np.isnan(mean_val):
                print('&N/A', end='')
            else:
                # '\\pm' prints a literal "\pm"; the unescaped form is an invalid escape sequence.
                print('&${:.3f}\\pm{:.3f}$'.format(mean_val, std_val), end='')
        print('\\\\')


print('All bounds!')
_print_latex_rows(GE_bound_results_mean, GE_bound_results_std)

print('All Rademacher bounds!')
_print_latex_rows(Rademacher_GE_bound_results_mean, Rademacher_GE_bound_results_std)

# then generalization bound results (markdown)
def _print_markdown_rows(mean_table, std_table):
    """Print the bound summary as Markdown table rows.

    One ``|dataset|$ratio$|...|`` row is printed per entry of ``combo_list``
    whose row in ``mean_table`` holds at least one non-NaN value; NaN cells are
    shown as ``N/A``. Prints a single ``N/A`` when the whole table is NaN.
    """
    if np.isnan(mean_table).all():
        print('N/A')
        return
    for row_ind, (dataset_name, ratio) in enumerate(combo_list):
        if np.isnan(mean_table[row_ind]).all():
            continue
        print('|{}|${}$|'.format(dataset_name, ratio), end='')
        for mean_val, std_val in zip(mean_table[row_ind], std_table[row_ind]):
            if np.isnan(mean_val):
                print('N/A|', end='')
            else:
                # '\\pm' prints a literal "\pm"; the unescaped form is an invalid escape sequence.
                print('${:.3f}\\pm{:.3f}$|'.format(mean_val, std_val), end='')
        # End the row with a single newline: a blank line between rows would
        # terminate the Markdown table after every row.
        print()


print('All bounds!')
_print_markdown_rows(GE_bound_results_mean, GE_bound_results_std)

print('All Rademacher bounds!')
_print_markdown_rows(Rademacher_GE_bound_results_mean, Rademacher_GE_bound_results_std)

In [None]:
# plot
# For each selected GNN variant, draw one error-bar curve per dataset: the mean
# and standard deviation over seeds of the absolute value read from the
# *_excess_risk.npy files, against log2 of the hidden width. Each model gets its
# own figure, saved as '<model>_abs_ge_70sup.pdf'.
# NOTE(review): this cell rebinds datasets / train_ratio_list / GNN_list /
# hidden_list / seed_list, so any cell re-run afterwards sees these values.
datasets = ['SBM-1', 'SBM-2', 'SBM-3', 'ER-4', 'ER-5', 'PROTEINS']
train_ratio_list = [0.7]
GNN_list = ['GCN_mean', 'MPGNN_mean']
hidden_list = [4, 8, 16, 32, 64, 128, 256]
seed_list = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]
log_hidden_list = [np.log2(h) for h in hidden_list]

for model_type in GNN_list:
    # (dataset x train-ratio row, hidden width, seed); NaN marks a run with no file.
    all_results = np.full((len(datasets) * len(train_ratio_list), len(hidden_list), len(seed_list)), np.nan)
    combo_list = []
    first_ind = 0
    for dataset in datasets:
        for train_ratio in train_ratio_list:
            for hidden_ind, hidden_dim in enumerate(hidden_list):
                for seed_ind, random_seed in enumerate(seed_list):
                    file_name = '../result_arrays/' + '{}_{}_seed_{}_{}_hidden{}_100train_ratio{}_alpha{}'.format(
                        dataset, 'w_consts', random_seed, model_type, hidden_dim,
                        int(100*train_ratio), alpha) + '_excess_risk.npy'
                    try:
                        all_results[first_ind, hidden_ind, seed_ind] = abs(np.load(file_name)[-1])
                    except FileNotFoundError:
                        # Missing runs stay NaN and are ignored by the nan-aware statistics.
                        pass
            combo_list.append(dataset)
            first_ind += 1
    all_results = all_results * 100000 # enlarge the values to avoid scientific notation

    # A fresh figure per model: reusing one figure across iterations would stack
    # the curves of several models on backends where show() keeps the figure open.
    fig, ax = plt.subplots()
    has_curve = False
    if not np.isnan(all_results).all():
        results_mean = np.nanmean(all_results, axis=2)
        results_std = np.nanstd(all_results, axis=2)
        for i in range(len(combo_list)):
            if not np.isnan(results_mean[i]).all():
                ax.errorbar(log_hidden_list, results_mean[i], yerr=results_std[i], label=combo_list[i], linestyle='dashed', marker='o')
                has_curve = True
    if has_curve:
        # legend() without any labelled artist only produces a warning.
        ax.legend()
    ax.set_xlabel('log2(hidden dimension h)')
    ax.set_ylabel('Absolute empirical generalization error (X 100000)')
    fig.savefig(model_type+'_abs_ge_70sup.pdf', format='pdf')
    plt.show()
    # Release the figure explicitly; a no-op if the backend already closed it.
    plt.close(fig)