In [10]:
# load the hypergraph
import yaml
import logging
import itertools
import os
import sys
import random

import utils
# from utils import lazy_clique_edge_cover
import itertools as it

from scipy import sparse
import numpy as np
import seaborn as sns
import networkx as nx
import community

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv
from torch_geometric.data import Data

from cell.utils import link_prediction_performance
from cell.cell import Cell, EdgeOverlapCriterion, LinkPredictionCriterion
from cell.graph_statistics import compute_graph_statistics

from utils import load_graphs
from cliques import compute_cliques

import community.community_louvain as community


In [2]:
class GNN(torch.nn.Module):
    """Two-layer GraphSAGE encoder that maps node features to node embeddings.

    Layer stack: ``SAGEConv -> ReLU -> BatchNorm1d -> SAGEConv``. No activation is
    applied after the second convolution, so the returned embeddings are unbounded.

    Args:
        node_features: Number of input features per node (columns of ``data.x``).
        hidden_channels: Width of the hidden layer and of the output embeddings.
            Defaults to 128, the value that was previously hard-coded.
    """

    def __init__(self, node_features, hidden_channels=128):
        super().__init__()
        # GraphSAGE convolutions (not GCN); creation order is kept so that the
        # random parameter initialisation is unchanged for the default width.
        self.conv1 = SAGEConv(node_features, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.bn = torch.nn.BatchNorm1d(hidden_channels)

    def forward(self, data):
        """Return one embedding row per node of ``data``.

        Args:
            data: Object exposing ``x`` (node feature matrix) and ``edge_index``
                (edge list passed straight to the SAGEConv layers).

        Returns:
            Tensor with ``hidden_channels`` columns, one row per node.
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Normalisation is applied after the non-linearity, before the second hop
        x = self.bn(x)
        x = self.conv2(x, edge_index)

        return x


def save_hypergraph(hg, path):
    """Write a hypergraph to ``path`` as plain text, one hyperedge per line.

    Each hyperedge is written as its members converted with ``str`` and separated
    by single spaces; an empty hyperedge produces an empty line. An existing file
    at ``path`` is overwritten.

    Args:
        hg: Iterable of hyperedges, each an iterable of node labels.
        path: Destination file path.
    """
    with open(path, 'w') as out_file:
        for hyperedge in hg:
            # print() stringifies every member, joins with ' ' and appends '\n'
            print(*hyperedge, file=out_file)


def hypergraph_metrics(hg, random_state=None):
    """Summarise a hypergraph and two graphs derived from it.

    Args:
        hg: Iterable of hyperedges, each a re-iterable collection of hashable
            node labels. It is materialised into a list once, so a generator of
            hyperedges is accepted as well as a list or set.
        random_state: Optional seed / random state forwarded to
            ``community.best_partition`` so that both modularity values can be
            reproduced. With the default ``None`` the call is made without the
            argument, i.e. exactly as before.

    Returns:
        dict with the keys
        ``density`` (number of hyperedges divided by number of nodes),
        ``average_size`` (mean hyperedge length),
        ``average_degree`` (mean number of hyperedge memberships per node),
        ``coefficient`` (``nx.average_clustering`` of the clique projection),
        ``G_modularity`` (modularity of the best partition of the clique projection),
        ``B_modularity`` (same, for the node/hyperedge bipartite graph).

    Raises:
        ZeroDivisionError: if ``hg`` contains no nodes (for example when it is empty).
    """
    # Materialise once: the hyperedges are traversed several times below.
    hyperedges = list(hg)
    num_edges = len(hyperedges)

    # Single pass for the node set and the per-node membership counts
    nodes = set()
    node_degrees = {}
    for edge in hyperedges:
        for node in edge:
            nodes.add(node)
            node_degrees[node] = node_degrees.get(node, 0) + 1
    num_nodes = len(nodes)

    density = num_edges / num_nodes
    avg_size = sum(len(edge) for edge in hyperedges) / num_edges
    avg_degree = sum(node_degrees.values()) / num_nodes

    # Only forward the seed when one was supplied, so the default call signature
    # towards python-louvain is unchanged.
    louvain_kwargs = {} if random_state is None else {"random_state": random_state}

    # Projected graph: every hyperedge becomes a clique over its members
    G = nx.Graph()
    G.add_nodes_from(nodes)
    for edge in hyperedges:
        G.add_edges_from(itertools.combinations(edge, 2))

    part_G = community.best_partition(G, **louvain_kwargs)
    mod_G = community.modularity(part_G, G)

    # Bipartite graph: original vertices on one side, one node per hyperedge on
    # the other. NOTE: hyperedge nodes are named 'e0', 'e1', ...; a vertex that is
    # itself labelled with such a string would be merged with the hyperedge node.
    B = nx.Graph()
    B.add_nodes_from(nodes, bipartite=0)
    B.add_nodes_from((f'e{i}' for i in range(num_edges)), bipartite=1)
    for i, edge in enumerate(hyperedges):
        B.add_edges_from((node, f'e{i}') for node in edge)

    part_B = community.best_partition(B, **louvain_kwargs)
    mod_B = community.modularity(part_B, B)

    return {
        "density": density,
        "average_size": avg_size,
        "average_degree": avg_degree,
        "coefficient": nx.average_clustering(G),
        "G_modularity": mod_G,
        "B_modularity": mod_B
    }


In [4]:

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Read the experiment configuration; the context manager closes the file handle
# instead of leaving it to the garbage collector.
with open('./config.yml') as config_file:
    config = yaml.safe_load(config_file)

# Dataset selection for this run (the other dataset used with this notebook is 'email-Enron')
config['dataset'] = 'NDC-classes'
graphs = load_graphs(config, logger)
# beta is set to ten times the number of training simplices
# NOTE(review): presumably consumed by the clique sampler - confirm against compute_cliques
config['beta'] = len(graphs['simplicies_train']) * 10

# data = np.array([len(s) for s in graphs['simplicies_train']])
# hist, bins = np.histogram(data, bins=np.linspace(1, 8, 8))
# sns.displot(data)

number of simplices: 49724
number of unique simplices: 1088
Node index should begin with 0, reindexing the hypergraphs ...
number of nodes in construct graph 1161


INFO:root:Finish loading graphs.
INFO:root:Nodes train: 1161
INFO:root:Simplicies train: 1088


In [5]:
from torch_geometric.nn import Node2Vec



# Dense adjacency matrices of G_train and G_weighted, both laid out on the same
# sorted node order so that entry (i, j) refers to the same node pair in each.
node_order = sorted(graphs['G_train'].nodes())
graph_adjacency_matrix = nx.to_numpy_array(graphs['G_train'], nodelist=node_order)
weighted_graph_adjacency_matrix = nx.to_numpy_array(graphs['G_weighted'], nodelist=node_order)


edge_index = torch.tensor(np.array(graph_adjacency_matrix.nonzero()), dtype=torch.long)
data = Data(edge_index=edge_index)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = Node2Vec(
    data.edge_index,
    embedding_dim=50,
    walks_per_node=10,
    walk_length=20,
    context_size=10,
    p=1.0,
    q=1.0,
    num_negative_samples=1,
    # Size the embedding table from the adjacency matrix: inferring it from
    # edge_index alone would drop trailing nodes that have no edges.
    num_nodes=graph_adjacency_matrix.shape[0],
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loader = model.loader(batch_size=128, shuffle=True, num_workers=4)

# A single pass over the random-walk batches
model.train()
for pos_rw, neg_rw in loader:
    optimizer.zero_grad()
    loss = model.loss(pos_rw.to(device), neg_rw.to(device))
    loss.backward()
    optimizer.step()

# Export a gradient-free CPU copy of the embedding matrix: downstream cells build a
# CPU model on top of it, and the Node2Vec parameter itself is left untouched.
model.eval()
with torch.no_grad():
    embeddings = model().detach().cpu()


In [6]:
# Dense adjacency matrices of G_train and G_weighted, both laid out on the same
# sorted node order so that entry (i, j) refers to the same node pair in each.
node_order = sorted(graphs['G_train'].nodes())
graph_adjacency_matrix = nx.to_numpy_array(graphs['G_train'], nodelist=node_order)
weighted_graph_adjacency_matrix = nx.to_numpy_array(graphs['G_weighted'], nodelist=node_order)

gephi_graphs = []
ranks = [8, 10, 12]

# Training data of the edge-weight regressor. None of it depends on the rank, so
# it is built once rather than once per rank (and, for the target, once per epoch).
edge_index = torch.tensor(np.array(graph_adjacency_matrix.nonzero()), dtype=torch.long)
edge_value = weighted_graph_adjacency_matrix[graph_adjacency_matrix.nonzero()]
edge_target = torch.tensor(edge_value, dtype=torch.float)
# The GNN below is built on the CPU; make sure the node features live there too
# and carry no autograd history from the embedding model.
node_features = embeddings.detach().cpu()

for rank in ranks:
    print(f'rank: {rank}')

    # --- 1. Train the GNN that regresses edge weights from node embeddings -----
    data = Data(x=node_features, edge_index=edge_index)
    model = GNN(node_features.size(1))
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
    model.train()
    src, dst = edge_index
    for epoch in range(200):
        optimizer.zero_grad()
        out = model(data)
        # Predicted weight of an edge = dot product of its endpoint embeddings
        score = (out[src] * out[dst]).sum(dim=-1)
        loss = F.mse_loss(score, edge_target)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 10 == 0:
            print(f'epoch: {epoch}, loss: {loss.item()}')

    # --- 2. Train CELL on the unweighted structure with the current rank --------
    sparse_matrix = sparse.csr_matrix(graph_adjacency_matrix)
    cell_model = Cell(A=sparse_matrix,
                H=rank,
                callbacks=[EdgeOverlapCriterion(invoke_every=10, edge_overlap_limit=.80)])
    cell_model.train(steps=400,
                optimizer_fn=torch.optim.Adam,
                optimizer_args={'lr': 0.1,
                                'weight_decay': 1e-7})

    # --- 3. Sample a graph from CELL and attach predicted integer weights -------
    generated_graph = cell_model.sample_graph()
    # .toarray() replaces the `.A` shortcut, which recent SciPy releases no longer provide
    graph_prime = generated_graph.toarray()
    # Stack the index arrays first: torch.tensor on a tuple of ndarrays is very slow
    edge_index_prime = torch.tensor(np.array(graph_prime.nonzero()), dtype=torch.long)
    data_prime = Data(x=node_features, edge_index=edge_index_prime)
    # Inference: eval mode so BatchNorm uses (and does not update) its running
    # statistics, and no_grad so no autograd graph is built.
    model.eval()
    with torch.no_grad():
        out = model(data_prime)
    src_prime, dst_prime = edge_index_prime
    score = (out[src_prime] * out[dst_prime]).sum(dim=-1)
    # Every sampled edge gets a weight of at least 1, rounded to the nearest integer
    weight = np.rint(np.maximum(score.numpy(), 1)).astype(int)
    weighted_graph_prime = np.copy(graph_prime)
    # nonzero() enumerates entries in the same order as edge_index_prime above
    weighted_graph_prime[weighted_graph_prime.nonzero()] = weight

    # --- 4. Collect candidate cliques ------------------------------------------
    # NOTE(review): compute_cliques only receives graphs/config/logger, so the
    # candidates are derived from the training data rather than from
    # weighted_graph_prime - confirm this is intended.
    cliques = compute_cliques(graphs, config, logger)
    sample_cliques_table = cliques['children_cliques_train']
    # Flatten in one pass instead of repeated list concatenation (quadratic)
    sample_cliques = [list(c) for c in it.chain.from_iterable(sample_cliques_table.values())]
    set_sample_cliques = list({tuple(sorted(c)) for c in sample_cliques})
    print(f'len of origin: {len(sample_cliques)}, len of deduplicates: {len(set_sample_cliques)}')

    # --- 5. Reconstruct hyperedges by covering the weighted graph with cliques --
    reconstruct_hyperedges = utils.lazy_clique_edge_cover(np.copy(weighted_graph_prime), set_sample_cliques, len(graphs['simplicies_train']))

    # Re-project the reconstructed hyperedges: entry (u, v) counts the hyperedges
    # that contain both u and v.
    reconstruct_weighted_graph = np.zeros_like(weighted_graph_prime)
    for e in reconstruct_hyperedges:
        for u, v in it.combinations(e, 2):
            reconstruct_weighted_graph[u, v] = reconstruct_weighted_graph[u, v] + 1
            reconstruct_weighted_graph[v, u] = reconstruct_weighted_graph[v, u] + 1

    gephi_graphs.append(reconstruct_weighted_graph)


rank: 8
epoch: 9, loss: 43.132774353027344
epoch: 19, loss: 13.561848640441895
epoch: 29, loss: 6.4168548583984375
epoch: 39, loss: 3.5660345554351807
epoch: 49, loss: 2.195965051651001
epoch: 59, loss: 1.4474256038665771
epoch: 69, loss: 1.0073539018630981
epoch: 79, loss: 0.7241190671920776
epoch: 89, loss: 0.5336789488792419
epoch: 99, loss: 0.4011552631855011
epoch: 109, loss: 0.3084661662578583
epoch: 119, loss: 0.24149440228939056
epoch: 129, loss: 0.19176004827022552
epoch: 139, loss: 0.154227152466774
epoch: 149, loss: 0.12541212141513824
epoch: 159, loss: 0.1029987558722496
epoch: 169, loss: 0.08540230989456177
epoch: 179, loss: 0.07142386585474014
epoch: 189, loss: 0.060199953615665436
epoch: 199, loss: 0.05111322179436684


  self._set_intXint(row, col, x.flat[0])
  self._set_arrayXarray(i, j, x)


Step:  10/400 Loss: 5.14526 Edge-Overlap: 0.305 Total-Time: 1
Step:  20/400 Loss: 4.13074 Edge-Overlap: 0.488 Total-Time: 3
Step:  30/400 Loss: 3.79684 Edge-Overlap: 0.594 Total-Time: 4
Step:  40/400 Loss: 3.63880 Edge-Overlap: 0.699 Total-Time: 6
Step:  50/400 Loss: 3.55421 Edge-Overlap: 0.751 Total-Time: 7
Step:  60/400 Loss: 3.50247 Edge-Overlap: 0.780 Total-Time: 9
Step:  70/400 Loss: 3.46868 Edge-Overlap: 0.806 Total-Time: 10


  edge_index_prime = torch.tensor(graph_prime.nonzero(), dtype=torch.long)
INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624


Number of maximum cliques: 624


INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:714, recall: 0.65625, efficiency:0.065625


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 


INFO:root:Clique analysis done.


len of origin: 10869, len of deduplicates: 9507
rank: 10
epoch: 9, loss: 46.29668426513672
epoch: 19, loss: 14.718175888061523
epoch: 29, loss: 7.205690383911133
epoch: 39, loss: 4.164053916931152
epoch: 49, loss: 2.580524444580078
epoch: 59, loss: 1.7371816635131836
epoch: 69, loss: 1.2187588214874268
epoch: 79, loss: 0.8960748910903931
epoch: 89, loss: 0.671278715133667
epoch: 99, loss: 0.5103116631507874
epoch: 109, loss: 0.3928928077220917
epoch: 119, loss: 0.30733534693717957
epoch: 129, loss: 0.24418139457702637
epoch: 139, loss: 0.19653823971748352
epoch: 149, loss: 0.15998363494873047
epoch: 159, loss: 0.13150431215763092
epoch: 169, loss: 0.10908602178096771
epoch: 179, loss: 0.09127847850322723
epoch: 189, loss: 0.07695101946592331
epoch: 199, loss: 0.06530674546957016
Step:  10/400 Loss: 4.90909 Edge-Overlap: 0.355 Total-Time: 1
Step:  20/400 Loss: 3.89200 Edge-Overlap: 0.556 Total-Time: 3
Step:  30/400 Loss: 3.60473 Edge-Overlap: 0.678 Total-Time: 4
Step:  40/400 Loss: 3.48

INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624


Number of maximum cliques: 624


INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:715, recall: 0.6571691176470589, efficiency:0.06571691176470588


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 


INFO:root:Clique analysis done.


len of origin: 10869, len of deduplicates: 9507
rank: 12
epoch: 9, loss: 40.73535919189453
epoch: 19, loss: 12.882791519165039
epoch: 29, loss: 6.048609733581543
epoch: 39, loss: 3.4413955211639404
epoch: 49, loss: 2.0556397438049316
epoch: 59, loss: 1.3742691278457642
epoch: 69, loss: 0.9489268660545349
epoch: 79, loss: 0.6839794516563416
epoch: 89, loss: 0.5080205202102661
epoch: 99, loss: 0.385675311088562
epoch: 109, loss: 0.2984364628791809
epoch: 119, loss: 0.23441892862319946
epoch: 129, loss: 0.18642999231815338
epoch: 139, loss: 0.149820938706398
epoch: 149, loss: 0.12151259928941727
epoch: 159, loss: 0.0994233563542366
epoch: 169, loss: 0.08207985758781433
epoch: 179, loss: 0.06834164261817932
epoch: 189, loss: 0.057339705526828766
epoch: 199, loss: 0.048456210643053055
Step:  10/400 Loss: 4.65478 Edge-Overlap: 0.452 Total-Time: 1
Step:  20/400 Loss: 3.77136 Edge-Overlap: 0.586 Total-Time: 3
Step:  30/400 Loss: 3.53121 Edge-Overlap: 0.726 Total-Time: 4
Step:  40/400 Loss: 3.4

INFO:root:Start computing cliques
INFO:root:Found cache for max cliques train
INFO:root:Number of maximum cliques:624


Number of maximum cliques: 624


INFO:root:Optimizing clique sampler .. 
INFO:root:Found cache for rho.
INFO:root:[(16, 16), (2, 2), (1, 1), (3, 3), (4, 4), (13, 13), (5, 5), (7, 7), (6, 6), (19, 19), (15, 15), (9, 9), (18, 18), (14, 14), (8, 8), (11, 11), (21, 21), (10, 10), (12, 12), (4, 2), (23, 23), (3, 2), (9, 1), (16, 1), (6, 4), (3, 1), (5, 3), (5, 2), (18, 1), (6, 2), (2, 1), (6, 5), (15, 14), (4, 3), (19, 1), (5, 4), (7, 6), (7, 5), (8, 6), (7, 3), (7, 1), (9, 2), (11, 9), (20, 19), (8, 7), (8, 2), (7, 4)]
INFO:root:Optimize Clique Sampler: #hyperedges collected:715, recall: 0.6571691176470589, efficiency:0.06571691176470588


(0, 0), (1, 1), (165, 165), (177, 177), (243, 242), (311, 306), (320, 314), (380, 364), (415, 389), (458, 419), (461, 421), (464, 423), (471, 427), (473, 428), (485, 434), (510, 446), (519, 449), (526, 451), (540, 455), (559, 460), (967, 507), (976, 508), (1174, 522), (1237, 526), (1253, 527), (1898, 554), (2096, 562), (2696, 582), (3296, 601), (3332, 602), (3977, 618), (4305, 626), (4563, 632), (4608, 633), (4880, 638), (4937, 639), (5237, 644), (5482, 648), (6217, 660), (6917, 671), (8142, 687), (8387, 690), (8639, 693), (9134, 698), (9234, 699), (9434, 701), (10134, 708), 


INFO:root:Clique analysis done.


len of origin: 10869, len of deduplicates: 9507


In [7]:




# random.shuffle(set_sample_hyperedges)
# sample_clique_sizes = [len(c) for c in set_sample_cliques]
# data = np.array(sample_clique_sizes)
# hist, bins = np.histogram(data, bins=np.linspace(0, 5, 6))
# sns.displot(data)
# reconstruct_hyperedges = utils.lazy_clique_edge_cover(weighted_adjacency_matrix, set_sample_cliques, len(graphs['simplicies_train']))
# reconstruct_hyperedges_sizes = [len(e) for e in reconstruct_hyperedges]
# data = np.array(reconstruct_hyperedges_sizes)
# sns.displot(data)
# set_reconstruct_hyperedges = set([tuple(sorted(e)) for e in reconstruct_hyperedges])
# print(f'len: {len(graphs['simplicies_train'])}, {graphs['simplicies_train']}')
# print(f'len: {len(set_reconstruct_hyperedges)}, {set_reconstruct_hyperedges}')


# print('original hypergraph', hypergraph_metrics(graphs['simplicies_train']))
# print('reconstructed hypergraph', hypergraph_metrics(set_reconstruct_hyperedges))

In [8]:
import csv

# One Gephi "Kind"/"Label" per CELL rank, in the order the graphs were appended
# to gephi_graphs (derived from `ranks` so the two cannot drift apart).
graph_names = [f'rank-{rank}' for rank in ranks]

# Name the export after the dataset that was actually loaded, not a fixed one
csv_path = f"./{config['dataset']}-rank-{'-'.join(str(rank) for rank in ranks)}.csv"

fields = ['Source','Target','Type','Kind','Id','Label','Weight']
# newline='' is what the csv module expects; without it extra blank rows can
# appear on platforms that translate line endings.
with open(csv_path, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',')
    csvwriter.writerow(fields)
    idx = 1  # edge id, running across all exported graphs
    for graph_name, graph in zip(graph_names, gephi_graphs):
        # Upper triangle only, so each undirected edge is written once
        triu_adjacency_matrix = np.triu(graph)
        x, y = triu_adjacency_matrix.nonzero()
        for i, j in zip(x, y):
            csvwriter.writerow([i, j, 'Undirected', graph_name, idx, graph_name, triu_adjacency_matrix[i, j]])
            idx += 1

In [12]:
import os
import networkx as nx
import community
import itertools
from collections import defaultdict
import pickle
import community.community_louvain as community



def hypergraph_metrics(hg, random_state=None):
    """Summarise a hypergraph and two graphs derived from it.

    Args:
        hg: Iterable of hyperedges, each a re-iterable collection of hashable
            node labels. It is materialised into a list once, so a generator of
            hyperedges is accepted as well as a list or set.
        random_state: Optional seed / random state forwarded to
            ``community.best_partition`` so that both modularity values can be
            reproduced. With the default ``None`` the call is made without the
            argument, i.e. exactly as before.

    Returns:
        dict with the keys
        ``density`` (number of hyperedges divided by number of nodes),
        ``average_size`` (mean hyperedge length),
        ``average_degree`` (mean number of hyperedge memberships per node),
        ``coefficient`` (``nx.average_clustering`` of the clique projection),
        ``G_modularity`` (modularity of the best partition of the clique projection),
        ``B_modularity`` (same, for the node/hyperedge bipartite graph).

    Raises:
        ZeroDivisionError: if ``hg`` contains no nodes (for example when it is empty).
    """
    # Materialise once: the hyperedges are traversed several times below.
    hyperedges = list(hg)
    num_edges = len(hyperedges)

    # Single pass for the node set and the per-node membership counts
    nodes = set()
    node_degrees = {}
    for edge in hyperedges:
        for node in edge:
            nodes.add(node)
            node_degrees[node] = node_degrees.get(node, 0) + 1
    num_nodes = len(nodes)

    density = num_edges / num_nodes
    avg_size = sum(len(edge) for edge in hyperedges) / num_edges
    avg_degree = sum(node_degrees.values()) / num_nodes

    # Only forward the seed when one was supplied, so the default call signature
    # towards python-louvain is unchanged.
    louvain_kwargs = {} if random_state is None else {"random_state": random_state}

    # Projected graph: every hyperedge becomes a clique over its members
    G = nx.Graph()
    G.add_nodes_from(nodes)
    for edge in hyperedges:
        G.add_edges_from(itertools.combinations(edge, 2))

    part_G = community.best_partition(G, **louvain_kwargs)
    mod_G = community.modularity(part_G, G)

    # Bipartite graph: original vertices on one side, one node per hyperedge on
    # the other. NOTE: hyperedge nodes are named 'e0', 'e1', ...; a vertex that is
    # itself labelled with such a string would be merged with the hyperedge node.
    B = nx.Graph()
    B.add_nodes_from(nodes, bipartite=0)
    B.add_nodes_from((f'e{i}' for i in range(num_edges)), bipartite=1)
    for i, edge in enumerate(hyperedges):
        B.add_edges_from((node, f'e{i}') for node in edge)

    part_B = community.best_partition(B, **louvain_kwargs)
    mod_B = community.modularity(part_B, B)

    return {
        "density": density,
        "average_size": avg_size,
        "average_degree": avg_degree,
        "coefficient": nx.average_clustering(G),
        "G_modularity": mod_G,
        "B_modularity": mod_B
    }

def load_hypergraph(path, model):
    """Read a hypergraph from a text file with one hyperedge per line.

    Files produced by 'HyperDK00', 'HyperDK11' and 'HyperPLR' separate node ids
    with whitespace; files of every other model separate them with commas.
    Blank (or whitespace-only) lines are skipped, so a trailing empty line
    neither raises nor produces an empty hyperedge.

    Args:
        path: Path of the text file to read.
        model: Name of the generator that wrote the file; selects the separator.

    Returns:
        List of hyperedges, each a list of ``int`` node ids in file order.

    Raises:
        ValueError: if a non-blank line contains a token that is not an integer.
    """
    # split(None) splits on any run of whitespace, exactly like split()
    separator = None if model in ('HyperDK00', 'HyperDK11', 'HyperPLR') else ','
    with open(path, 'r') as f:
        return [
            [int(token) for token in line.split(separator)]
            for line in f
            if line.strip()
        ]

def _visible_entries(directory, predicate):
    """Return the sorted, non-hidden names in ``directory`` whose full path satisfies ``predicate``."""
    return [
        name
        for name in sorted(os.listdir(directory))
        if not name.startswith('.') and predicate(os.path.join(directory, name))
    ]


def get_metrics_baseline(graph_path):
    """Compute ``hypergraph_metrics`` for every generated hypergraph under ``graph_path``.

    Expected layout: ``<graph_path>/<model>/<graph>/<hypergraph file>``. Hidden
    entries (names starting with '.') and entries of the wrong kind (a file where
    a directory is expected, or vice versa) are skipped, and each level is visited
    in sorted order so the result does not depend on directory listing order.

    Args:
        graph_path: Root directory containing one sub-directory per model.

    Returns:
        ``defaultdict(list)`` mapping ``(graph, model)`` to the list of metric
        dicts of that pair's hypergraph files. A fresh mapping is built on every
        call, so calling the function twice does not duplicate entries.
    """
    results = defaultdict(list)
    for model in _visible_entries(graph_path, os.path.isdir):
        model_dir = os.path.join(graph_path, model)
        for graph in _visible_entries(model_dir, os.path.isdir):
            dataset_dir = os.path.join(model_dir, graph)
            for hypergraph in _visible_entries(dataset_dir, os.path.isfile):
                hg = load_hypergraph(os.path.join(dataset_dir, hypergraph), model)
                metric = hypergraph_metrics(hg)
                print(metric)
                results[(graph, model)].append(metric)

    return results


metric_baseline = get_metrics_baseline('./generate_graphs')
metric_baseline


{'density': 272.3272171253823, 'average_size': 3.09036394874847, 'average_degree': 841.5902140672783, 'coefficient': 0.9987801177423495, 'G_modularity': 0.0, 'B_modularity': 0.3241682752329701}
{'density': 271.908256880734, 'average_size': 3.088827406257732, 'average_degree': 839.8776758409786, 'coefficient': 0.9987422922013388, 'G_modularity': 0.0, 'B_modularity': 0.3243577486939129}
{'density': 272.5107033639144, 'average_size': 3.084377910695649, 'average_degree': 840.525993883792, 'coefficient': 0.9985933868011069, 'G_modularity': 0.0, 'B_modularity': 0.32489706673304763}
{'density': 271.92660550458714, 'average_size': 3.091048133153396, 'average_degree': 840.5382262996942, 'coefficient': 0.9986682607617726, 'G_modularity': 0.0, 'B_modularity': 0.3241800808225187}
{'density': 272.1345565749235, 'average_size': 3.092034881107565, 'average_degree': 841.4495412844037, 'coefficient': 0.9990618200538959, 'G_modularity': 0.0, 'B_modularity': 0.3240892775350938}
{'density': 229.9090909090

defaultdict(list,
            {('contact-high-school',
              'HyperDK00'): [{'density': 272.3272171253823,
               'average_size': 3.09036394874847,
               'average_degree': 841.5902140672783,
               'coefficient': 0.9987801177423495,
               'G_modularity': 0.0,
               'B_modularity': 0.3241682752329701}, {'density': 271.908256880734,
               'average_size': 3.088827406257732,
               'average_degree': 839.8776758409786,
               'coefficient': 0.9987422922013388,
               'G_modularity': 0.0,
               'B_modularity': 0.3243577486939129}, {'density': 272.5107033639144,
               'average_size': 3.084377910695649,
               'average_degree': 840.525993883792,
               'coefficient': 0.9985933868011069,
               'G_modularity': 0.0,
               'B_modularity': 0.32489706673304763}, {'density': 271.92660550458714,
               'average_size': 3.091048133153396,
               'average

In [None]:
# pickle.dump(metric_baseline, open('./metric_baseline.pkl', 'wb'))