In [1]:
from forceatlas2 import ForceAtlas2
import networkx as nx
import pandas as pd
import glob
import os, sys
# Put the parent of the current working directory on the import path;
# presumably that is where the `src` package imported below lives.
module_path = os.path.abspath(os.path.join('..'))
# Only append when missing so re-running this cell does not add duplicates.
if module_path not in sys.path:
    sys.path.append(module_path)

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm


from src.graph_dataset import GraphDataset
from src.graph_parser import parseGraphmlFile
from src.graph_utils import stress, total_stress, num_crossings, mean_edge_length, nodes_dict_to_array, distance_matrix
from src.graph_utils import gradient_kamada_kawai, max_neighbour_degrees_norm, sum_neighbour_degrees_norm, expansion_factor_norm, edge_crossings_norm
from src.graph_utils import max_j_node_centrality, sum_j_node_centrality, j_node_centrality, graph_entropy_norm

# ForceAtlas2 layout engine built with no arguments (library defaults);
# this single instance is used by every layout call in the cells below.
fa = ForceAtlas2()

No cython detected.  Install cython and compile the fa2util module for a 10-100x speed boost.


In [2]:
def read_list_of_graphs(dir_name,ext):
    """Parse every file in ``dir_name`` whose name ends with ``'.' + ext``.

    Parameters
    ----------
    dir_name : str
        Directory to scan (not recursive). A trailing path separator is
        optional because paths are built with ``os.path.join``.
    ext : str
        File extension without the leading dot, e.g. ``'graphml'``.

    Returns
    -------
    list
        One ``parseGraphmlFile(path, weighted=False, directed=False)`` result
        per matching file, ordered by file name.
    """
    suffix = '.' + ext
    # os.listdir returns names in arbitrary order; sort them so that positional
    # indices derived from the returned list are stable between runs.
    file_names = sorted(f for f in os.listdir(dir_name) if f.endswith(suffix))
    return [
        parseGraphmlFile(os.path.join(dir_name, f), weighted=False, directed=False)
        for f in file_names
    ]

In [None]:
# Benchmark sub-directories of ../data/ to process.
benchmarks = ['random-dag', 'rome', 'north']

# Slice [GRAPH_START:GRAPH_STOP] applied to each benchmark's graph list.
# None/None processes every graph; narrow the range for a quick trial run.
GRAPH_START, GRAPH_STOP = None, None

# One name per entry of `row` below, in the same order.
cols = [
    'graph_id', 'edge_id', 'num_nodes', 'num_edges',
    'edge_betweenness', 'stress', 'max_deg', 'min_deg', 'is_bridge',
    'diff_stress', 'diff_cross', 'diff_edgelength',
    'benchmark', 'exp_factor_norm', 'edge_cross_norm',
    'sum_neighbour_deg_norm', 'max_neighbour_deg_norm', 'max_jnc', 'sum_jnc',
]

data = []
for bench in benchmarks:
    list_graphs = read_list_of_graphs(f'../data/{bench}/','graphml')
    for idx_graph, graph in tqdm(list(enumerate(list_graphs[GRAPH_START:GRAPH_STOP]))):

        # FA2 layout of the intact graph: the baseline for all "diff" features
        # and the starting positions for every edge-removed layout below.
        pos0 = fa.forceatlas2_networkx_layout(graph)
        pos0_arr = nodes_dict_to_array(pos0)

        # Per-graph quantities; none of them change inside the edge loop.
        # Canonical NetworkX name; the shorter `edge_betweenness` alias is not
        # available in every NetworkX release.
        eb = nx.edge_betweenness_centrality(graph)  # edge betweenness
        st = stress(graph, pos0)                    # stress
        cross0 = num_crossings(graph, pos0)
        edgel0 = mean_edge_length(graph, pos0)
        total_stress0 = total_stress(graph, pos0)
        deg = nx.degree(graph, graph.nodes)
        # nx.bridges returns a one-shot generator, so materialise it once.
        # frozenset makes the membership test independent of (u, v) vs (v, u).
        bridges = {frozenset(b) for b in nx.bridges(graph)}
        nnodes, nedges = len(graph.nodes), len(graph.edges)

        for idx_edge, e in enumerate(graph.edges):
            n1, n2 = e

            # Re-run the layout on a copy without edge e, starting from pos0.
            graph_copy = graph.copy()
            graph_copy.remove_edges_from([e])
            pos1 = fa.forceatlas2_networkx_layout(graph_copy, pos=pos0)
            pos1_arr = nodes_dict_to_array(pos1)

            # Quality metrics are evaluated on the original graph (edge e
            # included) drawn at the new positions.
            cross1 = num_crossings(graph, pos1)
            edgel1 = mean_edge_length(graph, pos1)
            total_stress1 = total_stress(graph, pos1)
            exp_factor_norm = expansion_factor_norm(pos0_arr,pos1_arr)
            edge_cross_norm = edge_crossings_norm(cross0-cross1, len(graph_copy.edges))

            # TODO: add new features computed linearly, physics inspired eventually
            # Extra attributes
            max_deg = max(deg[n1], deg[n2])
            min_deg = min(deg[n1], deg[n2])
            sum_neighbour_deg_norm = sum_neighbour_degrees_norm(graph_copy,e)
            max_neighbour_deg_norm = max_neighbour_degrees_norm(graph_copy,e)
            max_jnc = max_j_node_centrality(graph_copy,pos1_arr,e)
            sum_jnc = sum_j_node_centrality(graph_copy,pos1_arr,e)
            is_bridge = frozenset(e) in bridges

            row = [idx_graph, idx_edge, nnodes, nedges, eb[e], st[e], max_deg, min_deg, is_bridge,
                total_stress0 - total_stress1, cross0 - cross1, edgel0 - edgel1,
                bench, exp_factor_norm, edge_cross_norm, sum_neighbour_deg_norm, max_neighbour_deg_norm, max_jnc, sum_jnc]
            data.append(row)

# Build the frame once, after every benchmark has contributed its rows.
df = pd.DataFrame(data, columns=cols)