In [68]:
nx.__version__

'3.4.2'

In [96]:
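# generate_fgraph_features.py
# ---------------------------
# intended command-line usage:
#   python generate_fgraph_features.py \
#          --input  fgraph_edges.csv   \
#          --output fgraph_features.csv
# NOTE(review): no argparse wiring for --input/--output is visible in this cell;
#               in the notebook the entry point is called directly as
#               main(input_csv_path, output_csv_path).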

import argparse, ast, warnings
from collections import Counter
from math import log2

import networkx as nx
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy.stats import skew

from collections import defaultdict

def compute_node_orbits(graph):
    """Group the nodes of ``graph`` into orbits under its automorphisms.

    Every isomorphism of ``graph`` onto itself is enumerated with
    ``GraphMatcher``; for each node the set of nodes it is mapped to across
    all of those mappings is collected.

    Returns:
        A list of sets of nodes, one per distinct image set, in order of
        first appearance.

    Note: the full set of self-isomorphisms is enumerated, so the running
    time grows with the number of such mappings.
    """
    self_matcher = nx.algorithms.isomorphism.GraphMatcher(graph, graph)

    # images[node] = every node that `node` is sent to by some self-isomorphism
    images = defaultdict(set)
    for mapping in self_matcher.isomorphisms_iter():
        for source, target in mapping.items():
            images[source].add(target)

    # Several nodes share the same image set; keep only the first copy of each.
    distinct = {}
    for members in images.values():
        distinct.setdefault(frozenset(members), members)

    return list(distinct.values())


# --------------------------------------------------------------------------- #
# helpers
# --------------------------------------------------------------------------- #
def shannon_entropy(counter: Counter) -> float:
    """Return the Shannon entropy (in bits) of the distribution in ``counter``.

    Args:
        counter: mapping from outcome to occurrence count.

    Returns:
        ``-sum(p * log2(p))`` over the outcomes with a positive count, where
        ``p`` is that count divided by the total of the positive counts;
        ``np.nan`` when there are no positive counts at all.

    Entries whose count is zero or negative are ignored: they contribute
    nothing to the entropy, and feeding them to ``log2`` would raise
    ``ValueError``.
    """
    positive = [c for c in counter.values() if c > 0]
    tot = sum(positive)
    if tot == 0:
        return np.nan
    # Summing the negated terms (rather than negating the sum) yields +0.0
    # instead of -0.0 for a single-outcome distribution.
    return sum(-(c / tot) * log2(c / tot) for c in positive)


def try_or_nan(func, *args, **kwargs):
    """Best-effort call: the value of ``func(*args, **kwargs)``, or NaN.

    Any ``Exception`` raised by the call is swallowed and ``np.nan`` is
    returned in its place, so one failing metric does not abort the rest.
    """
    try:
        outcome = func(*args, **kwargs)
    except Exception:
        outcome = np.nan
    return outcome
    
def extract_faces(embedding: nx.PlanarEmbedding):
    """Enumerate the faces of a planar embedding.

    Each directed half-edge ``(u, v)`` is used as a starting point for
    ``embedding.traverse_face`` unless it was already covered by a face
    found earlier.

    Returns:
        A list of faces, each being the node sequence returned by
        ``traverse_face``.
    """
    visited_half_edges = set()
    found = []

    for start in embedding:
        for neighbour in embedding[start]:
            if (start, neighbour) not in visited_half_edges:
                boundary = embedding.traverse_face(start, neighbour)
                found.append(boundary)
                # Consecutive nodes on the boundary (wrapping around from the
                # last node to the first) are the half-edges of this face.
                visited_half_edges.update(zip(boundary, boundary[1:] + boundary[:1]))

    return found



# --------------------------------------------------------------------------- #
# feature extractor
# --------------------------------------------------------------------------- #
def _distribution_stats(values_by_node):
    """Summarise a per-node score mapping as mean / max / std / skew.

    Anything that is not a non-empty dict (for example the NaN left behind
    by a failed centrality computation) yields NaN for all four statistics.
    """
    if isinstance(values_by_node, dict) and values_by_node:
        vals = list(values_by_node.values())
        return {
            "mean": np.mean(vals),
            "max": np.max(vals),
            "std": np.std(vals),
            "skew": skew(vals) if len(vals) > 2 else np.nan,
        }
    return dict.fromkeys(("mean", "max", "std", "skew"), np.nan)


def _automorphism_summary(G):
    """Return (automorphism count, number of orbits, largest orbit size).

    The self-isomorphisms of ``G`` are streamed exactly once: each mapping
    is counted and its node images are accumulated, so the (potentially
    huge) list of mappings is never held in memory or enumerated twice.
    """
    matcher = nx.algorithms.isomorphism.GraphMatcher(G, G)
    images = {}
    order = 0
    for mapping in matcher.isomorphisms_iter():
        order += 1
        for node, image in mapping.items():
            images.setdefault(node, set()).add(image)

    # Nodes in the same orbit share the same image set.
    orbits = {frozenset(group) for group in images.values()}
    largest = max(len(orbit) for orbit in orbits) if orbits else np.nan
    return order, len(orbits), largest


def extract_features(edge_list):
    """Compute a flat dictionary of structural features for one graph.

    Args:
        edge_list: iterable of edges accepted by ``nx.Graph.add_edges_from``;
            the graph is built as a simple undirected ``nx.Graph``.

    Returns:
        dict mapping feature name to value, grouped by prefix: ``Basic_``,
        ``Connectivity_``, ``Centrality_``, ``Core_``, ``Robust_``,
        ``Cycle_``, ``Spectral_`` / ``Kirchhoff_index``, ``Planarity_`` and
        ``Symmetry_``. A metric that cannot be computed is reported as NaN
        instead of raising. Keys are always inserted in the same order.
    """
    G = nx.Graph()
    G.add_edges_from(edge_list)
    feats = {}
    n = G.number_of_nodes()
    m = G.number_of_edges()

    # I. Basic ----------------------------------------------------------------
    degs = [d for _, d in G.degree()]

    feats.update({
        "Basic_num_nodes"          : n,
        "Basic_num_edges"          : m,
        "Basic_min_degree"         : min(degs) if degs else np.nan,
        "Basic_max_degree"         : max(degs) if degs else np.nan,
        "Basic_avg_degree"         : float(np.mean(degs)) if degs else np.nan,
        "Basic_degree_std"         : float(np.std(degs))  if degs else np.nan,
        "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
        "Basic_density"            : nx.density(G),
        "Basic_edge_to_node_ratio" : m / n if n else np.nan,
        "Basic_degree_entropy"     : shannon_entropy(Counter(degs)),
    })

    # II. Connectivity -------------------------------------------------------
    n_components = nx.number_connected_components(G)
    feats.update({
        # is_connected raises on a graph with no nodes; keep it best-effort
        # like the other metrics so an empty edge list does not abort the row.
        "Connectivity_is_connected"             : try_or_nan(nx.is_connected, G),
        "Connectivity_num_components"           : n_components,
        "Connectivity_diameter"                 : try_or_nan(nx.diameter, G),
        "Connectivity_radius"                   : try_or_nan(nx.radius, G),
        "Connectivity_avg_shortest_path_length" : try_or_nan(nx.average_shortest_path_length, G),
        "Connectivity_wiener_index"             : try_or_nan(nx.wiener_index, G),
    })

    # III. Centrality --------------------------------------------------------
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        bc = try_or_nan(nx.betweenness_centrality, G, normalized=True)
        cc = try_or_nan(nx.closeness_centrality, G)
        ec = try_or_nan(nx.eigenvector_centrality_numpy, G)

    feats.update({f"Centrality_betweenness_{k}": v for k, v in _distribution_stats(bc).items()})
    feats.update({f"Centrality_closeness_{k}":  v for k, v in _distribution_stats(cc).items()})
    feats.update({f"Centrality_eigenvector_{k}": v for k, v in _distribution_stats(ec).items()})

    # IV. Core structure -----------------------------------------------------
    core_numbers = try_or_nan(nx.core_number, G)
    if isinstance(core_numbers, dict) and core_numbers:
        core_vals = list(core_numbers.values())
        feats["Core_max_core_index"]  = max(core_vals)
        feats["Core_core_index_mean"] = float(np.mean(core_vals))
    else:
        feats["Core_max_core_index"]  = np.nan
        feats["Core_core_index_mean"] = np.nan

    # V. Robustness ----------------------------------------------------------
    feats["Robust_articulation_points"] = try_or_nan(
        lambda g: sum(1 for _ in nx.articulation_points(g)), G)
    feats["Robust_bridge_count"] = try_or_nan(
        lambda g: sum(1 for _ in nx.bridges(g)), G)

    # VI. Cycle extras -------------------------------------------------------
    # NOTE(review): these count cycles of the one basis returned by
    # nx.cycle_basis, not all cycles of that length in the graph; a cycle
    # basis is generally not unique, so the counts may depend on which basis
    # is returned -- confirm this is the intended feature.
    cycle_lengths = Counter(len(c) for c in nx.cycle_basis(G))
    feats["Cycle_num_cycles_len_5"] = cycle_lengths[5]
    feats["Cycle_num_cycles_len_6"] = cycle_lengths[6]

    # VII. Spectral & Kirchhoff ---------------------------------------------
    try:
        # toarray() always gives a plain ndarray (todense() may give np.matrix).
        L     = nx.laplacian_matrix(G).toarray()
        leigs = np.sort(np.linalg.eigvalsh(L))
        feats["Spectral_algebraic_connectivity"] = leigs[1]           if len(leigs) > 1 else np.nan
        feats["Spectral_spectral_gap"]           = leigs[1]-leigs[0]  if len(leigs) > 1 else np.nan
        feats["Spectral_laplacian_mean"]         = np.mean(leigs)
        feats["Spectral_laplacian_std"]          = np.std(leigs)
        feats["Spectral_laplacian_skew"]         = skew(leigs) if len(leigs) > 2 else np.nan

        # first 10 eigen-values (pad with NaN)
        pad = np.full(10, np.nan)
        pad[:min(10, len(leigs))] = leigs[:10]
        feats.update({f"Spectral_lap_eig_{i}": pad[i] for i in range(10)})

        # Kirchhoff index: n * sum(1 / lambda) over eigenvalues above 1e-12.
        # NOTE(review): for a disconnected graph this silently drops the extra
        # zero eigenvalues -- confirm that is the desired convention.
        nonzero = leigs[leigs > 1e-12]
        feats["Kirchhoff_index"] = n * np.sum(1.0 / nonzero) if len(nonzero) >= 1 else np.nan
    except Exception:
        feats.update({
            "Spectral_algebraic_connectivity": np.nan,
            "Spectral_spectral_gap":           np.nan,
            "Spectral_laplacian_mean":         np.nan,
            "Spectral_laplacian_std":          np.nan,
            "Spectral_laplacian_skew":         np.nan,
            **{f"Spectral_lap_eig_{i}": np.nan for i in range(10)},
            "Kirchhoff_index": np.nan,
        })

    # VIII. Planarity --------------------------------------------------------
    planarity_nan = dict.fromkeys(
        ("Planarity_num_faces", "Planarity_face_size_mean", "Planarity_face_size_max"),
        np.nan,
    )
    try:
        planar, emb = nx.check_planarity(G)
        if planar:
            f_sizes = [len(face) for face in extract_faces(emb)]
            planarity = {
                # Euler's formula for a planar graph with C components:
                # V - E + F = 1 + C  (reduces to E - V + 2 when connected).
                "Planarity_num_faces"      : m - n + 1 + n_components,
                "Planarity_face_size_mean" : np.mean(f_sizes) if f_sizes else np.nan,
                "Planarity_face_size_max"  : max(f_sizes)     if f_sizes else np.nan,
            }
        else:  # non-planar
            planarity = planarity_nan
    except Exception:
        planarity = planarity_nan
    feats.update(planarity)

    # IX. Symmetry -----------------------------------------------------------
    try:
        aut_size, num_orbits, orbit_max = _automorphism_summary(G)
    except Exception:
        aut_size = num_orbits = orbit_max = np.nan
    feats["Symmetry_automorphism_group_order"] = aut_size
    feats["Symmetry_num_orbits"]               = num_orbits
    feats["Symmetry_orbit_size_max"]           = orbit_max

    return feats


# --------------------------------------------------------------------------- #
# CLI driver
# --------------------------------------------------------------------------- #
def main(inp, out):
    """Build the feature table for every graph listed in a CSV file.

    Args:
        inp: path of the input CSV; it must have an ``EDGES`` column whose
            cells are Python-literal edge lists (parsed with
            ``ast.literal_eval``).
        out: path the resulting feature CSV is written to (without index).

    Raises:
        ValueError: if the input has no ``EDGES`` column.

    A ``COEFFICIENTS`` column, when present in the input, is carried over in
    front of the computed feature columns.
    """
    table = pd.read_csv(inp)
    if "EDGES" not in table.columns:
        raise ValueError("CSV must contain an 'EDGES' column")

    # Cells are stored as text; turn them back into Python edge lists.
    table["EDGES"] = table["EDGES"].apply(ast.literal_eval)

    # Register progress_apply on pandas objects, then extract row by row.
    tqdm.pandas(desc="Extracting graph features")
    per_graph = table["EDGES"].progress_apply(extract_features)
    features = pd.DataFrame(per_graph.tolist())

    passthrough = [name for name in ("COEFFICIENTS",) if name in table.columns]
    combined = pd.concat([table[passthrough], features], axis=1)

    combined.to_csv(out, index=False)
    print(f"✓ Feature table saved → {out}")





In [107]:
main('/Users/rezadoobary/Documents/ML-correlator/Graph_Edge_Data/den_graph_data_10.csv',\
     '/Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/10loopfeats.csv')

  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals)

KeyboardInterrupt: 

In [106]:
pd.read_csv("/Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/9loopfeats.csv")

Unnamed: 0,COEFFICIENTS,Basic_num_nodes,Basic_num_edges,Basic_min_degree,Basic_max_degree,Basic_avg_degree,Basic_degree_std,Basic_degree_skew,Basic_density,Basic_edge_to_node_ratio,...,Spectral_lap_eig_7,Spectral_lap_eig_8,Spectral_lap_eig_9,Kirchhoff_index,Planarity_num_faces,Planarity_face_size_mean,Planarity_face_size_max,Symmetry_automorphism_group_order,Symmetry_num_orbits,Symmetry_orbit_size_max
0,1,13,33,4,6,5.076923,0.474186,0.230525,0.423077,2.538462,...,6.316227,6.462598,6.708353,33.589509,22,3.000000,3,4,6,4
1,1,13,32,4,6,4.923077,0.474186,-0.230525,0.410256,2.461538,...,6.000000,6.389224,6.661891,34.974334,21,3.047619,4,2,8,2
2,1,13,31,4,5,4.769231,0.421325,-1.278019,0.397436,2.384615,...,6.000000,6.201015,6.574197,36.521209,20,3.100000,4,2,7,2
3,1,13,33,4,6,5.076923,0.615385,-0.046875,0.423077,2.538462,...,6.209654,6.570083,6.928693,33.912318,22,3.000000,3,2,7,2
4,1,13,32,4,6,4.923077,0.474186,-0.230525,0.410256,2.461538,...,6.194070,6.404493,6.579685,35.127514,21,3.047619,4,1,13,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13967,0,13,32,4,6,4.923077,0.916644,0.152471,0.410256,2.461538,...,5.663112,6.209057,6.210717,39.773890,21,3.047619,4,2,9,2
13968,0,13,30,4,6,4.615385,0.923077,0.833333,0.384615,2.307692,...,5.102196,5.254102,6.618034,49.014368,19,3.157895,4,16,4,4
13969,0,13,28,4,5,4.307692,0.461538,0.833333,0.358974,2.153846,...,4.482859,5.254102,6.230725,75.612069,17,3.294118,7,32,4,4
13970,0,13,33,4,7,5.076923,1.071414,0.222049,0.423077,2.538462,...,5.653615,6.519120,6.936457,38.560318,22,3.000000,3,2,9,2


In [75]:
main('/Users/rezadoobary/Documents/ML-correlator/Graph_Edge_Data/den_graph_data_6.csv',\
     '/Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/6loopfeats.csv')

Extracting graph features:   0%|          | 0/31 [00:00<?, ?it/s]

  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
  "Basic_degree_skew"        : float(skew(degs))    if len(degs) > 2 else np.nan,
  "std": np.std(vals),   "skew": skew(vals) if len(vals) > 2 else np.nan}
Extracting graph features: 100%|██████████| 31/31 [00:00<00:00, 119.00it/s]

✓ Feature table saved → /Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/6loopfeats.csv





In [76]:
pd.read_csv("/Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/6loopfeats.csv")

Unnamed: 0,COEFFICIENTS,Basic_num_nodes,Basic_num_edges,Basic_min_degree,Basic_max_degree,Basic_avg_degree,Basic_degree_std,Basic_degree_skew,Basic_density,Basic_edge_to_node_ratio,...,Spectral_lap_eig_7,Spectral_lap_eig_8,Spectral_lap_eig_9,Kirchhoff_index,Planarity_num_faces,Planarity_face_size_mean,Planarity_face_size_max,Symmetry_automorphism_group_order,Symmetry_num_orbits,Symmetry_orbit_size_max
0,1,10,24,4,5,4.8,0.4,-1.5,0.533333,2.4,...,6.561553,7.0,7.0,19.177795,,,,,,
1,1,10,24,4,6,4.8,0.6,0.111111,0.533333,2.4,...,6.675131,6.675131,7.675131,19.260092,,,,,,
2,1,10,23,4,5,4.6,0.489898,-0.408248,0.511111,2.3,...,6.561553,6.675131,7.0,20.294601,,,,,,
3,1,10,24,4,7,4.8,0.979796,1.046136,0.533333,2.4,...,6.434805,7.150632,8.598124,19.799899,,,,,,
4,1,10,21,4,5,4.2,0.4,1.5,0.466667,2.1,...,5.801938,6.457836,6.866679,23.004816,,,,,,
5,1,10,22,4,6,4.4,0.663325,1.397916,0.488889,2.2,...,6.0,6.884738,7.44949,21.799143,,,,,,
6,1,10,24,4,6,4.8,0.748331,0.343622,0.533333,2.4,...,6.538445,7.0,8.043058,19.65034,,,,,,
7,0,10,23,4,5,4.6,0.489898,-0.408248,0.511111,2.3,...,6.522418,6.873211,7.0,20.470602,,,,,,
8,1,10,24,4,6,4.8,0.748331,0.343622,0.533333,2.4,...,6.579425,7.316265,7.843404,19.459211,,,,,,
9,1,10,23,4,6,4.6,0.663325,0.657843,0.511111,2.3,...,6.561553,6.675131,7.561553,20.41564,,,,,,


In [60]:
main('/Users/rezadoobary/Documents/ML-correlator/Graph_Edge_Data/den_graph_data_7.csv',\
     '/Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/7loopfeats.csv')




[A[A[A


  "Basic_degree_skew"        : float(skew(degs)) if len(degs) > 2 else 0,
  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))



[A[A[A


Extracting features: 100%|██████████| 164/164 [00:00<00:00, 452.31it/s]

✓ Saved feature table → /Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/7loopfeats.csv





In [61]:
main('/Users/rezadoobary/Documents/ML-correlator/Graph_Edge_Data/den_graph_data_8.csv',\
     '/Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/8loopfeats.csv')




  "Basic_degree_skew"        : float(skew(degs)) if len(degs) > 2 else 0,
  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))



[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


Extracting features: 100%|██████████| 1432/1432 [00:03<00:00, 471.70it/s]

✓ Saved feature table → /Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/8loopfeats.csv





In [62]:
main('/Users/rezadoobary/Documents/ML-correlator/Graph_Edge_Data/den_graph_data_9.csv',\
     '/Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/9loopfeats.csv')




[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


  "Basic_degree_skew"        : float(skew(degs)) if len(degs) > 2 else 0,
  return float((xy * (M - ab)).sum() / np.sqrt(vara * varb))



[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A[A


[A[A

✓ Saved feature table → /Users/rezadoobary/Documents/ML-correlator/Tree classifier for graphs/mixed_loops/features_tabular/9loopfeats.csv
