In [3]:

# Example usage: the two edgelist files to compare. Both live in the same
# directory, so it is spelled out once; edit EDGELIST_DIR to point elsewhere.
EDGELIST_DIR = "/home/ofrie.r/ncp_expand/target/random"
file1 = f"{EDGELIST_DIR}/facebook_random_100.edgelist"
file2 = f"{EDGELIST_DIR}/facebook_random_100_old.edgelist"

In [5]:
from collections import defaultdict

def read_edgelist(path, comments="#"):
    """Read an undirected edgelist file into a set of edges.

    Each edge line must hold exactly two whitespace-separated node labels;
    any other line (blank, or with a different number of fields) is skipped.
    Labels are kept as strings, and every edge is stored as a sorted 2-tuple
    so that "u v" and "v u" map to the same edge.

    Args:
        path: Path of the edgelist file to read.
        comments: Prefix that marks a comment line (default "#"). Such lines
            are skipped instead of being parsed as edges. Pass None or ""
            to disable comment handling.

    Returns:
        A set of (u, v) string tuples with u <= v in lexicographic order.
    """
    edges = set()
    with open(path, "r") as f:
        for line in f:
            stripped = line.strip()
            # A two-token comment such as "# header" would otherwise be
            # mistaken for an edge between the nodes "#" and "header".
            if comments and stripped.startswith(comments):
                continue
            parts = stripped.split()
            if len(parts) != 2:
                continue  # skip blank and malformed lines
            edges.add(tuple(sorted(parts)))  # undirected: canonical order
    return edges

def compare_edgelists(file1, file2):
    """Compare two edgelist files and print a summary of their differences.

    Prints the edge count of each file, the number of edges common to both,
    and the number of edges unique to each. It then reports every node that
    is an endpoint of a unique edge in both files, together with those edges.
    Nodes and edges are reported in sorted (lexicographic) order so that the
    output is identical from run to run.

    Args:
        file1: Path of the first edgelist file.
        file2: Path of the second edgelist file.

    Returns:
        None; the report is written to stdout.
    """
    edges1 = read_edgelist(file1)
    edges2 = read_edgelist(file2)

    common = edges1 & edges2
    only_in_1 = edges1 - edges2
    only_in_2 = edges2 - edges1

    # Build node -> edges mapping for the diff edges. Iterating in sorted
    # order keeps each per-node edge list stable across runs (set iteration
    # order over strings depends on the per-process hash seed).
    node_to_edges1 = defaultdict(list)
    node_to_edges2 = defaultdict(list)
    for u, v in sorted(only_in_1):
        node_to_edges1[u].append((u, v))
        node_to_edges1[v].append((u, v))
    for u, v in sorted(only_in_2):
        node_to_edges2[u].append((u, v))
        node_to_edges2[v].append((u, v))

    # Find overlapping nodes across diff edges
    shared_nodes = node_to_edges1.keys() & node_to_edges2.keys()

    # Print stats
    print(f"File1: {file1}, edges={len(edges1)}")
    print(f"File2: {file2}, edges={len(edges2)}")
    print(f"Common edges: {len(common)}")
    print(f"Unique to {file1}: {len(only_in_1)}")
    print(f"Unique to {file2}: {len(only_in_2)}")
    print(f"Nodes shared between diff edges: {len(shared_nodes)}")

    # Print overlap details
    if not shared_nodes:
        print("\nNo nodes are shared between the different edges.")
    else:
        # Sorted so the per-node sections appear in a reproducible order.
        for node in sorted(shared_nodes):
            print(f"\nNode {node} is in unique edges of both files:")
            print(f"  {file1}: {node_to_edges1[node]}")
            print(f"  {file2}: {node_to_edges2[node]}")

# Run the comparison on the two example files; the report is printed to stdout.
compare_edgelists(file1, file2)


File1: /home/ofrie.r/ncp_expand/target/random/facebook_random_100.edgelist, edges=88334
File2: /home/ofrie.r/ncp_expand/target/random/facebook_random_100_old.edgelist, edges=88334
Common edges: 88234
Unique to /home/ofrie.r/ncp_expand/target/random/facebook_random_100.edgelist: 100
Unique to /home/ofrie.r/ncp_expand/target/random/facebook_random_100_old.edgelist: 100
Nodes shared between diff edges: 6

Node 2433 is in unique edges of both files:
  /home/ofrie.r/ncp_expand/target/random/facebook_random_100.edgelist: [('2433', '629')]
  /home/ofrie.r/ncp_expand/target/random/facebook_random_100_old.edgelist: [('2433', '3522')]

Node 2936 is in unique edges of both files:
  /home/ofrie.r/ncp_expand/target/random/facebook_random_100.edgelist: [('2936', '3091')]
  /home/ofrie.r/ncp_expand/target/random/facebook_random_100_old.edgelist: [('2936', '3058')]

Node 897 is in unique edges of both files:
  /home/ofrie.r/ncp_expand/target/random/facebook_random_100.edgelist: [('1999', '897')]
  /ho