In [109]:
import numpy as np

def make_synthetic_graph(
    community_sizes,
    num_features,
    mean_scale,
    sigma,
    seed=42,
    undirected=True,
    edge_mode="pairwise",
    edge_params=None,
):
    rng = np.random.default_rng(seed)

    community_sizes = tuple(int(s) for s in community_sizes)
    k = len(community_sizes)
    n = int(sum(community_sizes))

    y = np.empty(n, dtype=int)
    communities = []
    idx = 0
    for c, size in enumerate(community_sizes):
        nodes = np.arange(idx, idx + size, dtype=int)
        y[nodes] = c
        communities.append(nodes)
        idx += size

    means = rng.normal(loc=0.0, scale=float(mean_scale), size=(k, num_features))
    X = np.empty((n, num_features), dtype=float)
    for c in range(k):
        nodes = communities[c]
        X[nodes] = rng.normal(loc=means[c], scale=float(sigma), size=(len(nodes), num_features))

    edge_params = edge_params or {}

    edge_set = set()

    def add_edge(u, v):
        if u == v:
            return
        if undirected:
            a, b = (u, v) if u < v else (v, u)
            edge_set.add((a, b))
        else:
            edge_set.add((u, v))

    if edge_mode == "pairwise":
        p_out = float(edge_params.get("p_out", 0.1))
        intra_multiplier = float(edge_params.get("intra_multiplier", 2.0))
        p_in = min(p_out * intra_multiplier, 1.0)

        for i in range(n):
            for j in range(i + 1, n):
                p = p_in if y[i] == y[j] else p_out
                if rng.random() < p:
                    add_edge(i, j)

    elif edge_mode == "infomap":
        p_in_extra = float(edge_params.get("p_in_extra", 0.25))
        bridges_per_pair = int(edge_params.get("bridges_per_pair", 1))
        p_out_noise = float(edge_params.get("p_out_noise", 0.0))

        for nodes in communities:
            m = len(nodes)
            for i in range(m):
                add_edge(int(nodes[i]), int(nodes[(i + 1) % m]))

        for nodes in communities:
            nodes_list = nodes.tolist()
            for a in range(len(nodes_list)):
                for b in range(a + 1, len(nodes_list)):
                    if rng.random() < p_in_extra:
                        add_edge(nodes_list[a], nodes_list[b])

        for c1 in range(k):
            for c2 in range(c1 + 1, k):
                for _ in range(bridges_per_pair):
                    u = int(rng.choice(communities[c1]))
                    v = int(rng.choice(communities[c2]))
                    add_edge(u, v)

        if p_out_noise > 0:
            for i in range(n):
                for j in range(i + 1, n):
                    if y[i] != y[j] and rng.random() < p_out_noise:
                        add_edge(i, j)

    else:
        raise ValueError(f"Unknown edge_mode: {edge_mode}")

    edges = list(edge_set)
    if undirected:
        edges = edges + [(v, u) for (u, v) in edges]

    edge_index = np.array(edges, dtype=np.int64).T
    if edge_index.size == 0:
        edge_index = np.zeros((2, 0), dtype=np.int64)

    return X, y, edge_index, means

from scipy.optimize import linear_sum_assignment

def clustering_correctness(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    true_labels = np.unique(y_true)
    pred_labels = np.unique(y_pred)

    C = np.zeros((len(true_labels), len(pred_labels)), dtype=int)
    for i, t in enumerate(true_labels):
        for j, p in enumerate(pred_labels):
            C[i, j] = np.sum((y_true == t) & (y_pred == p))

    row_ind, col_ind = linear_sum_assignment(-C)

    label_map = {
        pred_labels[col]: true_labels[row]
        for row, col in zip(row_ind, col_ind)
    }

    y_pred_aligned = np.array([label_map[p] for p in y_pred])

    y_correct = (y_pred_aligned == y_true).astype(int)

    return y_correct, y_pred_aligned

In [110]:
X, y, edge_index, means = make_synthetic_graph(
    community_sizes=(5, 5, 6, 6, 8, 10),
    num_features=4,
    mean_scale=2.5,
    sigma=0.6,
    seed=42,
    undirected=True,
    edge_mode="pairwise",
    edge_params=dict(p_out=0.15, intra_multiplier=2.0),
)

print("X:", X.shape)
print("y:", y.shape)
print("edge_index:", edge_index.shape)
print("num edges:", edge_index.shape[1])


X: (40, 4)
y: (40,)
edge_index: (2, 316)
num edges: 316


In [111]:
import torch
import pathpyG as pp

edge_index = torch.as_tensor(edge_index, dtype=torch.long).contiguous()
g = pp.Graph.from_edge_index(edge_index=edge_index, num_nodes=40)

pp.plot(
    g.to_undirected(),     
    backend="d3js",

    layout="kk",           

    node_color=y,           
    cmap="tab10",

    node_size=16,
    edge_opacity=0.15,
    edge_size=1,

    show_labels=False,
)

<pathpyG.visualisations._d3js.backend.D3jsBackend at 0x224d8d94210>

In [112]:
'Infomap Clustering'


import igraph as ig

def edge_index_to_undirected_edges(edge_index):
    ei = torch.as_tensor(edge_index, dtype=torch.long)
    if ei.numel() == 0:
        return []
    edges = set()
    for u, v in ei.t().tolist():
        if u == v:
            continue
        a, b = (u, v) if u < v else (v, u)
        edges.add((a, b))
    return list(edges)

def run_infomap_igraph(edge_index, num_nodes, trials=50):
    edges = edge_index_to_undirected_edges(edge_index)

    g = ig.Graph(n=num_nodes, edges=edges, directed=False)

    comps = g.components()

    cl = g.community_infomap(trials=trials) 
    labels = np.full(num_nodes, -1, dtype=int)
    for cid, nodes in enumerate(cl):
        labels[list(nodes)] = cid

    return labels

y_infomap = run_infomap_igraph(edge_index, num_nodes=40, trials=100)
print(y_infomap)


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0]


In [113]:
X, y, edge_index, means = make_synthetic_graph(
    community_sizes=(5, 5, 6, 6, 8, 10),
    num_features=4,
    mean_scale=2.5,
    sigma=0.6,
    seed=42,
    undirected=True,
    edge_mode="infomap",
    edge_params=dict(
        p_in_extra=0.30,
        bridges_per_pair=1,
        p_out_noise=0.01,
    ),
)

In [114]:
import torch
import pathpyG as pp

edge_index = torch.as_tensor(edge_index, dtype=torch.long).contiguous()
g = pp.Graph.from_edge_index(edge_index=edge_index, num_nodes=40)

pp.plot(
    g.to_undirected(),     
    backend="d3js",

    layout="kk",           
    node_color=y,           
    cmap="tab10",

    node_size=16,
    edge_opacity=0.15,
    edge_size=1,

    show_labels=False,
)

<pathpyG.visualisations._d3js.backend.D3jsBackend at 0x224d8d96820>

In [115]:
y_infomap = run_infomap_igraph(edge_index, num_nodes=40, trials=100)
print(y_infomap)

[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5
 5 5 5]


In [116]:
pp.plot(
    g.to_undirected(),     
    backend="d3js",

    layout="kk",           

    node_color=y_infomap,           
    cmap="tab10",

    node_size=16,
    edge_opacity=0.15,
    edge_size=1,

    show_labels=False,
)

<pathpyG.visualisations._d3js.backend.D3jsBackend at 0x224d8d95b00>

In [117]:
y_correct, y_infomap_aligned = clustering_correctness(y, y_infomap)

print("Accuracy:", y_correct.mean())
print("Correct nodes:", y_correct.sum(), "/", len(y_correct))

Accuracy: 1.0
Correct nodes: 40 / 40


In [118]:
'Mittelmäßig guter Graph'

X, y, edge_index, means = make_synthetic_graph(
    community_sizes=(5, 5, 6, 6, 8, 10),
    num_features=4,
    mean_scale=2.5,
    sigma=0.6,
    seed=42,
    undirected=True,
    edge_mode="infomap",
    edge_params=dict(
        p_in_extra=0.18,
        bridges_per_pair=2,
        p_out_noise=0.015,
    ),
)

In [119]:
y_infomap = run_infomap_igraph(edge_index, num_nodes=40, trials=100)
print(y_infomap)

[0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 2 3 0 0 2 2 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4
 4 4 4]


In [120]:
y_correct, y_infomap_aligned = clustering_correctness(y, y_infomap)

print("Accuracy:", y_correct.mean())
print("Correct nodes:", y_correct.sum(), "/", len(y_correct))

Accuracy: 0.85
Correct nodes: 34 / 40


In [121]:
import torch
import pathpyG as pp

edge_index = torch.as_tensor(edge_index, dtype=torch.long).contiguous()
g = pp.Graph.from_edge_index(edge_index=edge_index, num_nodes=40)

pp.plot(
    g.to_undirected(),     
    backend="d3js",

    layout="kk",           
    node_color=y,           
    cmap="tab10",

    node_size=16,
    edge_opacity=0.15,
    edge_size=1,

    show_labels=False,
)

<pathpyG.visualisations._d3js.backend.D3jsBackend at 0x224d8d94520>

In [10]:
from torch_geometric.datasets import StochasticBlockModelDataset
import pathpyG as pp

root= r"C:\Users\david\PythonProjekte\Bachelor\TestGraph"

dataset = StochasticBlockModelDataset(
    root=root,
    block_sizes=[20, 20, 20],      
    edge_probs=[
        [0.20, 0.02, 0.02],
        [0.02, 0.20, 0.02],
        [0.02, 0.02, 0.20],
    ],   
    is_undirected=True,
)

data = dataset[0]
print(data)

edge_index = data.edge_index
print(edge_index)
print(y)
y= data.y
g = pp.Graph.from_edge_index(edge_index=edge_index)

pp.plot(   
    g,
    backend="d3js",

    layout="kk",           
    node_color=y,           
    cmap="tab10",

    node_size=16,
    edge_opacity=0.15,
    edge_size=1,

    show_labels=False,
)

Data(edge_index=[2, 290], y=[60])
tensor([[ 0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  3,
          3,  4,  4,  4,  4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  8,
          8,  8,  9,  9, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12,
         12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 16, 16,
         16, 16, 17, 17, 17, 17, 17, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20,
         20, 20, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
         23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 26, 26, 26,
         26, 26, 26, 27, 27, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 31,
         31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34,
         34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37,
         37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 41,
         41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 44, 44, 44, 44, 44

Processing...
Done!


<pathpyG.visualisations._d3js.backend.D3jsBackend at 0x1c67840cc50>

In [None]:
from datasets import load_dataset

ds = load_dataset("graphs-datasets/MUTAG")
data = ds.data

print(data)



{'train': MemoryMappedTable
edge_index: list<item: list<item: int64>>
  child 0, item: list<item: int64>
      child 0, item: int64
node_feat: list<item: list<item: double>>
  child 0, item: list<item: double>
      child 0, item: double
edge_attr: list<item: list<item: double>>
  child 0, item: list<item: double>
      child 0, item: double
y: list<item: int64>
  child 0, item: int64
num_nodes: int64
----
edge_index: [[[[0,0,1,1,2,...,14,14,14,15,16],[1,5,0,2,1,...,12,15,16,14,14]],[[0,0,1,1,2,...,10,10,10,11,12],[1,9,0,2,1,...,8,11,12,10,10]],...,[[0,0,1,1,2,...,18,18,18,19,20],[1,5,0,2,1,...,16,19,20,18,18]],[[0,0,1,1,2,...,13,13,13,14,15],[1,12,0,2,1,...,8,14,15,13,13]]]]
node_feat: [[[[1,0,0,0,0,0,0],[1,0,0,0,0,0,0],...,[0,0,1,0,0,0,0],[0,0,1,0,0,0,0]],[[1,0,0,0,0,0,0],[1,0,0,0,0,0,0],...,[0,0,1,0,0,0,0],[0,0,1,0,0,0,0]],...,[[1,0,0,0,0,0,0],[1,0,0,0,0,0,0],...,[0,0,1,0,0,0,0],[0,0,1,0,0,0,0]],[[1,0,0,0,0,0,0],[1,0,0,0,0,0,0],...,[0,0,1,0,0,0,0],[0,0,1,0,0,0,0]]]]
edge_attr: [[[[1

In [None]:
import torch

graph = ds["train"][1]

edge_index = torch.tensor(graph["edge_index"], dtype=torch.long)

graph = pp.Graph.from_edge_index(edge_index=edge_index)

pp.plot(   
    g,
    backend="d3js",

    layout="kk",         

    node_size=16,
    edge_opacity=0.15,
    edge_size=1,

    show_labels=False,
)

<pathpyG.visualisations._d3js.backend.D3jsBackend at 0x21c8e301e50>