In [31]:
from utils import sbm_dataset, TeacherNet, LabelGeneration, communityPassing
import pathpyG as pp
import torch

root= r"C:\Users\david\PythonProjekte\Bachelor\TestGraph"

dataset = sbm_dataset.StochasticBlockModelDataset(
    root=root,
    block_sizes=[20, 20, 20],      
    edge_probs=[
        [0.20, 0.02, 0.02],
        [0.02, 0.20, 0.02],
        [0.02, 0.02, 0.20],
    ],   
    is_undirected=True,
    num_channels=5
)

In [32]:
data = dataset[0]
print(data)
print(data.x)
print(data.get_edge_index)
print(data.y)

Data(x=[60, 5], edge_index=[2, 320], y=[60])
tensor([[-7.1414e-01, -2.3441e+00,  1.2613e+00,  2.1721e+00,  1.3653e-01],
        [-5.5623e-01,  1.1993e+00,  5.3192e-01,  2.2204e+00,  3.0013e-01],
        [-3.9531e+00, -1.3429e+00,  3.0359e+00, -1.0861e-01,  2.7132e+00],
        [-1.2721e+00, -7.4850e-01,  1.6894e+00,  1.5753e+00,  1.1964e+00],
        [-2.3260e+00, -2.6960e+00,  1.6646e+00,  1.1867e+00,  1.2128e+00],
        [-3.1224e+00, -2.6539e+00,  2.7879e+00,  2.3431e-01,  2.3350e+00],
        [ 1.1646e-01,  1.3145e-01,  8.7490e-01,  1.2113e+00,  8.4448e-01],
        [-1.4612e+00, -1.9314e+00,  1.5811e+00,  1.4969e+00,  1.0985e+00],
        [ 9.5892e-01,  6.7349e-01,  3.6764e-01,  2.4848e+00, -3.3061e-01],
        [-1.9077e+00, -3.9594e+00,  2.2972e+00,  7.8866e-01,  1.3290e+00],
        [ 2.5142e+00,  7.2036e-01, -1.1975e+00,  9.7290e-01,  8.6780e-01],
        [-4.7860e-02,  2.0208e+00,  1.8444e+00,  2.7323e+00, -6.9510e-02],
        [ 1.6690e+00, -4.1813e-01,  2.2099e-01,  5.7118

In [33]:
from utils.LabelGeneration import LabelGeneration

data = dataset[0]

label_gen = LabelGeneration(
    in_channels=data.x.size(1),
    num_classes=5,
    hidden=64,
    temperature=1.0,
    stochastic=False,
    seed=42,
    device=data.x.device
)

data.labels = label_gen.teacher_labels(data)
print(data.labels)


tensor([3, 3, 3, 3, 0, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 0, 0, 0,
        0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 3,
        3, 3, 3, 0, 3, 3, 3, 3, 0, 3, 3, 3])


In [None]:
import torch.nn.functional as F
def make_splits(num_nodes: int, train_ratio=0.6, val_ratio=0.2, seed=0, device="cpu"):
    g = torch.Generator(device=device).manual_seed(seed)
    perm = torch.randperm(num_nodes, generator=g, device=device)

    n_train = int(train_ratio * num_nodes)
    n_val = int(val_ratio * num_nodes)

    train_idx = perm[:n_train]
    val_idx = perm[n_train:n_train + n_val]
    test_idx = perm[n_train + n_val:]

    train_mask = torch.zeros(num_nodes, dtype=torch.bool, device=device)
    val_mask = torch.zeros(num_nodes, dtype=torch.bool, device=device)
    test_mask = torch.zeros(num_nodes, dtype=torch.bool, device=device)

    train_mask[train_idx] = True
    val_mask[val_idx] = True
    test_mask[test_idx] = True

    return train_mask, val_mask, test_mask


@torch.no_grad()
def accuracy(logits, y, mask):
    pred = logits.argmax(dim=-1)
    return (pred[mask] == y[mask]).float().mean().item()


def train_model(data, model, lr=1e-2, weight_decay=5e-4, epochs=200):
    device = data.x.device
    model = model.to(device)

    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    best_val = -1.0
    best_state = None

    for epoch in range(1, epochs + 1):
        model.train()
        opt.zero_grad()

        logits = model(data.x, data.edge_index, data.y)  
        loss = F.cross_entropy(logits[data.train_mask], data.y_task[data.train_mask])
        loss.backward()
        opt.step()

        model.eval()
        with torch.no_grad():
            logits = model(data.x, data.edge_index, data.y)
            train_acc = accuracy(logits, data.y_task, data.train_mask)
            val_acc = accuracy(logits, data.y_task, data.val_mask)
            test_acc = accuracy(logits, data.y_task, data.test_mask)

        if val_acc > best_val:
            best_val = val_acc
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

        if epoch % 20 == 0 or epoch == 1:
            print(f"Epoch {epoch:03d} | loss {loss.item():.4f} | train {train_acc:.3f} | val {val_acc:.3f} | test {test_acc:.3f}")

    if best_state is not None:
        model.load_state_dict({k: v.to(device) for k, v in best_state.items()})

    model.eval()
    with torch.no_grad():
        logits = model(data.x, data.edge_index, data.y)
        train_acc = accuracy(logits, data.y_task, data.train_mask)
        val_acc = accuracy(logits, data.y_task, data.val_mask)
        test_acc = accuracy(logits, data.y_task, data.test_mask)

    print(f"Best checkpoint | train {train_acc:.3f} | val {val_acc:.3f} | test {test_acc:.3f}")
    return model


In [35]:
from models.communityGCN import CommunityGCN
from utils.LabelGeneration import LabelGeneration

data = dataset[0]

num_classes = 3

label_gen = LabelGeneration(
    in_channels=data.x.size(1),
    num_classes=num_classes,
    hidden=64,
    temperature=1.0,
    stochastic=False,
    seed=42,
    device=data.x.device
)

data.y_task = label_gen.teacher_labels(data)

data.train_mask, data.val_mask, data.test_mask = make_splits(
    data.num_nodes,
    train_ratio=0.6,
    val_ratio=0.2,
    seed=123,
    device=data.x.device
)

model = CommunityGCN(
    in_channels=data.x.size(1),
    hidden=64,
    num_classes=num_classes,
    dropout=0.2
)

trained = train_model(
    data,
    model,
    lr=1e-2,
    weight_decay=5e-4,
    epochs=200
)


Epoch 001 | loss 1.1531 | train 0.694 | val 0.750 | test 0.917
Epoch 020 | loss 0.2386 | train 0.917 | val 0.917 | test 0.833
Epoch 040 | loss 0.1871 | train 0.944 | val 0.917 | test 0.833
Epoch 060 | loss 0.1047 | train 0.944 | val 0.917 | test 0.833
Epoch 080 | loss 0.1906 | train 0.944 | val 0.833 | test 1.000
Epoch 100 | loss 0.1594 | train 0.944 | val 0.917 | test 0.917
Epoch 120 | loss 0.0511 | train 1.000 | val 0.917 | test 0.833
Epoch 140 | loss 0.0276 | train 1.000 | val 0.917 | test 0.833
Epoch 160 | loss 0.0319 | train 1.000 | val 0.917 | test 0.833
Epoch 180 | loss 0.1577 | train 1.000 | val 0.917 | test 0.917
Epoch 200 | loss 0.0159 | train 1.000 | val 0.917 | test 0.833
Best checkpoint | train 0.917 | val 1.000 | test 0.833


Schwerer Fall

In [None]:
dataset = sbm_dataset.StochasticBlockModelDataset(
    root=root,
    block_sizes=[80, 80, 80, 80],   
    edge_probs=[
        [0.15, 0.03, 0.02, 0.01],
        [0.03, 0.15, 0.03, 0.02],
        [0.02, 0.03, 0.15, 0.03],
        [0.01, 0.02, 0.03, 0.15],
    ],
    is_undirected=True,
    num_channels=32,             
    force_reload=True
)


Processing...
Done!


In [None]:
from utils.LabelGeneration import LabelGeneration

data = dataset[0]

num_classes = 6

label_gen = LabelGeneration(
    in_channels=data.x.size(1),
    num_classes=num_classes,
    hidden=64,
    temperature=1.8,    
    stochastic=True, 
    seed=42,
    device=data.x.device
)

data.y_task = label_gen.teacher_labels(data)
print(data.y_task.bincount())


tensor([56, 50, 33, 54, 54, 73])


In [38]:
data.train_mask, data.val_mask, data.test_mask = make_splits(
    data.num_nodes,
    train_ratio=0.6,
    val_ratio=0.2,
    seed=123,
    device=data.x.device
)


In [39]:
from models.communityGCN import CommunityGCN

model = CommunityGCN(
    in_channels=data.x.size(1),
    hidden=128,
    num_classes=num_classes,
    dropout=0.35
)


In [40]:
trained = train_model(
    data,
    model,
    lr=3e-3,
    weight_decay=1e-3,
    epochs=400
)


Epoch 001 | loss 2.2942 | train 0.188 | val 0.312 | test 0.219
Epoch 020 | loss 1.7174 | train 0.276 | val 0.109 | test 0.156
Epoch 040 | loss 1.6669 | train 0.380 | val 0.188 | test 0.188
Epoch 060 | loss 1.5378 | train 0.401 | val 0.125 | test 0.156
Epoch 080 | loss 1.4390 | train 0.526 | val 0.172 | test 0.203
Epoch 100 | loss 1.3119 | train 0.578 | val 0.141 | test 0.203
Epoch 120 | loss 1.2360 | train 0.474 | val 0.172 | test 0.125
Epoch 140 | loss 1.2896 | train 0.641 | val 0.156 | test 0.141
Epoch 160 | loss 1.1295 | train 0.625 | val 0.141 | test 0.188
Epoch 180 | loss 1.1849 | train 0.703 | val 0.094 | test 0.141
Epoch 200 | loss 1.0370 | train 0.708 | val 0.109 | test 0.172
Epoch 220 | loss 1.0790 | train 0.698 | val 0.156 | test 0.141
Epoch 240 | loss 1.0653 | train 0.734 | val 0.156 | test 0.172
Epoch 260 | loss 1.0226 | train 0.734 | val 0.141 | test 0.203
Epoch 280 | loss 1.2649 | train 0.651 | val 0.188 | test 0.203
Epoch 300 | loss 1.0319 | train 0.557 | val 0.188 | tes