In [1]:
import torch
import torch_geometric.graphgym.register as register
from torch_geometric.graphgym.config import cfg
from torch_geometric.graphgym.models.gnn import GNNPreMP
from torch_geometric.graphgym.models.layer import (new_layer_config,BatchNorm1dNode)
from torch_geometric.graphgym.register import register_network

In [17]:
import networkx as nx
import torch
import numpy as np
from torch import Tensor
import os
import random
from torch_geometric.data import Data
from itertools import combinations
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv,  global_mean_pool
import torch.nn.functional as F
from torch.optim import Adam
from torch_geometric.loader import DataLoader
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

In [13]:
#load attributes from the whole graph
def get_graph_attributes(graph):
    """Retrieves global graph-level attributes from GraphML metadata."""
    attributes = {
        "edge_crossings": graph.graph.get("edge_crossings", 0),
        "diameter": graph.graph.get("diameter", 0),
        "avg_shortest_path": graph.graph.get("avg_shortest_path", 0),
        "num_components": graph.graph.get("num_components", 0),
        "density": graph.graph.get("density", 0),
        "assortativity": graph.graph.get("assortativity", 0),
    }
    return attributes

def get_node_features(graph):
    """Retrieves node attributes from GraphML."""
    node_features = []
    node_mapping = {node: i for i, node in enumerate(graph.nodes())}

    for node in graph.nodes():
        x_pos = float(graph.nodes[node].get("x_pos", 0))
        y_pos = float(graph.nodes[node].get("y_pos", 0))
        degree = graph.degree[node]
        clustering = float(graph.nodes[node].get("clustering", 0))
        betweenness = float(graph.nodes[node].get("betweenness", 0))
        eigenvector = float(graph.nodes[node].get("eigenvector", 0))
        pagerank = float(graph.nodes[node].get("pagerank", 0))

        node_features.append([x_pos, y_pos, degree, clustering, betweenness, eigenvector, pagerank])

    return torch.tensor(node_features, dtype=torch.float), node_mapping

def networkx_to_pyg_data(graph):
    """Converts a NetworkX graph to a PyTorch Geometric Data object with all features."""
    node_features, node_mapping = get_node_features(graph)
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges()], dtype=torch.long).t().contiguous()
    
    graph_attrs = get_graph_attributes(graph)
    y = torch.tensor([graph_attrs["edge_crossings"]], dtype=torch.float)  # Target variable

    return Data(x=node_features, edge_index=edge_index, y=y)

def load_graphs(folder_path):
    """Loads GraphML files and converts them into PyTorch Geometric dataset."""
    dataset = []
    for file in os.listdir(folder_path):
        if file.endswith(".graphml"):
            file_path = os.path.join(folder_path, file)
            try:
                graph = nx.read_graphml(file_path)
                pyg_data = networkx_to_pyg_data(graph)
                dataset.append(pyg_data)
            except Exception as e:
                print(f"Error loading {file}: {e}")

    random.shuffle(dataset)
    return dataset




In [11]:
class FeatureEncoder(torch.nn.Module):
    """
    Encoding node and edge features

    Args:
        dim_in (int): Input feature dimension
    """
    def __init__(self, dim_in):
        super(FeatureEncoder, self).__init__()
        self.dim_in = dim_in
        if cfg.dataset.node_encoder:
            # print(register.node_encoder_dict)
            # Encode integer node features via nn.Embeddings
            # print(len(register.node_encoder_dict))
            NodeEncoder = register.node_encoder_dict[
                cfg.dataset.node_encoder_name]
            self.node_encoder = NodeEncoder(cfg.gnn.dim_inner)
            if cfg.dataset.node_encoder_bn:
                self.node_encoder_bn = BatchNorm1dNode(
                    new_layer_config(cfg.gnn.dim_inner, -1, -1, has_act=False,
                                     has_bias=False, cfg=cfg))
            # Update dim_in to reflect the new dimension fo the node features
            self.dim_in = cfg.gnn.dim_inner
        if cfg.dataset.edge_encoder:
            # Hard-limit max edge dim for PNA.
            if 'PNA' in cfg.gt.layer_type:
                cfg.gnn.dim_edge = min(128, cfg.gnn.dim_inner)
            else:
                cfg.gnn.dim_edge = cfg.gnn.dim_inner
            # Encode integer edge features via nn.Embeddings
            EdgeEncoder = register.edge_encoder_dict[
                cfg.dataset.edge_encoder_name]
            self.edge_encoder = EdgeEncoder(cfg.gnn.dim_edge)
            if cfg.dataset.edge_encoder_bn:
                self.edge_encoder_bn = BatchNorm1dNode(
                    new_layer_config(cfg.gnn.dim_edge, -1, -1, has_act=False,
                                     has_bias=False, cfg=cfg))

    def forward(self, batch):
        for module in self.children():
            batch = module(batch)
        return batch


@register_network('GritTransformer')
class GritTransformer(torch.nn.Module):
    '''
        The proposed GritTransformer (Graph Inductive Bias Transformer)
    '''

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        self.ablation = True
        self.ablation = False

        if cfg.posenc_RRWP.enable:
            self.rrwp_abs_encoder = register.node_encoder_dict["rrwp_linear"]\
                (cfg.posenc_RRWP.ksteps, cfg.gnn.dim_inner)
            rel_pe_dim = cfg.posenc_RRWP.ksteps
            self.rrwp_rel_encoder = register.edge_encoder_dict["rrwp_linear"] \
                (rel_pe_dim, cfg.gnn.dim_edge,
                 pad_to_full_graph=cfg.gt.attn.full_attn,
                 add_node_attr_as_self_loop=False,
                 fill_value=0.
                 )


        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        assert cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in, \
            "The inner and hidden dims must match."

        global_model_type = cfg.gt.get('layer_type', "GritTransformer")
        # global_model_type = "GritTransformer"

        TransformerLayer = register.layer_dict.get(global_model_type)

        layers = []
        for l in range(cfg.gt.layers):
            layers.append(TransformerLayer(
                in_dim=cfg.gt.dim_hidden,
                out_dim=cfg.gt.dim_hidden,
                num_heads=cfg.gt.n_heads,
                dropout=cfg.gt.dropout,
                sparse=cfg.gt.sparse,
                act=cfg.gnn.act,
                attn_dropout=cfg.gt.attn_dropout,
                layer_norm=cfg.gt.layer_norm,
                batch_norm=cfg.gt.batch_norm,
                residual=True,
                norm_e=cfg.gt.attn.norm_e,
                O_e=cfg.gt.attn.O_e,
                cfg=cfg.gt,
            ))
        # layers = []

        self.layers = torch.nn.Sequential(*layers)
        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        for module in self.children():
            batch = module(batch)

        return batch

KeyError: "Module with 'GritTransformer' already defined"

In [19]:

# Load dataset
folder_path = r"C:\Users\lucas\Desktop\thesis\code\ThesisLucasdelArco\Data\rome1_processed"
dataset = load_graphs(folder_path)

# Split dataset
train_data, test_data = train_test_split(dataset, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(test_data, test_size=0.5, random_state=42)

print(f"Training Samples: {len(train_data)}, Validation Samples: {len(val_data)}, Testing Samples: {len(test_data)}")

# Create Data Loaders
train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
val_loader = DataLoader(val_data, batch_size=4, shuffle=False)
test_loader = DataLoader(test_data, batch_size=4, shuffle=False)

# Initialize model
num_node_features = dataset[0].x.shape[1]
num_classes = 1  # Modify if needed

# Use GritTransformer instead of GCN
model = GritTransformer(dim_in=num_node_features, dim_out=num_classes)

# Set optimizer and loss function
learning_rate = 0.001
epochs = 50
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.HuberLoss()

train_losses, val_losses = [], []

# Training Loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for data in train_loader:
        optimizer.zero_grad()
        
        # Pass entire batch through GritTransformer
        pred = model(data)  # GritTransformer expects full batch input
        loss = loss_fn(pred.squeeze(), data.y)
        
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Validation Loop
    model.eval()
    val_loss = 0
    
    with torch.no_grad():
        for data in val_loader:
            pred = model(data).squeeze()
            loss = loss_fn(pred, data.y)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

# Plot Training and Validation Loss
plt.figure(figsize=(8, 5))
plt.plot(range(epochs), train_losses, label='Train Loss')
plt.plot(range(epochs), val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()


Training Samples: 328, Validation Samples: 70, Testing Samples: 71


AttributeError: posenc_RRWP