In [73]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric as pyg
from torch_geometric.utils.num_nodes import maybe_num_nodes
from torch_scatter import scatter, scatter_max, scatter_add
from torch_geometric.utils import remove_self_loops, add_remaining_self_loops, add_self_loops
#from grit.utils import negate_edge_index
from torch_geometric.graphgym.register import *
import opt_einsum as oe
from yacs.config import CfgNode as CN
import warnings
#from .GRITSparseConv import GRITSparseConv
from torch_geometric.nn.conv import GINEConv, GINConv
import networkx as nx
from torch_geometric.utils import from_networkx
from torch_geometric.utils import from_networkx
from torch_geometric.loader import DataLoader  # Ensure you use the correct DataLoader

In [45]:
import networkx as nx
import torch
import numpy as np
from torch import Tensor
import os
import random
from torch_geometric.data import Data
from itertools import combinations
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv,  global_mean_pool
import torch.nn.functional as F
from torch.optim import Adam
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import numpy as np

In [69]:
from yacs.config import CfgNode as CN

def get_default_cfg():
    """Creates a default configuration object for the GRIT model."""
    cfg = CN()
    cfg.bn_momentum = 0.1 
    cfg.bn_no_runner = False
    cfg.rezero = False
    cfg.update_e = True

    cfg.attn = CN()
    cfg.attn.use_bias = False
    cfg.attn.clamp = 5.0
    cfg.attn.act = "relu"
    cfg.attn.edge_enhance = True
    cfg.attn.sqrt_relu = False
    cfg.attn.signed_sqrt = False
    cfg.attn.scaled_attn = False
    cfg.attn.no_qk = False
    cfg.attn.use = True
    cfg.attn.deg_scaler = True

    return cfg


In [59]:
def networkx_to_pyg_data(graph):
    """Converts a NetworkX graph to a PyTorch Geometric Data object with all features."""
    node_features, node_mapping = get_node_features(graph)
    edge_index = torch.tensor([[node_mapping[u], node_mapping[v]] for u, v in graph.edges()], dtype=torch.long).t().contiguous()
    
    graph_attrs = get_graph_attributes(graph)
    y = torch.tensor([graph_attrs["edge_crossings"]], dtype=torch.float)  # Target variable

    return Data(x=node_features, edge_index=edge_index, y=y)

def get_graph_attributes(graph):
    """Retrieves global graph-level attributes from GraphML metadata."""
    attributes = {
        "edge_crossings": graph.graph.get("edge_crossings", 0),
        "diameter": graph.graph.get("diameter", 0),
        "avg_shortest_path": graph.graph.get("avg_shortest_path", 0),
        "num_components": graph.graph.get("num_components", 0),
        "density": graph.graph.get("density", 0),
        "assortativity": graph.graph.get("assortativity", 0),
    }
    return attributes
    
def get_node_features(graph):
    """Retrieves node attributes from GraphML."""
    node_features = []
    node_mapping = {node: i for i, node in enumerate(graph.nodes())}

    for node in graph.nodes():
        x_pos = float(graph.nodes[node].get("x_pos", 0))
        y_pos = float(graph.nodes[node].get("y_pos", 0))
        degree = graph.degree[node]
        clustering = float(graph.nodes[node].get("clustering", 0))
        betweenness = float(graph.nodes[node].get("betweenness", 0))
        eigenvector = float(graph.nodes[node].get("eigenvector", 0))
        pagerank = float(graph.nodes[node].get("pagerank", 0))

        node_features.append([x_pos, y_pos, degree, clustering, betweenness, eigenvector, pagerank])

    return torch.tensor(node_features, dtype=torch.float), node_mapping
    
def load_graphs(folder_path):
    """Loads GraphML files and converts them into PyTorch Geometric dataset."""
    dataset = []
    for file in os.listdir(folder_path):
        if file.endswith(".graphml"):
            file_path = os.path.join(folder_path, file)
            try:
                graph = nx.read_graphml(file_path)
                pyg_data = networkx_to_pyg_data(graph)
                dataset.append(pyg_data)
            except Exception as e:
                print(f"Error loading {file}: {e}")

    random.shuffle(dataset)
    return dataset
    
# Load dataset
folder_path = r"C:\Users\lucas\Desktop\thesis\code\ThesisLucasdelArco\Data\rome1_processed"
dataset = load_graphs(folder_path)

In [81]:
import os
import torch
import matplotlib.pyplot as plt
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch_geometric.data import Data
from sklearn.model_selection import train_test_split


# Load dataset
folder_path = r"C:\Users\lucas\Desktop\thesis\code\ThesisLucasdelArco\Data\rome1_processed"
dataset = load_graphs(folder_path)

# Split dataset
train_data, test_data = train_test_split(dataset, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(test_data, test_size=0.5, random_state=42)

print(f"Training Samples: {len(train_data)}, Validation Samples: {len(val_data)}, Testing Samples: {len(test_data)}")



train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
val_loader = DataLoader(val_data, batch_size=4, shuffle=False)
test_loader = DataLoader(test_data, batch_size=4, shuffle=False)


# Initialize the GritTransformer model
num_node_features = dataset[0].x.shape[1]

cfg = get_default_cfg()  # Get the properly formatted config

model = GritTransformerLayer(
    in_dim=num_node_features, out_dim=32, num_heads=4,
    dropout=0.1, attn_dropout=0.1, residual=True, sparse=False, act='relu',
    layer_norm=True, batch_norm=True, cfg=cfg  # Pass the proper config
)

# Set optimizer and loss function
learning_rate = 0.001
epochs = 50
optimizer = Adam(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.HuberLoss()

train_losses, val_losses = [], []

# Training Loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for data in train_loader:
        optimizer.zero_grad()
        data = data.to(device)  # Move data to GPU/CPU if necessary
        pred = model(data.x, data.edge_index, data.batch)  # ✅ Proper way to pass data
        loss = loss_fn(pred.squeeze(), data.y)  # Ensure shapes match

    avg_train_loss = total_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Validation Loop
    model.eval()
    val_loss = 0
    
    with torch.no_grad():
        for data in val_loader:
            pred = model(data.x, data.edge_index, data.batch).squeeze()
            loss = loss_fn(pred, data.y)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

# Plot Training and Validation Loss
plt.figure(figsize=(8, 5))
plt.plot(range(epochs), train_losses, label='Train Loss')
plt.plot(range(epochs), val_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()
plt.show()


Training Samples: 328, Validation Samples: 70, Testing Samples: 71


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'torch_geometric.data.data.Data'>