In [257]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import networkx as nx
from itertools import combinations
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score


In [259]:
def normalize_adjacency_dense_gpu(A):

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    A = A.to(device)  # Move to GPU if available

    # Ensure self-loops
    A = A + torch.eye(A.size(0), device=A.device)

    # Degree vector
    row_sum = torch.sum(A, dim=1)

    # Avoid division by zero by adding a small epsilon
    D_inv_sqrt = torch.diag(1.0 / torch.sqrt(1e-10+ row_sum ))
    # Normalize adjacency
    normalized_A = D_inv_sqrt @ A @ D_inv_sqrt

    # Enforce symmetry (optional but helps to handle numerical instability)
    normalized_A = (normalized_A + normalized_A.T) / 2.0

    return normalized_A


Define the GCN Layers

In [260]:
class GCNLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(GCNLayer, self).__init__()
        self.weight = nn.Parameter(torch.randn(input_dim, output_dim))
        #self.weight = nn.Parameter(torch.randn(output_dim, input_dim))  # output_dim should be first


    def forward(self, X, A_tilde):
        return A_tilde @ X @ self.weight


Inference Model

In [261]:
class InferenceModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super(InferenceModel, self).__init__()
        self.gcn1 = GCNLayer(input_dim, hidden_dim)
        self.gcn2_mu = GCNLayer(hidden_dim, latent_dim)
        self.gcn2_logsigma = GCNLayer(hidden_dim, latent_dim)

    def forward(self, X, A_tilde):
        A_tilde = A_tilde / (A_tilde.sum(dim=1, keepdim=True) + 1e-8)

        H = F.relu(self.gcn1(X, A_tilde))  # Shared first layer
        if torch.isnan(H).any():
            print("NaN detected in H!")

        mu = self.gcn2_mu(H, A_tilde)  # Mean matrix
        log_sigma = self.gcn2_logsigma(H, A_tilde)  # Log-variance matrix
        return mu, log_sigma


Variational Grpah Auto-Encoder

In [262]:
class VGAE(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super(VGAE, self).__init__()
        self.encoder = InferenceModel(input_dim, hidden_dim, latent_dim)

    def forward(self, X, A_tilde):
        mu, log_sigma = self.encoder(X, A_tilde)
        # Reparameterization trick
        std = torch.exp(0.5 * log_sigma)
        std = torch.clamp(std, min=1e-5, max=10)
        eps = torch.randn_like(std)
        if torch.isnan(std).any():
            print("NaN detected in std!")
        Z = mu + eps * std
        Z = F.normalize(Z, p=2, dim=1)  # Normalize rows of Z to unit length
        #Z = Z / torch.sqrt(torch.tensor(Z.shape[1], dtype=torch.float32, device=Z.device))
        if torch.isnan(mu).any():
            print("NaN detected in mu!")

        if torch.isnan(log_sigma).any():
            print("NaN detected in log_sigma!")

        A_reconstructed = torch.sigmoid(torch.matmul(Z, Z.T))
        return Z, A_reconstructed, mu, log_sigma


In [263]:

class VGAE_MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()
        self.encoder = InferenceModel(input_dim, hidden_dim, latent_dim)

        # MLP Decoder (2-layer perceptron)
        self.decoder = nn.Sequential(
            nn.Linear(2 * latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, hidden_dim // 4),
            nn.ReLU(),
            nn.Linear(hidden_dim // 4, 1)
            )


    def forward(self, X, A_tilde, edge_index=None):
        mu, log_sigma = self.encoder(X, A_tilde)

        # Reparameterization trick
        std = torch.exp(0.5 * log_sigma)
        std = torch.clamp(std, max=1e5)
        eps = torch.randn_like(std)
        Z = mu + eps * std
        #Z = F.normalize(Z, p=2, dim=1)  # Normalize rows of Z to unit length



        batch_size = 10000  # Adjust based on memory
        num_edges = edge_index.shape[1]
        A_reconstructed_list = []

        for i in range(0, num_edges, batch_size):
            batch_edges = edge_index[:, i : i + batch_size]
            src, dst = batch_edges
            Z_concat = torch.cat([Z[src], Z[dst]], dim=1)
            A_reconstructed_list.append(torch.sigmoid(self.decoder(Z_concat)).squeeze())

        A_reconstructed = torch.cat(A_reconstructed_list)

        return Z, A_reconstructed, mu, log_sigma


In [264]:
class WeightedInnerProductDecoder(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(latent_dim))  # Learnable weight vector

    def forward(self, Z):
        Z_weighted = Z * self.weight  # Apply element-wise weight
        A_reconstructed = torch.sigmoid(torch.matmul(Z, Z_weighted.T))  # Full adjacency matrix
        return A_reconstructed


class VGAE_W(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super().__init__()
        self.encoder = InferenceModel(input_dim, hidden_dim, latent_dim)
        self.decoder = WeightedInnerProductDecoder(latent_dim)

    def forward(self, X, A_tilde):
        mu, log_sigma = self.encoder(X, A_tilde)

        # Reparameterization trick
        std = torch.exp(0.5 * log_sigma)
        std = torch.clamp(std, max=1e5)
        eps = torch.randn_like(std)
        Z = mu + eps * std
        Z = F.normalize(Z, p=2, dim=1)  # Normalize rows of Z to unit length

        A_reconstructed = self.decoder(Z)  # No need for edge_index now

        return Z, A_reconstructed, mu, log_sigma


Loss Function

In [265]:
def loss_function(A, A_reconstructed, mu, log_sigma):
    # Reconstruction loss (Binary Cross-Entropy)
    epsilon = 1e-7
    A_reconstructed = torch.clamp(A_reconstructed, min=epsilon, max=1 - epsilon)
    recon_loss = F.binary_cross_entropy(A_reconstructed, A, reduction='sum')

    # KL Divergence
    kl_loss = -0.5 * torch.sum(1 + log_sigma.clamp(min=-2, max=2) - mu.clamp(min=-2, max=2).pow(2) - log_sigma.clamp(min=-2, max=2).exp())
    #print(log_sigma)
    return recon_loss + kl_loss


In [266]:
def loss_function_mlp(A, A_reconstructed, mu, log_sigma, edge_index):
    src, dst = edge_index  # Edge indices

    # If A_reconstructed is 1D, select indices correctly
    A_pred = A_reconstructed[torch.arange(edge_index.shape[1])]

    # Get true adjacency values
    A_true = A[src, dst]

    # BCE loss
    recon_loss = F.binary_cross_entropy(A_pred, A_true, reduction='sum')

    # KL Divergence
    kl_loss = -0.5 * torch.sum(1 + log_sigma - mu.pow(2) - log_sigma.clamp(max=10).exp())

    return recon_loss + kl_loss

In [None]:
def to_dense_adj_custom(edge_index, batch=None, num_nodes=None):

    if num_nodes is None:
        num_nodes = edge_index.max().item() + 1  # Infer number of nodes if not provided

    if batch is None:
        adj = torch.zeros((1, num_nodes, num_nodes), dtype=torch.float32, device=edge_index.device)
        adj[0, edge_index[0], edge_index[1]] = 1
        return adj
    else:
        num_graphs = batch.max().item() + 1
        max_nodes = torch.bincount(batch).max().item()  # Max nodes per graph
        adj = torch.zeros((num_graphs, max_nodes, max_nodes), dtype=torch.float32, device=edge_index.device)

        for i in range(num_graphs):
            mask = batch[edge_index[0]] == i
            nodes = batch == i
            node_idx = torch.arange(nodes.sum(), device=edge_index.device)
            node_map = torch.full((batch.size(0),), -1, device=edge_index.device)
            node_map[nodes] = node_idx
            adj[i, node_map[edge_index[0, mask]], node_map[edge_index[1, mask]]] = 1

        return adj

# Training

## VGAE

### Cora

In [272]:
from torch_geometric.datasets import CoraFull

cora_dataset = CoraFull(root='GraphDatasets/Cora')
print(f'Dataset: {cora_dataset}:')
print('======================')
print(f'Number of graphs: {len(cora_dataset)}')
print(f'Number of features: {cora_dataset.num_features}')
print(f'Number of classes: {cora_dataset.num_classes}')

Dataset: CoraFull():
Number of graphs: 1
Number of features: 8710
Number of classes: 70


In [273]:
# Graph data
data = cora_dataset[0]
X = data.x  # features matrix (N x D)
edge_index = data.edge_index  # Edge list (2 x E)

# Create the adjacency matrix (A)
num_nodes = X.size(0)
A = torch.zeros((num_nodes, num_nodes))

# Convert the edge_index to an adjacency matrix
row, col = edge_index
A[row, col] = 1
A[col, row] = 1  # Since the graph is undirected

# Optionally, add self-loops (diagonal elements set to 1)
A.fill_diagonal_(1)

print("Adjacency matrix (A):", A)
print("Node feature matrix (X):", X)


Adjacency matrix (A): tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])
Node feature matrix (X): tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [277]:
X.shape

torch.Size([19793, 8710])

In [276]:
edge_index.shape

torch.Size([2, 126842])

In [62]:
from torch_geometric.datasets import Planetoid

cite_dataset = Planetoid(root='GraphDatasets/CiteSeer', name='CiteSeer')
print(f'Dataset: {cite_dataset}:')
print('======================')
print(f'Number of graphs: {len(cite_dataset)}')
print(f'Number of features: {cite_dataset.num_features}')
print(f'Number of classes: {cite_dataset.num_classes}')

Dataset: CiteSeer():
Number of graphs: 1
Number of features: 3703
Number of classes: 6


In [190]:
# Graph data
data = cite_dataset[0]
X = data.x  # features matrix (N x D)
edge_index = data.edge_index  # Edge list (2 x E)

# Create the adjacency matrix (A)
num_nodes = X.size(0)
A = torch.zeros((num_nodes, num_nodes))

# Convert the edge_index to an adjacency matrix
row, col = edge_index
A[row, col] = 1
A[col, row] = 1  # Since the graph is undirected

# Optionally, add self-loops (diagonal elements set to 1)
A.fill_diagonal_(1)

print("Adjacency matrix (A):", A)
print("Node feature matrix (X):", X)


Adjacency matrix (A): tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])
Node feature matrix (X): tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [267]:
from torch_geometric.datasets import Planetoid

pub_dataset = Planetoid(root='GraphDatasets/PubMed', name='PubMed')
print(f'Dataset: {pub_dataset}:')
print('======================')
print(f'Number of graphs: {len(pub_dataset)}')
print(f'Number of features: {pub_dataset.num_features}')
print(f'Number of classes: {pub_dataset.num_classes}')

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...


Dataset: PubMed():
Number of graphs: 1
Number of features: 500
Number of classes: 3


Done!


In [268]:
# Graph data
data = pub_dataset[0]
X = data.x  # features matrix (N x D)
edge_index = data.edge_index  # Edge list (2 x E)

# Create the adjacency matrix (A)
num_nodes = X.size(0)
A = torch.zeros((num_nodes, num_nodes))

# Convert the edge_index to an adjacency matrix
row, col = edge_index
A[row, col] = 1
A[col, row] = 1  # Since the graph is undirected

# Optionally, add self-loops (diagonal elements set to 1)
A.fill_diagonal_(1)

print("Adjacency matrix (A):", A)
print("Node feature matrix (X):", X)


Adjacency matrix (A): tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])
Node feature matrix (X): tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.1046, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0194, 0.0080,  ..., 0.0000, 0.0000, 0.0000],
        [0.1078, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0266, 0.0000,  ..., 0.0000, 0.0000, 0.0000]])


In [269]:


def remove_edges_and_sample_optimized(edge_index, num_nodes, test_size=0.1, val_size=0.05):

    # Convert edge_index to a set of edges for faster lookup
    edges = set(map(tuple, edge_index.t().tolist()))

    # Generate all possible node pairs (i, j) for non-edges
    all_pairs = set(combinations(range(num_nodes), 2))
    non_edges = list(all_pairs - edges)

    # Split edges into validation and test sets
    edges = list(edges)
    train_edges, temp_edges = train_test_split(edges, test_size=test_size + val_size, random_state=42)
    val_edges, test_edges = train_test_split(temp_edges, test_size=test_size / (test_size + val_size), random_state=42)

    # Sample non-edges for validation and test sets
    num_val_non_edges = len(val_edges)
    num_test_non_edges = len(test_edges)

    val_non_edges = random.sample(non_edges, num_val_non_edges)
    test_non_edges = random.sample(non_edges, num_test_non_edges)
    # Recreate the training graph without validation and test edges
    train_graph = nx.Graph()
    train_graph.add_edges_from(train_edges)
    train_graph.add_nodes_from(range(num_nodes))  # Add isolated nodes
    train_edge_index = torch.tensor(list(train_graph.edges)).t().contiguous()

    # Recreate training edge_index
    train_edge_index = torch.tensor(train_edges).t().contiguous()

    return train_edge_index, val_edges, test_edges, val_non_edges, test_non_edges,train_graph

In [270]:
# Extract edge_index and number of nodes
edge_index = data.edge_index  # (2, E)
num_nodes = data.num_nodes

# Split edges and sample non-edges
train_edge_index, val_edges, test_edges, val_non_edges, test_non_edges, train_graph = remove_edges_and_sample_optimized(edge_index, num_nodes)

print("Train edge index shape:", train_edge_index.shape)
print("Number of validation edges:", len(val_edges))
print("Number of test edges:", len(test_edges))
print("Number of validation non-edges:", len(val_non_edges))
print("Number of test non-edges:", len(test_non_edges))


Train edge index shape: torch.Size([2, 75350])
Number of validation edges: 4432
Number of test edges: 8866
Number of validation non-edges: 4432
Number of test non-edges: 8866


#### Normal

In [237]:
A.shape

torch.Size([3327, 3327])

In [238]:
X.shape

torch.Size([3327, 3327])

In [252]:
# Extract train graph adjacency matrix
num_n=train_edge_index.max().item() + 1
train_adj_matrix = to_dense_adj_custom(train_edge_index, max_num_nodes=num_nodes)[0]
train_adj_matrix = train_adj_matrix.to(torch.float32)  # Ensure float type for computations

train_adj_matrix = train_adj_matrix.clamp(max=1)

# Normalize adjacency for training graph
A_tilde_train = normalize_adjacency_dense_gpu(train_adj_matrix)

# Initialize model
input_dim = X.shape[1]
hidden_dim = 32
latent_dim = 16
model = VGAE(input_dim, hidden_dim, latent_dim)

model = model.to('cuda')  # Move model to GPU if available
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

#### Weighted

In [126]:
# Extract train graph adjacency matrix
num_n=train_edge_index.max().item() + 1
train_adj_matrix = to_dense_adj_custom(train_edge_index, max_num_nodes=num_n)[0]  # Convert to dense adjacency matrix
train_adj_matrix = train_adj_matrix.to(torch.float32)  # Ensure float type for computations

train_adj_matrix = train_adj_matrix.clamp(max=1)

A_tilde_train = normalize_adjacency_dense_gpu(train_adj_matrix)

# Initialize model
input_dim = X.shape[1]
hidden_dim = 128
latent_dim = 64
model = VGAE_W(input_dim, hidden_dim, latent_dim)

model = model.to('cuda')  # Move model to GPU if available
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [253]:
num_epochs = 200

X = torch.eye(num_nodes)
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    Z, A_reconstructed, mu, log_sigma = model(X.to('cuda'), A_tilde_train.to('cuda'))

    # Debugging: Check for NaNs in the output of the model
    if torch.isnan(A_reconstructed).sum() > 0 :
        print("NaN  detected in A_reconstructed!")
        break
    if  torch.isinf(A_reconstructed).sum() > 0:
        print(" Inf detected in A_reconstructed!")
        break


    # Compute loss
    loss = loss_function(train_adj_matrix.to('cuda'), A_reconstructed.to('cuda'), mu.to('cuda'), log_sigma.to('cuda'))

    # Check if loss becomes NaN or Inf
    if torch.isnan(loss).sum() > 0 or torch.isinf(loss).sum() > 0:
        print("NaN or Inf detected in loss!")
        break

    # Apply gradient clipping before backward pass
    #torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

    # Backward pass
    loss.backward()

    # Update parameters
    optimizer.step()

    # Print loss at each epoch
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")


Epoch 1, Loss: 8522986.0
Epoch 2, Loss: 8439624.0
Epoch 3, Loss: 8373316.5
Epoch 4, Loss: 8294545.5
Epoch 5, Loss: 8218629.5
Epoch 6, Loss: 8172857.5
Epoch 7, Loss: 8122834.0
Epoch 8, Loss: 8086443.0
Epoch 9, Loss: 8047990.5
Epoch 10, Loss: 8026068.5
Epoch 11, Loss: 7984561.5
Epoch 12, Loss: 7984052.5
Epoch 13, Loss: 7955595.0
Epoch 14, Loss: 7930405.5
Epoch 15, Loss: 7928451.5
Epoch 16, Loss: 7925911.5
Epoch 17, Loss: 7904379.5
Epoch 18, Loss: 7913075.5
Epoch 19, Loss: 7890591.5
Epoch 20, Loss: 7891773.0
Epoch 21, Loss: 7887209.0
Epoch 22, Loss: 7887262.5
Epoch 23, Loss: 7879203.0
Epoch 24, Loss: 7872646.0
Epoch 25, Loss: 7875378.5
Epoch 26, Loss: 7877303.5
Epoch 27, Loss: 7868734.5
Epoch 28, Loss: 7868485.5
Epoch 29, Loss: 7867430.5
Epoch 30, Loss: 7867819.0
Epoch 31, Loss: 7865159.5
Epoch 32, Loss: 7863379.5
Epoch 33, Loss: 7865214.5
Epoch 34, Loss: 7864693.5
Epoch 35, Loss: 7862830.0
Epoch 36, Loss: 7861997.0
Epoch 37, Loss: 7861716.5
Epoch 38, Loss: 7860861.0
Epoch 39, Loss: 78590

#### MLP

In [57]:
import torch
from torch_geometric.utils import to_dense_adj

# Assume edge_index, num_nodes, and remove_edges_and_sample_optimized are defined
# Extract train graph adjacency matrix
num_n=train_edge_index.max().item() + 1
train_adj_matrix = to_dense_adj(train_edge_index, max_num_nodes=num_n)[0]  # Convert to dense adjacency matrix
train_adj_matrix = train_adj_matrix.to(torch.float32)  # Ensure float type for computations
train_adj_matrix = to_dense_adj(train_edge_index, max_num_nodes=num_nodes)[0]
# Convert to SciPy sparse matrix
# Create the adjacency matrix from the edge list (train_edge_index)
#train_adj_matrix = to_dense_adj(train_edge_index, max_num_nodes=num_nodes)[0]

# Enforce symmetry (add the transpose to ensure both directions are captured)
train_adj_matrix = train_adj_matrix + train_adj_matrix.T

# Ensure that the diagonal entries are 1 (self-loops)
train_adj_matrix.fill_diagonal_(1.0)
train_adj_matrix = train_adj_matrix.clamp(max=1)
# Node features
if data.x is not None:
    X = data.x  # Use provided node features
else:
    X = torch.eye(num_nodes)  # Use identity matrix if featureless

# Normalize adjacency for training graph
A_tilde_train = normalize_adjacency_dense_gpu(train_adj_matrix)

# Initialize model
input_dim = X.shape[1]
hidden_dim = 128
latent_dim = 64
model = VGAE_MLP(input_dim, hidden_dim, latent_dim)

model = model.to('cuda')  # Move model to GPU if available
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

In [58]:

num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    Z, A_reconstructed, mu, log_sigma = model(X.to('cuda'), A_tilde_train.to('cuda'), edge_index=edge_index)

    # Clamp log_sigma to prevent extreme values
    log_sigma = torch.clamp(log_sigma, min=-18, max=18)

    # Compute loss
    loss = loss_function_mlp(train_adj_matrix.to('cuda'), A_reconstructed, mu, log_sigma, train_edge_index.to('cuda'))
    loss.backward()

    # Update parameters
    optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")



torch.Size([126842])
Epoch 1, Loss: 6154940928.0
torch.Size([126842])


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


#### Evaluation

In [254]:

A_reconstructed = A_reconstructed.detach().cpu()

# Ensure test edges and non-edges are tensors
test_edges = torch.tensor(test_edges, dtype=torch.long)
test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)

# Handle different decoder outputs
if A_reconstructed.dim() == 2:
    # If A_reconstructed is a full adjacency matrix
    test_edge_scores = A_reconstructed[test_edges[:, 0], test_edges[:, 1]].numpy()
    test_non_edge_scores = A_reconstructed[test_non_edges[:, 0], test_non_edges[:, 1]].numpy()
else:
    # If A_reconstructed is a 1D tensor (edge probabilities only)
    test_edge_scores = A_reconstructed[:len(test_edges)].numpy()
    test_non_edge_scores = A_reconstructed[len(test_edges):].numpy()

# Combine scores and create labels
scores = np.concatenate([test_edge_scores, test_non_edge_scores])
labels = np.concatenate([np.ones(len(test_edge_scores)), np.zeros(len(test_non_edge_scores))])


  test_edges = torch.tensor(test_edges, dtype=torch.long)
  test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)


In [255]:

roc_auc = roc_auc_score(labels, scores)
print(f"ROC-AUC Score: {roc_auc}")


ROC-AUC Score: 0.6329530160099577


In [256]:
ap_score = average_precision_score(labels, scores)
print(f"Average Precision (AP): {ap_score:.4f}")

Average Precision (AP): 0.6490


### Patent dataset

In [11]:
from scipy.sparse import save_npz, load_npz
import numpy as np
import torch
from scipy.sparse import coo_matrix
from sklearn.model_selection import train_test_split

A = load_npz("combined_adj_small.npz")
X = load_npz("combined_features_matrix.npz")

X = torch.tensor(X.toarray(), dtype=torch.float32)

In [12]:
# Check if the matrix is symmetric
if (A != A.T).nnz == 0:  # If the number of non-zero elements in (A - A.T) is zero
    print("Matrix A is symmetric.")
else:
    print("Matrix A is not symmetric.")

Matrix A is symmetric.


In [13]:
X

tensor([[-0.0367, -0.0003, -0.0465,  ...,  0.0678, -0.0619, -0.0571],
        [-0.0490, -0.0121, -0.0688,  ...,  0.1723,  0.0379, -0.0342],
        [-0.0546, -0.0845, -0.1177,  ...,  0.0683,  0.0276, -0.0078],
        ...,
        [-0.0380,  0.0360,  0.0635,  ...,  0.0293,  0.0540,  0.0328],
        [-0.1023,  0.0321, -0.0490,  ..., -0.0618,  0.0613,  0.0230],
        [-0.1295,  0.0772, -0.0172,  ..., -0.0530,  0.0565,  0.0502]])

In [14]:


def split_edges_and_sample(A, num_samples=None, test_size=0.1, val_size=0.05, random_state=42):
    """
    Efficiently splits edges and samples non-edges.

    Parameters:
    - A: scipy.sparse.coo_matrix (adjacency matrix)
    - num_samples: Number of non-edges to sample (adjust based on graph size)
    - test_size: Proportion of edges/non-edges for testing
    - val_size: Proportion of edges/non-edges for validation
    - random_state: Random seed for reproducibility

    Returns:
    - train_edge_index: Edge list for training
    - val_edges, test_edges: Validation and test edge lists
    - val_non_edges, test_non_edges: Validation and test non-edges
    - train_graph: Sparse matrix representing the training graph
    """
    A_coo = coo_matrix(A)
    edges = np.vstack((A_coo.row, A_coo.col)).T  # Extract edges
    num_nodes = A.shape[0]

    # Convert edges to a set for fast lookup
    existing_edges = set(map(tuple, edges))
    num_samples=len(edges)*0.15
    # Randomly sample non-edges
    np.random.seed(random_state)
    non_edges = set()
    while len(non_edges) < num_samples:
        i = np.random.randint(0, num_nodes)
        j = np.random.randint(0, num_nodes)
        if i != j and (i, j) not in existing_edges and (j, i) not in existing_edges:
            non_edges.add((i, j))
    #print(existing_edges)
    non_edges = np.array(list(non_edges))

    # Split edges into train, validation, and test sets
    train_edges, temp_edges = train_test_split(edges, test_size=(test_size + val_size), random_state=random_state)

    val_edges, test_edges = train_test_split(temp_edges, test_size=(test_size / (test_size + val_size)), random_state=random_state)

    # Split sampled non-edges into validation and test sets
    val_non_edges, test_non_edges = train_test_split(non_edges, test_size=(test_size / (test_size + val_size)), random_state=random_state)

    # Create a training graph (without val/test edges)
    train_graph = coo_matrix(
        (np.ones(len(train_edges)), (train_edges[:, 0], train_edges[:, 1])),
        shape=A.shape
    )
    train_graph = train_graph + train_graph.T  # Ensure symmetry

    # Convert to PyTorch tensors
    train_edge_index = torch.tensor(train_edges.T, dtype=torch.long)
    val_edges = torch.tensor(val_edges, dtype=torch.long)
    test_edges = torch.tensor(test_edges, dtype=torch.long)
    val_non_edges = torch.tensor(val_non_edges, dtype=torch.long)
    test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)

    return train_edge_index, val_edges, test_edges, val_non_edges, test_non_edges, train_graph


In [15]:
#A = A + A.T  # Ensure symmetry for an undirected graph
#A[A > 1] = 1  # Remove duplicate edges

# Split edges and sample non-edges
train_edge_index, val_edges, test_edges, val_non_edges, test_non_edges, train_graph= split_edges_and_sample(A)

print("Train edge index shape:", train_edge_index.shape)
print("Number of validation edges:", len(val_edges))
print("Number of test edges:", len(test_edges))
print("Number of validation non-edges:", len(val_non_edges))
print("Number of test non-edges:", len(test_non_edges))


Train edge index shape: torch.Size([2, 44438])
Number of validation edges: 2614
Number of test edges: 5229
Number of validation non-edges: 2614
Number of test non-edges: 5229


In [16]:
def normalize_adjacency_sparse_gpu(A):
    """
    Normalize adjacency matrix on GPU using sparse matrices.
    A: Sparse adjacency matrix (torch.sparse.FloatTensor).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    A = A.to(device)  # Move to GPU if available

    # Ensure self-loops (can be done with sparse matrices too)
    #eye = torch.eye(A.size(0), device=A.device).to_sparse()
    #A = A + eye

    # Degree vector (sparse sum)
    row_sum = torch.sum(A, dim=1) # Sparse sum and convert to dense

    # Avoid division by zero by adding a small epsilon
    D_inv_sqrt = torch.diag(1.0 / torch.sqrt(row_sum + 1e-10))

    # Normalize adjacency
    normalized_A = D_inv_sqrt @ A @ D_inv_sqrt

    # Enforce symmetry
    normalized_A = (normalized_A + normalized_A.T) / 2.0

    return normalized_A


#### MLP

In [31]:
import torch
#import torch_sparse
from torch_geometric.utils import to_scipy_sparse_matrix
from torch_geometric.utils import to_dense_adj

# Assume edge_index, num_nodes, and remove_edges_and_sample_optimized are defined
# Extract train graph adjacency matrix

# Extract train graph adjacency matrix
num_nodes =  max(train_edge_index[0].max(), train_edge_index[1].max()) + 1


train_adj_matrix = to_dense_adj(train_edge_index, max_num_nodes=num_nodes)[0]
# Convert to SciPy sparse matrix


#train_adj_matrix = to_scipy_sparse_matrix(train_edge_index, num_nodes=num_nodes)
# Ensure adjacency matrix is sparse on GPU
train_adj_matrix = train_adj_matrix.to('cuda')  # Move sparse matrix to GPU

# Convert directly to a PyTorch sparse tensor
#train_adj_matrix = torch.tensor(train_adj_matrix.toarray(), dtype=torch.float32)
# Ensure adjacency matrix is sparse on GPU
#train_adj_matrix = train_adj_matrix.to_sparse().to('cuda')  # Move sparse matrix to GPU
#train_adj_matrix = torch.sparse_coo_tensor(indices, values, size=(num_nodes, num_nodes), dtype=torch.float32).to(device)

# Normalize adjacency for training graph
A_tilde_train = normalize_adjacency_dense_gpu(train_adj_matrix.to(torch.float32))  # Normalize

#train_adj_matrix = train_adj_matrix.to('cuda')
#A_tilde_train = normalize_adjacency_dense_gpu(train_adj_matrix)


# Initialize model
input_dim = X.shape[1]
hidden_dim = 32
latent_dim = 16
model = VGAE_MLP(input_dim, hidden_dim, latent_dim)

model = model.to('cuda')  # Move model to GPU if available
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [32]:

num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    Z, A_reconstructed, mu, log_sigma = model(X.to('cuda'), A_tilde_train.to('cuda'), edge_index=train_edge_index)

    # Clamp log_sigma to prevent extreme values
    log_sigma = torch.clamp(log_sigma, min=-14, max=14)

    # Compute loss
    loss = loss_function_mlp(train_adj_matrix.to('cuda'), A_reconstructed, mu, log_sigma, train_edge_index.to('cuda'))
    loss.backward()

    # Update parameters
    optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")



torch.Size([44438])
Epoch 1, Loss: 11628205.0
torch.Size([44438])
Epoch 2, Loss: 11194987.0
torch.Size([44438])
Epoch 3, Loss: 10772963.0
torch.Size([44438])
Epoch 4, Loss: 10367990.0
torch.Size([44438])
Epoch 5, Loss: 9977155.0
torch.Size([44438])
Epoch 6, Loss: 9607456.0
torch.Size([44438])
Epoch 7, Loss: 9240159.0
torch.Size([44438])
Epoch 8, Loss: 8882024.0
torch.Size([44438])
Epoch 9, Loss: 8530211.0
torch.Size([44438])
Epoch 10, Loss: 8196824.0
torch.Size([44438])
Epoch 11, Loss: 7889602.0
torch.Size([44438])
Epoch 12, Loss: 7605178.5
torch.Size([44438])
Epoch 13, Loss: 7334371.5
torch.Size([44438])
Epoch 14, Loss: 7078561.5
torch.Size([44438])
Epoch 15, Loss: 6840553.5
torch.Size([44438])
Epoch 16, Loss: 6620122.5
torch.Size([44438])
Epoch 17, Loss: 6413637.5
torch.Size([44438])
Epoch 18, Loss: 6214259.0
torch.Size([44438])
Epoch 19, Loss: 6023259.0
torch.Size([44438])
Epoch 20, Loss: 5838478.5
torch.Size([44438])
Epoch 21, Loss: 5656990.5
torch.Size([44438])
Epoch 22, Loss: 548

In [None]:
A_tilde_train = normalize_adjacency_dense_gpu(train_adj_matrix)
A_tilde_train = A_tilde_train.to(torch.float32)

Matrix A is on device: cuda:0
A after adding self-loops: torch.Size([17859, 17859])
Row sums: torch.Size([17859])
D_inv_sqrt: torch.Size([17859, 17859])
Normalized A: torch.Size([17859, 17859])
Final normalized A: torch.Size([17859, 17859])


In [None]:
D = torch.diag(A_tilde_train.sum(dim=1).clamp(min=1).pow(-0.5))
A_tilde_train = D @ A_tilde_train @ D


In [None]:
A_tilde_train = A_tilde_train / A_tilde_train.sum(dim=1, keepdim=True).clamp(min=1)


In [None]:
is_symmetric = torch.allclose(train_adj_matrix, train_adj_matrix.T, atol=1e-6)
print("Is A_tilde_train symmetric?", is_symmetric)

Is A_tilde_train symmetric? True


In [None]:
is_symmetric = torch.allclose(A_tilde_train, A_tilde_train.T, atol=1e-6)
print("Is A_tilde_train symmetric?", is_symmetric)

Is A_tilde_train symmetric? True


In [None]:

for i in train_adj_matrix[0]:
    if i!=0:
        print(i)

tensor(1.)
tensor(1.)


In [None]:
for i in A_tilde_train[0]:
    if i!=0:
        print(i)

tensor(0.5000, device='cuda:0')
tensor(0.0928, device='cuda:0')


In [47]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"Layer: {name} | Weights: {param.data}")


Layer: encoder.gcn1.weight | Weights: tensor([[ 0.2844, -1.2023,  0.0181,  ..., -0.3716,  0.4400, -2.0281],
        [ 1.0278, -0.5476,  0.7557,  ...,  0.0956, -2.2373,  1.5964],
        [ 1.0956,  1.1783,  0.8374,  ..., -0.7773,  1.4663,  0.3260],
        ...,
        [-1.5276, -0.2777,  0.4698,  ..., -1.6284,  1.6496,  0.4437],
        [-2.3021, -0.9397,  0.8102,  ..., -0.7855, -1.8552, -1.1287],
        [ 0.1980, -1.0253, -0.1974,  ...,  0.5522, -0.6829,  1.1012]],
       device='cuda:0')
Layer: encoder.gcn2_mu.weight | Weights: tensor([[-2.0520e-01,  1.9662e+00, -1.6177e+00, -1.2190e+00, -8.4244e-01,
          5.6901e-01, -4.4265e-01, -8.6830e-01, -5.1377e-01,  2.6267e-01,
          6.6546e-01, -1.6125e-01,  2.3547e+00, -1.0704e+00, -2.1678e+00,
          4.7659e-01],
        [ 1.6484e+00, -2.7656e-01, -1.7024e+00,  1.9620e-01,  6.7228e-01,
          3.0308e-01,  4.6277e-01, -5.2939e-01,  5.0141e-01, -7.3981e-01,
          5.4150e-01,  1.7330e-01,  1.3890e+00, -5.1541e-01,  1.7745e+

In [None]:
Layer: decoder.weight | Weights: tensor([ 0.7694,  0.7375,  0.7400,  0.8426,  0.6726,  0.5184,  0.8114, -0.0369,
         0.8284,  0.7812,  0.6179,  0.5305,  0.7229,  0.0629,  0.1734,  0.5006]

In [None]:
Layer: decoder.weight | Weights: tensor([0.2511, 0.4997, 0.5713, 0.3977, 0.4057, 0.6872, 0.3339, 0.1781, 0.3881,
        0.8694, 0.3839, 0.3271, 0.4893, 0.7137, 0.7647, 0.5228],

In [None]:
row_sums=A_tilde_train.sum(dim=1)
print("Row sum min:", row_sums.min().item())
print("Row sum max:", row_sums.max().item())
print("Row sum mean:", row_sums.mean().item())


Row sum min: 0.2660941779613495
Row sum max: 21.876142501831055
Row sum mean: 0.6087040901184082


In [None]:
def split_edges_and_sample(A, num_samples=None, test_size=0.1, val_size=0.05, random_state=42):
    """
    Efficiently splits edges and samples non-edges.

    Parameters:
    - A: scipy.sparse.coo_matrix (adjacency matrix)
    - num_samples: Number of non-edges to sample (adjust based on graph size)
    - test_size: Proportion of edges/non-edges for testing
    - val_size: Proportion of edges/non-edges for validation
    - random_state: Random seed for reproducibility

    Returns:
    - train_edge_index: Edge list for training
    - val_edges, test_edges: Validation and test edge lists
    - val_non_edges, test_non_edges: Validation and test non-edges
    - train_graph: Sparse matrix representing the training graph
    """
    A_coo = coo_matrix(A)
    edges = np.vstack((A_coo.row, A_coo.col)).T  # Extract edges
    num_nodes = A.shape[0]

    # Normalize edge representation to avoid both (i, j) and (j, i)
    edges = np.array([tuple(sorted((i, j))) for i, j in edges])
    existing_edges = set(map(tuple, edges))

    # Sample non-edges
    if num_samples is None:
        num_samples = int(len(edges) * 0.15)  # Sample 15% of edges as non-edges by default

    np.random.seed(random_state)
    non_edges = set()
    while len(non_edges) < num_samples:
        i = np.random.randint(0, num_nodes)
        j = np.random.randint(0, num_nodes)
        if i != j:
            edge = tuple(sorted((i, j)))
            if edge not in existing_edges:
                non_edges.add(edge)

    non_edges = np.array(list(non_edges))

    # Split edges into train, validation, and test sets
    train_edges, temp_edges = train_test_split(edges, test_size=(test_size + val_size), random_state=random_state)
    val_edges, test_edges = train_test_split(temp_edges, test_size=(test_size / (test_size + val_size)), random_state=random_state)

    # Split sampled non-edges into validation and test sets
    val_non_edges, test_non_edges = train_test_split(non_edges, test_size=(test_size / (test_size + val_size)), random_state=random_state)

    # Create a training graph (without val/test edges)
    train_graph = coo_matrix(
        (np.ones(len(train_edges)), (train_edges[:, 0], train_edges[:, 1])),
        shape=A.shape
    )
    train_graph = train_graph + train_graph.T  # Ensure symmetry

    # Convert to PyTorch tensors
    train_edge_index = torch.tensor(train_edges.T, dtype=torch.long)
    val_edges = torch.tensor(val_edges, dtype=torch.long)
    test_edges = torch.tensor(test_edges, dtype=torch.long)
    val_non_edges = torch.tensor(val_non_edges, dtype=torch.long)
    test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)

    return train_edge_index, val_edges, test_edges, val_non_edges, test_non_edges, train_graph


In [None]:
!pip install torch_sparse -f https://pytorch-geometric.com/whl/cpu.html


Looking in links: https://pytorch-geometric.com/whl/cpu.html
Collecting torch_sparse
  Using cached torch_sparse-0.6.18.tar.gz (209 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torch_sparse


In [123]:

import torch
#import torch_sparse
from torch_geometric.utils import to_scipy_sparse_matrix
from torch_geometric.utils import to_dense_adj

# Assume edge_index, num_nodes, and remove_edges_and_sample_optimized are defined
# Extract train graph adjacency matrix

# Extract train graph adjacency matrix
num_nodes = train_edge_index.max().item() + 1

train_adj_matrix = to_dense_adj(train_edge_index, max_num_nodes=num_nodes)[0]
# Convert to SciPy sparse matrix
# Create the adjacency matrix from the edge list (train_edge_index)
#train_adj_matrix = to_dense_adj(train_edge_index, max_num_nodes=num_nodes)[0]

# Enforce symmetry (add the transpose to ensure both directions are captured)
train_adj_matrix = train_adj_matrix + train_adj_matrix.T

# Ensure that the diagonal entries are 1 (self-loops)
train_adj_matrix.fill_diagonal_(1.0)
train_adj_matrix = train_adj_matrix.clamp(max=1)
#train_adj_matrix = to_scipy_sparse_matrix(train_edge_index, num_nodes=num_nodes)

# Convert directly to a PyTorch sparse tensor
#train_adj_matrix = torch.tensor(train_adj_matrix.toarray(), dtype=torch.float32)

# Normalize adjacency for training graph
A_tilde_train = normalize_adjacency_dense_gpu(train_adj_matrix.to(torch.float32))  # Normalize
#D = torch.diag(train_adj_matrix.sum(dim=1).clamp(min=1).pow(-0.5))
#A_tilde_train = D @ train_adj_matrix @ D


# Initialize model
input_dim = X.shape[1]
hidden_dim = 128
latent_dim = 64
model = VGAE(input_dim, hidden_dim, latent_dim)

model = model.to('cuda')  # Move model to GPU if available
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

In [124]:
# Training OLD
num_epochs = 200
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    Z, A_reconstructed, mu, log_sigma = model(X.to('cuda'), A_tilde_train.to('cuda'))

    # Clamp log_sigma to prevent extreme values
    log_sigma = torch.clamp(log_sigma, min=-18, max=18)

    # Compute loss
    loss = loss_function(train_adj_matrix.to('cuda'), A_reconstructed.to('cuda'), mu, log_sigma)
    loss.backward()

    # Update parameters
    optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

Epoch 1, Loss: 71663824.0
NaN detected in H!
NaN detected in std!
NaN detected in mu!
NaN detected in log_sigma!


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [33]:

import numpy as np
from sklearn.metrics import roc_auc_score

# Convert the reconstructed adjacency matrix to CPU if necessary
A_reconstructed = A_reconstructed.detach().cpu()

# Ensure test edges and non-edges are tensors
test_edges = torch.tensor(test_edges, dtype=torch.long)
test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)

# Handle different decoder outputs
if A_reconstructed.dim() == 2:
    # If A_reconstructed is a full adjacency matrix
    test_edge_scores = A_reconstructed[test_edges[:, 0], test_edges[:, 1]].numpy()
    test_non_edge_scores = A_reconstructed[test_non_edges[:, 0], test_non_edges[:, 1]].numpy()
else:
    # If A_reconstructed is a 1D tensor (edge probabilities only)
    test_edge_scores = A_reconstructed[:len(test_edges)].numpy()
    test_non_edge_scores = A_reconstructed[len(test_edges):].numpy()

# Combine scores and create labels
scores = np.concatenate([test_edge_scores, test_non_edge_scores])
labels = np.concatenate([np.ones(len(test_edge_scores)), np.zeros(len(test_non_edge_scores))])


  test_edges = torch.tensor(test_edges, dtype=torch.long)
  test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)


In [34]:
from sklearn.metrics import roc_auc_score

# Assuming y_true contains actual edge labels (1 for edges, 0 for non-edges)
# and y_score contains the predicted scores for each pair of nodes
roc_auc = roc_auc_score(labels, scores)
print(f"ROC-AUC Score: {roc_auc}")


ROC-AUC Score: 0.5003852210158115


In [35]:
from sklearn.metrics import average_precision_score
ap_score = average_precision_score(labels, scores)
print(f"Average Precision (AP): {ap_score:.4f}")

Average Precision (AP): 0.1197


## GAE

In [219]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GCNLayer(nn.Module):
    """
    GAE
    """
    def __init__(self, in_channels, out_channels):
        super(GCNLayer, self).__init__()
        self.weight = nn.Parameter(torch.randn(in_channels, out_channels))
        self.bias = nn.Parameter(torch.zeros(out_channels))

    def forward(self, X, A):

        support = torch.matmul(X, self.weight)  # Apply linear transformation
        output = torch.matmul(A, support)  # Aggregate neighbor information
        return output + self.bias  # Add bias term

class GAE(nn.Module):
    def __init__(self, input_dim, hidden_dim, latent_dim):
        super(GAE, self).__init__()
        # Encoder layers
        self.gcn1 = GCNLayer(input_dim, hidden_dim)
        self.gcn2 = GCNLayer(hidden_dim, latent_dim)

    def forward(self, X, A_tilde):

        # Graph convolution layer 1
        H = F.relu(self.gcn1(X, A_tilde))  # First GCN layer with ReLU activation

        # Graph convolution layer 2
        Z = self.gcn2(H, A_tilde)  # Second GCN layer for embeddings Z

        # Normalize the embeddings (optional, based on your specific use case)
        Z = F.normalize(Z, p=2, dim=1)  # Normalize each row of Z to have unit length

        # Reconstruct the adjacency matrix
        A_reconstructed = torch.sigmoid(torch.matmul(Z, Z.T))  # Reconstruct the adjacency matrix

        return Z, A_reconstructed


In [220]:
def loss_function(A, A_reconstructed):

    A = A.view(-1)
    A_reconstructed = A_reconstructed.view(-1)

    return F.binary_cross_entropy(A_reconstructed, A)


In [232]:
train_adj_matrix = to_dense_adj_custom(train_edge_index, max_num_nodes=num_nodes)[0]  # Convert to dense adjacency matrix
train_adj_matrix = train_adj_matrix.to(torch.float32)  # Ensure float type for computations

# Node features
if data.x is not None:
    X = data.x  # Use provided node features
else:
    X = torch.eye(num_nodes)  # Use identity matrix if featureless


A_tilde = normalize_adjacency_dense_gpu(train_adj_matrix)

In [233]:
input_dim = X.shape[1]  # Number of features per node
hidden_dim = 32  # Hidden layer size
latent_dim = 16  # Latent space size (embedding dimension)
num_epochs = 200
learning_rate = 0.01
device='cuda'


model = GAE(input_dim, hidden_dim, latent_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    Z, A_reconstructed = model(X.to(device), train_adj_matrix.to(device))

    # Compute loss
    loss = loss_function(train_adj_matrix.to(device), A_reconstructed)

    # Backward pass
    loss.backward()

    # Update parameters
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")


Epoch 10/200, Loss: 0.7204144597053528
Epoch 20/200, Loss: 0.7118179202079773
Epoch 30/200, Loss: 0.7078151702880859
Epoch 40/200, Loss: 0.7055830955505371
Epoch 50/200, Loss: 0.7051553130149841
Epoch 60/200, Loss: 0.7044070363044739
Epoch 70/200, Loss: 0.7040360569953918
Epoch 80/200, Loss: 0.7036408185958862
Epoch 90/200, Loss: 0.7033143043518066
Epoch 100/200, Loss: 0.7030094265937805
Epoch 110/200, Loss: 0.702742874622345
Epoch 120/200, Loss: 0.7025085687637329
Epoch 130/200, Loss: 0.7023085951805115
Epoch 140/200, Loss: 0.7021378874778748
Epoch 150/200, Loss: 0.7019903063774109
Epoch 160/200, Loss: 0.7018605470657349
Epoch 170/200, Loss: 0.7017422914505005
Epoch 180/200, Loss: 0.7016328573226929
Epoch 190/200, Loss: 0.7015302777290344
Epoch 200/200, Loss: 0.7014331817626953


In [234]:
# Convert the reconstructed adjacency matrix to CPU if necessary
A_reconstructed = A_reconstructed.detach().cpu()

test_edges = torch.tensor(test_edges, dtype=torch.long)
test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)
# Get the scores for test edges and test non-edges
test_edge_scores = A_reconstructed[test_edges[:, 0], test_edges[:, 1]].numpy()
test_non_edge_scores = A_reconstructed[test_non_edges[:, 0], test_non_edges[:, 1]].numpy()

# Combine scores and create labels
scores = np.concatenate([test_edge_scores, test_non_edge_scores])
labels = np.concatenate([np.ones(len(test_edge_scores)), np.zeros(len(test_non_edge_scores))])

# Assuming y_true contains actual edge labels (1 for edges, 0 for non-edges)
# and y_score contains the predicted scores for each pair of nodes
roc_auc = roc_auc_score(labels, scores)
print(f"ROC-AUC Score: {roc_auc}")

ap_score = average_precision_score(labels, scores)
print(f"Average Precision (AP): {ap_score:.4f}")

ROC-AUC Score: 0.8122218861795278
Average Precision (AP): 0.8354


  test_edges = torch.tensor(test_edges, dtype=torch.long)
  test_non_edges = torch.tensor(test_non_edges, dtype=torch.long)
