In [1]:
import torch

In [2]:
x = torch.tensor([[1], [2], [3]])


In [4]:
x[:, 0]

tensor([1, 2, 3])

In [5]:
import torch
import torch.nn as nn

# Example input tensor (n x 9)
x = torch.randint(0, 100, (5, 9))  # just sample data
embedding_indices = x[:, 0].long()        # Shape: (n,)
feature_inputs = x[:, 1:]                 # Shape: (n, 8)

# Assume embedding layer
embedding_layer = nn.Embedding(num_embeddings=100, embedding_dim=4)
embedded = embedding_layer(embedding_indices)  # Shape: (n, 4)

# Assume a simple feedforward for the features
feature_layer = nn.Linear(8, 4)
feature_processed = feature_layer(feature_inputs.float())  # Shape: (n, 4)

# Concatenate both parts
output = torch.cat([embedded, feature_processed], dim=1)  # Shape: (n, 8)

print(output.shape)

torch.Size([5, 8])


In [6]:
x

tensor([[18,  1, 15,  7, 89, 28, 55, 99,  0],
        [49, 11, 98,  6, 33, 97, 93, 18, 23],
        [19,  3, 84, 10, 65,  2, 63, 21, 75],
        [47, 19, 82, 48,  3,  0,  4, 92, 23],
        [74, 57, 26, 22, 26, 52, 80, 94, 76]])

In [8]:
import torch

# Input tensor (n x 9)
x = torch.tensor([
    [0, 2, 3, 4, -1, 6, -1, -1, -1],
    [1, 0, 1, -1, -1, -1, 5, 6, 7],
], dtype=torch.long)

embedding_indices = x[:, 0].long()        # Shape: (n,)
indices = x[:, 1:]  # shape: (n, 8)

# Assume embedding layer
embedding_layer = nn.Embedding(num_embeddings=100, embedding_dim=4)
embedded = embedding_layer(embedding_indices)  # Shape: (n, 4)


value_map = torch.tensor([00, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], dtype=torch.float)

# Create mask: True where indices are not -1
mask = indices != -1

# Replace -1 with 0 (safe dummy index — will be masked out later)
safe_indices = indices.clone()
safe_indices[~mask] = 0  # now all -1s are 0

# Lookup from value_map
values = value_map[safe_indices]  # shape: (n, 8)

# Zero out values where original index was -1
values = values * mask.float()

# Sum along dim=1
row_sums = values.sum(dim=1)

print(row_sums)


tensor([150., 190.])


In [18]:
import torch
import torch.nn as nn

# x is shape [num_total_instances, 9]
x = torch.tensor([
    [0, 2, 3, 4, -1, 6, -1, -1, -1],
    [1, 0, 1, -1, -1, -1, 5, 6, 7],
], dtype=torch.long)

embedding_indices = x[:, 0]       # (n_instances,)
indices = x[:, 1:]                # (n_instances, max_substruct_size)

# Value lookup (could be node-level features or external map)
value_map = torch.tensor([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150], dtype=torch.float)

# n_substructure_instances per graph (2 instances total, across 2 graphs)
n_substructure_instances = torch.tensor([1, 1])  # shape [batch_size]

# ptr: cumulative node counts for batching (e.g., graph0 has 8 nodes, graph1 has 8)
ptr = torch.tensor([0, 8, 16])  # shape [batch_size + 1]

# Step 1: map each substructure instance to its graph
instance_to_graph = torch.arange(len(n_substructure_instances)).repeat_interleave(n_substructure_instances)
# -> tensor([0, 1])

# Step 2: use ptr to offset node indices
offsets = ptr[instance_to_graph]         # shape: [num_instances] → tensor([0, 8])
offsets = offsets.unsqueeze(1)           # shape: [num_instances, 1] for broadcasting

# Step 3: apply offset only where index != -1
mask = indices != -1
safe_indices = indices.clone()
offset_indices = safe_indices + offsets  # now aligned to global node space
safe_indices[~mask] = 0  # dummy index for -1s

# Step 4: lookup and sum
values = value_map[offset_indices]
values = values * mask.float()
row_sums = values.sum(dim=1)

print("Offset indices:\n", offset_indices)
print("Row sums:\n", row_sums)


Offset indices:
 tensor([[ 2,  3,  4, -1,  6, -1, -1, -1],
        [ 8,  9,  7,  7,  7, 13, 14, 15]])
Row sums:
 tensor([150., 590.])


In [14]:
n = 6 # ie max key value, ie max(x[:, 0])
v = 30 # ie max value possible in x[:, 1:]
x = torch.tensor([
    [ 2, 19, 24, 20, 23, 21, -1, -1, -1],
    [ 3,  2,  3, 28,  4,  5, 27, -1, -1],
    [ 3,  5, 27,  6, 10,  8,  7, -1, -1],
    [ 3, 12, 26, 13, 25, 15, 14, -1, -1]
], dtype=torch.long)
keys = x[:, 0]         # shape (n,)
indices = x[:, 1:]     # shape (n, 8)

out = torch.zeros(n, v, dtype=torch.long)
for i in range(x.size(0)):
    key = x[i, 0]
    valid_indices = x[i, 1:]
    valid_indices = valid_indices[valid_indices != -1]  # filter out -1s
    out[key, valid_indices] += 1  # increment count for those indices
out

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
         1, 0, 0, 0, 0, 0],
        [0, 0, 1, 1, 1, 2, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 1, 1, 2, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0]])

In [15]:
x = torch.tensor([
    [0, 2, 3, 4, -1, 6, -1, -1, -1],
    [1, 0, 1, -1, -1, -1, 5, 6, 7],
], dtype=torch.long)

embedding_indices = x[:, 0].long()        # Shape: (n,)
indices = x[:, 1:]  # shape: (n, 8)

# Create mask for valid (non -1) indices
mask = indices != -1

# Flatten valid keys and corresponding indices
flat_keys = keys.unsqueeze(1).expand_as(indices)[mask]     # shape: (num_valid,)
flat_indices = indices[mask]                               # shape: (num_valid,)
ones = torch.ones_like(flat_indices, dtype=torch.long)

# Use scatter_add to add ones at (flat_keys, flat_indices)
out.index_put_((flat_keys, flat_indices), ones, accumulate=True)

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 2,
         2, 0, 0, 0, 0, 0],
        [0, 0, 2, 2, 2, 4, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 2, 2, 4, 2, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0]])

In [19]:
import torch

# Example vertex list to connect to vn
vertex_list = torch.tensor([0, 3, 5])  # indices of existing nodes

# Original edge_index
edge_index = torch.tensor([
    [0, 1, 2],
    [1, 2, 3]
], dtype=torch.long)  # shape [2, num_edges]

# Define the virtual node index
vn = edge_index.max().item() + 1  # or simply num_nodes if known

# Create new edges: from vn to each vertex
vn_to_v = torch.stack([torch.full_like(vertex_list, vn), vertex_list], dim=0)

# (Optional) also add reverse edges if undirected or bidirectional
v_to_vn = torch.stack([vertex_list, torch.full_like(vertex_list, vn)], dim=0)

# Combine
new_edges = torch.cat([vn_to_v, v_to_vn], dim=1)  # shape [2, 2 * n]
edge_index = torch.cat([edge_index, new_edges], dim=1)

print(edge_index)

tensor([[0, 1, 2, 4, 4, 4, 0, 3, 5],
        [1, 2, 3, 0, 3, 5, 4, 4, 4]])


In [20]:
import torch

# Example inputs
vertex_lists = torch.tensor([
    [0, 3, 5, -1],
    [2, 4, -1, -1]
])  # shape: [num_vns, max_neighbors]

vn_ids = torch.tensor([10, 11])  # virtual node IDs

# 1. Mask out the padded entries (-1)
mask = vertex_lists != -1  # shape: [num_vns, max_neighbors]

# 2. Prepare VN -> vertex edges
vn_repeat = vn_ids.unsqueeze(1).expand_as(vertex_lists)  # [num_vns, max_neighbors]

src = vn_repeat[mask]     # virtual node source
dst = vertex_lists[mask]  # real node targets

vn_to_v = torch.stack([src, dst], dim=0)

# 3. (Optional) reverse direction
v_to_vn = torch.stack([dst, src], dim=0)

# 4. Combine all new edges
new_edges = torch.cat([vn_to_v, v_to_vn], dim=1)  # shape [2, num_edges]

print("New edges:\n", new_edges)

New edges:
 tensor([[10, 10, 10, 11, 11,  0,  3,  5,  2,  4],
        [ 0,  3,  5,  2,  4, 10, 10, 10, 11, 11]])


In [21]:
src

tensor([10, 10, 10, 11, 11])

In [22]:
dst

tensor([0, 3, 5, 2, 4])

In [29]:
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
from ogb.graphproppred import PygGraphPropPredDataset
from torch_geometric.datasets import ZINC
import tqdm
from torch_geometric.utils import to_networkx
from torch_geometric.datasets import PCQM4Mv2


In [24]:
dataset = PCQM4Mv2(root='./data/pcqm4m-v2', split='train')
dataset, dataset.len()

Downloading https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/pcqm4m-v2.zip
Extracting data/pcqm4m-v2/raw/pcqm4m-v2.zip
Processing...
100%|██████████| 3747/3747 [00:15<00:00, 248.64it/s]
Done!


(PCQM4Mv2(3378606), 3747)

In [30]:
d = dataset[0]

In [31]:
d

Data(x=[14, 9], edge_index=[2, 30], edge_attr=[30, 3], y=5.8831014478100006, smiles='CN1CCN([C@H]2[C@@H]1C(=CC2)C(C)C)C')

In [32]:
d.x

tensor([[6, 0, 4, 5, 3, 0, 4, 0, 0],
        [7, 0, 3, 5, 0, 0, 4, 0, 1],
        [6, 0, 4, 5, 2, 0, 4, 0, 1],
        [6, 0, 4, 5, 2, 0, 4, 0, 1],
        [7, 0, 3, 5, 0, 0, 4, 0, 1],
        [6, 1, 4, 5, 1, 0, 4, 0, 1],
        [6, 2, 4, 5, 1, 0, 4, 0, 1],
        [6, 0, 3, 5, 0, 0, 3, 0, 1],
        [6, 0, 3, 5, 1, 0, 3, 0, 1],
        [6, 0, 4, 5, 2, 0, 4, 0, 1],
        [6, 0, 4, 5, 1, 0, 4, 0, 0],
        [6, 0, 4, 5, 3, 0, 4, 0, 0],
        [6, 0, 4, 5, 3, 0, 4, 0, 0],
        [6, 0, 4, 5, 3, 0, 4, 0, 0]])