# Embedding nodes properties and node attributes
We will deconstruct, what GCN is doing by considering what each embedding (of structural properties is doing) and what the time-series for nodes are doing.

We use 2 steps for this
## step 1: generate Node Embeddings using node2vec

Use the node2vec library to generate embeddings that capture the structural information of the nodes in the graph.

## step 2: generate embedding of attributes



In [9]:
!pip install node2vec

Collecting node2vec
  Downloading node2vec-0.5.0-py3-none-any.whl.metadata (849 bytes)
Downloading node2vec-0.5.0-py3-none-any.whl (7.2 kB)
Installing collected packages: node2vec
Successfully installed node2vec-0.5.0


In [10]:
from node2vec import Node2Vec
import networkx as nx

# Create a graph
G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (3, 4), (4, 1)])

# Generate node2vec embeddings
node2vec = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200, workers=4)
model = node2vec.fit(window=10, min_count=1, batch_words=4)

# Get embeddings for each node
structural_embeddings = {node: model.wv[str(node)] for node in G.nodes()}


Computing transition probabilities:   0%|          | 0/4 [00:00<?, ?it/s]

In [11]:
print(structural_embeddings)

{1: array([-0.02631349,  0.05089882,  0.17280838,  0.14105776, -0.12393833,
       -0.17387788,  0.11665705,  0.2706954 ,  0.00486176, -0.03152571,
        0.21411921,  0.0916369 ,  0.15909027,  0.05304003, -0.23802675,
       -0.08597304, -0.06934562,  0.18728486, -0.03112677, -0.11849643,
        0.14588998,  0.11832988,  0.30683884, -0.19891143,  0.10072954,
        0.12988633, -0.07587767,  0.15272784, -0.01583031, -0.00756422,
       -0.06256703,  0.06438688,  0.01060378, -0.16112167, -0.02108328,
        0.00207937,  0.03760183,  0.04577088,  0.18683806, -0.14921485,
       -0.03411896,  0.08479237, -0.22927839, -0.13514379,  0.04177726,
        0.05127777, -0.11240242, -0.02732335, -0.11053132,  0.17460172,
       -0.02872381,  0.13135329, -0.09071255,  0.16037709,  0.16271955,
       -0.17188382,  0.06721143, -0.17067735,  0.03274924, -0.09745877,
        0.01641579, -0.09072948, -0.15733744, -0.1375738 ], dtype=float32), 2: array([-0.01243238,  0.00682303,  0.14969034,  0.1548

Step 2: Prepare Attribute Vectors

We have time-series for each node is associated with a vector of attributes.

In [12]:
attribute_vectors = {
    1: [0.1, 0.2, 0.3],
    2: [0.4, 0.5, 0.6],
    3: [0.7, 0.8, 0.9],
    4: [1.0, 1.1, 1.2]
}

import numpy as np

combined_embeddings = {}
for node in G.nodes():
    structural_embedding = structural_embeddings[node]
    attribute_vector = attribute_vectors[node]
    combined_embedding = np.concatenate((structural_embedding, attribute_vector))
    combined_embeddings[node] = combined_embedding

# Example output for node 1
print(combined_embeddings[1])

[-0.02631349  0.05089882  0.17280838  0.14105776 -0.12393833 -0.17387788
  0.11665705  0.27069539  0.00486176 -0.03152571  0.21411921  0.0916369
  0.15909027  0.05304003 -0.23802675 -0.08597304 -0.06934562  0.18728486
 -0.03112677 -0.11849643  0.14588998  0.11832988  0.30683884 -0.19891143
  0.10072954  0.12988633 -0.07587767  0.15272784 -0.01583031 -0.00756422
 -0.06256703  0.06438688  0.01060378 -0.16112167 -0.02108328  0.00207937
  0.03760183  0.04577088  0.18683806 -0.14921485 -0.03411896  0.08479237
 -0.22927839 -0.13514379  0.04177726  0.05127777 -0.11240242 -0.02732335
 -0.11053132  0.17460172 -0.02872381  0.13135329 -0.09071255  0.16037709
  0.16271955 -0.17188382  0.06721143 -0.17067735  0.03274924 -0.09745877
  0.01641579 -0.09072948 -0.15733744 -0.13757379  0.1         0.2
  0.3       ]


# GCN realisation



In [2]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [5]:
# load some graph or create a graph
import networkx as nx
G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (3, 4), (4, 1)])

In [17]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv

# Node features (4 nodes, each with 2 features, they can be anything, degrees or even other properties )
x = torch.tensor([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]], dtype=torch.float)

# Edge indices (connections between nodes)
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],  # Source nodes
                           [1, 0, 2, 1, 3, 2]], dtype=torch.long)  # Target nodes

# Create a PyTorch Geometric data object
data = Data(x=x, edge_index=edge_index)

# Define the GCN model
class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(data.num_features, 16)
        self.conv2 = GCNConv(16, 64)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        return x

# Instantiate the model
model = GCN()

# Generate embeddings
embeddings = model(data.x, data.edge_index)
print("Node embeddings:", embeddings)

Node embeddings: tensor([[ 1.6693e-02, -3.2546e-02,  1.4114e-02, -3.3845e-02,  5.0788e-03,
         -4.0756e-02,  4.3694e-02, -1.8000e-02, -1.6657e-02,  7.9980e-04,
         -1.0239e-02, -5.5602e-03,  1.5350e-02,  3.6906e-02, -7.0971e-02,
          4.5729e-02,  4.0141e-02, -1.4429e-02, -1.1805e-02,  2.3613e-02,
         -2.0569e-02,  1.3397e-02, -2.6575e-02, -6.3894e-03,  2.3917e-02,
         -4.6345e-02,  1.0203e-02, -1.6867e-03, -4.2605e-02, -7.6576e-03,
         -2.8930e-02,  6.3790e-03,  1.2553e-02,  1.2120e-02,  8.0710e-03,
         -2.3439e-02, -2.4702e-03, -1.2501e-02,  2.6824e-03,  8.0765e-03,
          1.2334e-02, -5.0202e-02, -8.3622e-03,  2.4013e-02,  1.6856e-02,
          1.1289e-03, -1.8522e-02, -1.3105e-02, -2.4180e-03,  1.5451e-02,
         -5.7389e-03, -1.3324e-02, -6.9575e-03, -1.7982e-03,  1.2721e-02,
         -1.8096e-02, -2.3011e-03,  3.2086e-03,  1.9126e-02,  2.9487e-02,
         -4.4912e-04, -1.7479e-02,  2.9978e-02,  1.8737e-02],
        [ 2.3554e-02, -5.2153e-02

In [15]:
num_nodes = data.x.shape[0]  # Number of nodes in the graph
max_index = edge_index.max().item()  # Maximum node index in edge_index

if max_index >= num_nodes:
    raise ValueError(f"edge_index contains node index {max_index}, but there are only {num_nodes} nodes.")

ValueError: edge_index contains node index 4, but there are only 4 nodes.

In [None]:
print