In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are re-imported before each cell executes. This lets edits to the local
# project sources take effect without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import random

import numpy as np
import torch
import torch_geometric as tg
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: cpu


In [26]:
# Local imports
from l2gv2.datasets import get_dataset
from l2gv2.graphs import TGraph, induced_subgraph
from l2gv2.patch import create_patch_data
from l2gv2.patch.clustering import louvain_clustering, metis_clustering
from l2gv2.embedding import patch_embeddings
from l2gv2.align.utils import preprocess_graphs, get_embedding, intersections_nodes
from l2gv2.align.geo import train_alignment_model
from l2gv2.embedding.gae import VGAE


# <font color="grey"> Local2Global Alignment</font>

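We use the Cora citation network dataset for the experiments. The graph is split into overlapping patches, each patch is embedded independently, and the patch embeddings are then aligned into a single global embedding.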

In [14]:
# Load the Cora dataset and wrap its first (index 0) graph in a TGraph.
cora = get_dataset("Cora")
cora_graph = cora[0]
tgcora = TGraph(
    edge_index=cora_graph.edge_index,
    edge_attr=cora_graph.edge_attr,
    num_nodes=cora_graph.num_nodes,
    ensure_sorted=True,
    undir=False,
)

Loading edge and node data from memory


HBox(children=(HTML(value=''), IntProgress(value=0, max=10556), HTML(value='')))

HBox(children=(HTML(value=''), IntProgress(value=0, max=2708), HTML(value='')))

In [15]:
# --- Experiment configuration -------------------------------------------
# Overlap bounds handed to create_patch_data when the patches are built.
min_overlap = 100
target_overlap = 200
# Embedding dimension shared by the patch embeddings and the alignment model.
dim = 64

# Fix the global random state so that the stochastic steps that follow
# (clustering, negative sampling, model training) can be repeated.
# NOTE(review): this seeds the stdlib, NumPy and PyTorch global generators;
# whether every l2gv2 routine draws from these generators is not visible
# from this notebook -- confirm if exact reproducibility is required.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [17]:
# Cluster the Cora graph with the project's Louvain clustering routine.
# The resulting tensor is later passed to create_patch_data as the partition.
partition_tensor = louvain_clustering(tgcora)

In [7]:
# Inspect the clustering result (presumably one cluster label per node -- TODO confirm).
partition_tensor

tensor([ 0, 28, 28,  ..., 49,  0,  0])

In [18]:
# Build the patches from the clustering, using the overlap bounds from the
# configuration cell. `pt` is iterated later to extract one induced subgraph
# per patch; `pgraph` is not used again in the visible part of this notebook
# (presumably the patch-level graph -- TODO confirm).
pt, pgraph = create_patch_data(
    tgcora,
    partition_tensor=partition_tensor,
    min_overlap=min_overlap,
    target_overlap=target_overlap,
    verbose=True,
)

number of patches: 12
average patch degree: 3.75


enlarging patch overlaps:   0%|          | 0/12 [00:00<?, ?it/s]

In [23]:
# One induced subgraph of the full graph per entry of `pt`.
patches = [
    induced_subgraph(tgcora, patch_nodes)
    for patch_nodes in pt
]

# Negative (non-existent) edges sampled from the full graph; they serve as
# the negative examples when the final embedding is scored.
neg_edges = tg.utils.negative_sampling(
    edge_index=tgcora.edge_index,
    num_nodes=tgcora.num_nodes,
)

In [24]:
# Embed every patch with a VGAE model (one training run per patch).
# The second return value is not used in this notebook. It is bound to a
# named throwaway instead of `_`, because assigning to `_` in IPython
# overrides the built-in "last output" variable and stops it being updated.
patches_emb, _patch_aux = patch_embeddings(
    patches,
    model="VGAE",
    dim=dim,
    hidden_dim=128,
    num_epochs=200,
    device=device,
)

# Node information on patch intersections, consumed by preprocess_graphs
# (presumably the nodes shared by overlapping patches -- TODO confirm).
nodes = intersections_nodes(patches)
n_patches = len(patches)

training patch with 2702 edges
training patch with 2304 edges
training patch with 1264 edges
training patch with 1492 edges
training patch with 2518 edges
training patch with 1172 edges
training patch with 2186 edges
training patch with 1262 edges
training patch with 1290 edges
training patch with 3206 edges
training patch with 1528 edges
training patch with 1948 edges


In [None]:
# Prepare the patch embeddings for alignment and fit the alignment model.
emb_patches = preprocess_graphs(patches_emb, nodes)
res, loss_hist = train_alignment_model(
    emb_patches,
    dim,
    n_patches,
    num_epochs=200,
    learning_rate=0.05,
    verbose=False,
)

# Combine the patch embeddings into a single embedding using the fitted
# alignment result.
emb = get_embedding(patches_emb, res)

# This VGAE instance is constructed but never trained in this notebook; it is
# used only for its `test` method, which scores the aligned embedding on the
# true edges against the sampled negative edges.
full_model_ip = VGAE(dim=dim, hidden_dim=128, num_features=cora.num_node_features)

# Read the edges from the first graph of the dataset (`cora[0]`), matching how
# the graph was accessed when `tgcora` was built.
auc, ap = full_model_ip.test(torch.tensor(emb), cora[0].edge_index, neg_edges)

# Show the metrics; without this the cell computes them but displays nothing.
print(f"AUC: {auc:.4f}, AP: {ap:.4f}")