In [3]:
from torch_geometric.nn import Node2Vec
import os.path as osp
import torch
from torch_geometric.datasets import Planetoid
from tqdm.notebook import tqdm
import networkx as nx
from clm_nets.config import global_config as glob

dataset = 'Cora'
path = osp.join(glob.UC_DATA_PKG_DIR, dataset)

dataset = Planetoid(path, dataset)  # dowload or load the Cora dataset
data = dataset[0]

device = 'cuda' if torch.cuda.is_available() else 'cpu'  # check if cuda is available to send the model and tensors to the GPU

In [2]:
G = nx.read_gpickle(osp.join(glob.UC_DATA_PKG_DIR, "doc_graph.gpickle"))

ValueError: unsupported pickle protocol: 5

In [4]:
model = Node2Vec(data.edge_index, embedding_dim=128, walk_length=20,
                 context_size=10, walks_per_node=10,
                 num_negative_samples=1, p=1, q=1, sparse=True).to(device)

loader = model.loader(batch_size=128, shuffle=True, num_workers=4)  # data loader to speed the train 

optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)  # initzialize the optimizer 

In [5]:
def train():
    model.train()  # put model in train model
    total_loss = 0
    for pos_rw, neg_rw in tqdm(loader):
        optimizer.zero_grad()  # set the gradients to 0
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))  # compute the loss for the batch
        loss.backward()
        optimizer.step()  # optimize the parameters
        total_loss += loss.item()
    return total_loss / len(loader)

In [6]:
n_epochs = 10
#for epoch in tqdm(range(1, n_epochs), total=n_epochs):
for epoch in range(1, n_epochs):
    loss = train()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 01, Loss: 8.1294


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 02, Loss: 6.1000


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 03, Loss: 4.9797


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 04, Loss: 4.1610


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 05, Loss: 3.4943


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 06, Loss: 2.9755


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 07, Loss: 2.5544


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 08, Loss: 2.2199


  0%|          | 0/22 [00:00<?, ?it/s]

Epoch: 09, Loss: 1.9516


In [5]:
all_vectors = ""
for tensor in model(torch.arange(data.num_nodes, device=device)):
    s = "\t".join([str(value) for value in tensor.detach().cpu().numpy()])
    all_vectors += s + "\n"

In [7]:
# save the vectors
with open("../data/vectors.txt", "w") as f:
    f.write(all_vectors)

# save the labels
with open("../data/labels.txt", "w") as f:
    f.write("\n".join([str(label) for label in data.y.numpy()]))

: 