In [1]:
import numpy as np
import torch
from torch_geometric.data import Data
from tqdm import trange
from torch.utils.data import DataLoader
from torch_geometric.nn import Node2Vec
import argparse

In [2]:
# Path of the graph file that is handed to read_graph in a later cell.
graph = 'txt_graphs/cnr-2000.graph-txt'

In [3]:
from embeddings import read_graph

In [4]:
# Load the graph. read_graph is imported from the local `embeddings` module,
# whose implementation is not visible in this notebook.
# Later cells stack (out_nodes, in_nodes) into edge_index and
# (out_degree, in_degree) into the node feature matrix x.
out_nodes, in_nodes, out_degree, in_degree = read_graph(graph)

  3%|▎         | 10103/325557 [00:00<00:03, 101024.56it/s]

325557 lines
reading..


100%|██████████| 325557/325557 [00:03<00:00, 99846.10it/s] 


finished reading, now preprocessing..


In [5]:
from torch_cluster import random_walk

In [6]:
# Stack the two endpoint sequences row-wise (row 0 = out_nodes, row 1 = in_nodes)
# and convert the result to an int64 tensor.
edge_index = torch.tensor(
    np.vstack([out_nodes, in_nodes]),
    dtype=torch.long,
)

In [7]:
# Number of columns of edge_index, i.e. how many edges were loaded.
edge_index.shape[1]

3294208

In [8]:
# Length of out_degree; the same value is handed to random_walk in a later cell.
# NOTE(review): presumably one entry per node -- confirm against read_graph.
len(out_degree)

325557

In [14]:
# Sample three random walks of 5 steps each, all starting at node 0, with
# return parameter p = 1 and in-out parameter q = 1.
#
# The node count is passed by keyword on purpose: torch_cluster releases whose
# signature is (row, col, start, walk_length, p, q, coalesced, num_nodes)
# would otherwise bind a 7th positional argument to `coalesced` and leave
# num_nodes unset.
# NOTE(review): confirm the installed torch_cluster version accepts these
# keyword names.
rw = random_walk(edge_index[0], edge_index[1], torch.tensor([0, 0, 0]),
                 walk_length=5, p=1, q=1, num_nodes=len(out_degree))

In [15]:
# Display the sampled walks returned by random_walk.
rw

tensor([[  0, 219, 220, 219, 156, 166],
        [  0,   8,   9,   3,   9,  10],
        [  0,   1,   0,   8,  14,   8]])

In [11]:
# Peek at the first 10 columns of edge_index (both rows).
edge_index[:, :10]

tensor([[  0,   0,   0,   0,   0,   1,   1,   1,   1,   1],
        [  1,   4,   8, 219, 220,   0,   7,   8, 219, 220]])

In [12]:
# Node feature matrix: stack out_degree and in_degree row-wise, transpose so
# that the two degree sequences become columns, and store as float32.
x = torch.tensor(
    np.vstack([out_degree, in_degree]).transpose(),
    dtype=torch.float,
)

In [13]:
# Bundle the feature matrix and the edge list into a torch_geometric Data object.
data = Data(x=x, edge_index=edge_index)

In [9]:
from torch.utils.data import DataLoader

In [10]:
# Iterate over the integers 0 .. data.num_nodes - 1 in shuffled mini-batches
# of 64, loaded by 10 worker processes.
loader = DataLoader(
    dataset=torch.arange(data.num_nodes),
    batch_size=64,
    shuffle=True,
    num_workers=10,
)

In [11]:
# Build the Node2Vec model: 64-dimensional embeddings, walks of length 20 with
# a context window of 20, 2 walks per node, p = q = 1 and 20 negative samples.
# NOTE(review): the first positional argument here is the node count; confirm
# that this matches the Node2Vec constructor of the installed torch_geometric.
model = Node2Vec(data.num_nodes, embedding_dim=64, walk_length=20,
                 context_size=20, walks_per_node=2, p=1, q=1,
                 num_negative_samples=20)

In [12]:
# Sanity check: print the index and size of the first three batches, then stop.
for pos, subset in enumerate(loader):
    print(pos, len(subset))
    if pos >= 2:
        break

0 64
1 64
2 64


In [13]:
# Move both the model and the graph data onto the GPU.
model = model.to('cuda')
data = data.to('cuda')

In [14]:
# Run one optimisation step on the first mini-batch as a smoke test.
# `loss` is left in the notebook namespace for inspection in the next cell.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
device = 'cuda'
model.train()
for pos, subset in enumerate(loader):
    # Only the first batch is used; stop as soon as the second one arrives.
    if pos > 0:
        break
    optimizer.zero_grad()
    loss = model.loss(data.edge_index, subset.to(device))
    # Back-propagate and apply the update; without these two calls the
    # optimizer is never used and the parameters never change.
    loss.backward()
    optimizer.step()

In [15]:
# Display the loss computed on the single batch processed in the previous cell.
loss

tensor(5.7392, device='cuda:0', grad_fn=<AddBackward0>)