In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from deepwalk.deepwalk import DeepWalk

In [2]:
class MLP(nn.Module):
    def __init__(self, num_layers, input_feat_dim, position_emb_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.num_layers = num_layers
        self.input_feat_dim = input_feat_dim
        self.position_emb_dim = position_emb_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim

        self.dropout = nn.Dropout(p=0.5)
        self.layers = nn.ModuleList()

        self.layers.append(nn.Linear(input_feat_dim + position_emb_dim, hidden_dim))

        for _ in range(num_layers - 2):
            self.layers.append(nn.Linear(hidden_dim, hidden_dim))

        self.layers.append(nn.Linear(hidden_dim, output_dim))
    
    def forward(self, x, position_emb):
        inp = torch.cat([x, position_emb], dim=-1)  
        for layer in self.layers[:-1]:
            inp = F.relu(layer(inp))
            inp = self.dropout(inp)
        out = self.layers[-1](inp)
        return out

In [3]:
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='./Cora', name='Cora')

data = dataset[0]

In [4]:
import networkx as nx

graph_nx = nx.Graph()
graph_nx.add_edges_from(data.edge_index.t().tolist())

In [5]:
deepwalk_model = DeepWalk(graph_nx, walk_length=80, walks_per_vertex=10)
deepwalk_model.train()
position_embeddings = deepwalk_model.get_embeddings()

Read 2M words
Number of words:  2709
Number of labels: 0
Progress: 100.0% words/sec/thread:  128333 lr:  0.000000 avg.loss:  3.686964 ETA:   0h 0m 0s
  return torch.tensor(embeddings)


In [6]:
data.x.shape

torch.Size([2708, 1433])

In [7]:
mlp = MLP(num_layers=3,
          input_feat_dim=data.x.shape[1],
          position_emb_dim=position_embeddings.shape[1],
          hidden_dim=64,
          output_dim=dataset.num_classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mlp = mlp.to(device)
data = data.to(device)
optimizer = torch.optim.Adam(mlp.parameters(), lr=0.01, weight_decay=5e-4)

In [8]:
def train():
    mlp.train()
    optimizer.zero_grad()
    out = mlp(data.x, position_embeddings.to(device))  # forward pass
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def test():
    mlp.eval()
    out = mlp(data.x, position_embeddings.to(device))
    pred = out.argmax(dim=1)

    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        correct = pred[mask] == data.y[mask]
        acc = int(correct.sum()) / int(mask.sum())
        accs.append(acc)
    return accs  # train_acc, val_acc, test_acc

# Training for 200 epochs
for epoch in range(1, 201):
    loss = train()
    train_acc, val_acc, test_acc = test()
    if epoch % 10 == 0 or epoch == 1:
        print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, '
              f'Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')


Epoch 001, Loss: 1.9534, Train Acc: 0.2214, Val Acc: 0.1300, Test Acc: 0.1390
Epoch 010, Loss: 0.5619, Train Acc: 0.9929, Val Acc: 0.7240, Test Acc: 0.7060
Epoch 020, Loss: 0.1274, Train Acc: 1.0000, Val Acc: 0.7260, Test Acc: 0.7180
Epoch 030, Loss: 0.0295, Train Acc: 1.0000, Val Acc: 0.7340, Test Acc: 0.7100
Epoch 040, Loss: 0.0241, Train Acc: 1.0000, Val Acc: 0.7000, Test Acc: 0.7050
Epoch 050, Loss: 0.0780, Train Acc: 1.0000, Val Acc: 0.7220, Test Acc: 0.7340
Epoch 060, Loss: 0.0649, Train Acc: 1.0000, Val Acc: 0.7200, Test Acc: 0.7120
Epoch 070, Loss: 0.0611, Train Acc: 1.0000, Val Acc: 0.7180, Test Acc: 0.7220
Epoch 080, Loss: 0.0242, Train Acc: 1.0000, Val Acc: 0.6760, Test Acc: 0.6700
Epoch 090, Loss: 0.0438, Train Acc: 1.0000, Val Acc: 0.6960, Test Acc: 0.6960
Epoch 100, Loss: 0.0211, Train Acc: 1.0000, Val Acc: 0.7100, Test Acc: 0.7020
Epoch 110, Loss: 0.0174, Train Acc: 1.0000, Val Acc: 0.6920, Test Acc: 0.6920
Epoch 120, Loss: 0.0406, Train Acc: 1.0000, Val Acc: 0.7120, Tes