In [1]:
import argparse
import logging
import os
import time

import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn
import networkx as nx
import numpy as np
import pandas as pd
import scipy.sparse as ssp
import torch
import torch.nn as nn
from tqdm import trange
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

Using backend: pytorch


In [2]:
# Extend the import search path so the project-local packages below resolve.
# NOTE(review): the first entry is an absolute, machine-specific path; these
# imports may fail on another machine unless it is adjusted.
import sys
sys.path.append("/nfs/zty/Graph/Dynamic-Graph")
sys.path.append(".")
# NOTE(review): load_data and get_free_gpu are not used in the cells visible here.
from data_loader.minibatch import load_data
from model.utils import get_free_gpu

In [3]:
# Import the local test_dgl helper module and reload it so that edits made to
# the file are picked up without restarting the kernel.
import importlib
import test_dgl
importlib.reload(test_dgl)
# Helpers are reached as test_dgl.<name> in later cells instead of:
# from test_dgl import test_graph, construct_dglgraph

INFO:root:Namespace(gpu=True)
setGPU: Setting GPU to: 0
INFO:root:Namespace(gpu=True)
setGPU: Setting GPU to: 0


<module 'test_dgl' from '/nfs/zty/Graph/Dynamic-Graph/torch_model/test_dgl.py'>

In [4]:
# Fetch the toy edge/node inputs from the project helper (their exact contents
# are defined in test_dgl, which is not shown here).
edges, nodes = test_dgl.test_graph()

In [5]:
# Build the graph object used by every later cell from those inputs.
g = test_dgl.construct_dglgraph(edges, nodes)

In [6]:
# Display the graph summary: node/edge counts and the node/edge feature schemes.
g

DGLGraph(num_nodes=10, num_edges=45,
         ndata_schemes={'nfeat': Scheme(shape=(4,), dtype=torch.float32)}
         edata_schemes={'timestamp': Scheme(shape=(), dtype=torch.float32), 'efeat': Scheme(shape=(1,), dtype=torch.float32)})

In [7]:
# Check whether the node feature tensor tracks gradients (True in the recorded run).
g.ndata["nfeat"].requires_grad

True

In [8]:
def message_func(edges):
    """Message function: forward each edge's source-node feature as ``"emsg"``.

    ``edges`` is an edge batch exposing ``src``, ``dst`` and ``data`` mappings.
    The shape of the ``"efeat"`` edge feature is printed for inspection only.
    """
    efeat_shape = edges.data["efeat"].shape
    print("msg func1", efeat_shape)
    source_feat = edges.src["nfeat"]
    return dict(emsg=source_feat)

def message_func2(edges):
    """Second message function: same ``"emsg"`` payload, different trace line.

    Returns the source-node feature ``"nfeat"`` of every edge in the batch and
    prints a fixed marker so the active message function can be told apart.
    """
    print("msg func2")
    source_feat = edges.src["nfeat"]
    return dict(emsg=source_feat)

In [9]:
def reduce_func(nodes):
    """Reduce function: average the received ``"emsg"`` messages per node.

    ``nodes.mailbox["emsg"]`` is averaged over dim 1 (the message axis) and the
    result is returned under ``"nred"``. The mailbox shape is printed for
    inspection only.
    """
    inbox = nodes.mailbox["emsg"]
    print("reduce func", inbox.shape)
    averaged = inbox.mean(dim=1)
    return dict(nred=averaged)

In [10]:
# Install the user-defined message/reduce functions on the graph and run one
# message-passing round over all edges. In the recorded output the reduce
# function is invoked once per distinct mailbox size (1..9 messages).
g.register_message_func(message_func)
g.register_reduce_func(reduce_func)
g.update_all()

msg func1 torch.Size([45, 1])
reduce func torch.Size([1, 1, 4])
reduce func torch.Size([1, 2, 4])
reduce func torch.Size([1, 3, 4])
reduce func torch.Size([1, 4, 4])
reduce func torch.Size([1, 5, 4])
reduce func torch.Size([1, 6, 4])
reduce func torch.Size([1, 7, 4])
reduce func torch.Size([1, 8, 4])
reduce func torch.Size([1, 9, 4])


In [11]:
# Add an edge 8 -> 9, give it a timestamp larger than every value currently
# stored, and rerun message passing.
# Assumes the new edge occupies the last row of the edge features — TODO confirm.
# NOTE(review): not idempotent; each re-run of this cell adds another edge.
g.add_edge(8, 9)
g.edata["timestamp"][-1] = g.edata["timestamp"].max() + 1
g.update_all()

msg func1 torch.Size([46, 1])
reduce func torch.Size([1, 1, 4])
reduce func torch.Size([1, 2, 4])
reduce func torch.Size([1, 3, 4])
reduce func torch.Size([1, 4, 4])
reduce func torch.Size([1, 5, 4])
reduce func torch.Size([1, 6, 4])
reduce func torch.Size([1, 7, 4])
reduce func torch.Size([1, 8, 4])
reduce func torch.Size([1, 10, 4])


In [12]:
# List every edge feature name together with its tensor shape.
[(k, g.edata[k].shape) for k in g.edata.keys()]

[('timestamp', torch.Size([46])), ('efeat', torch.Size([46, 1]))]

In [13]:
# List every node feature name together with its tensor shape.
[(k, g.ndata[k].shape) for k in g.ndata.keys()]

[('nfeat', torch.Size([10, 4])), ('nred', torch.Size([10, 4]))]

In [14]:
# Swap in the second message function and rerun; the reduce function is not
# re-registered here, and the recorded output still shows "reduce func" lines.
g.register_message_func(message_func2)
g.update_all()

msg func2
reduce func torch.Size([1, 1, 4])
reduce func torch.Size([1, 2, 4])
reduce func torch.Size([1, 3, 4])
reduce func torch.Size([1, 4, 4])
reduce func torch.Size([1, 5, 4])
reduce func torch.Size([1, 6, 4])
reduce func torch.Size([1, 7, 4])
reduce func torch.Size([1, 8, 4])
reduce func torch.Size([1, 10, 4])


In [15]:
# NOTE(review): this import sits mid-notebook; it belongs with the imports in the first cell.
import torch.nn.functional as F
# Largest in-degree of g at the moment this cell runs. reduce_padding reads
# this module-level value, so it has to be recomputed if edges are added later.
max_degree = g.in_degrees().max()
def reduce_padding(nodes):
    """Reduce function: zero-pad each mailbox along dim 1 up to ``max_degree``.

    Reads ``nodes.mailbox["emsg"]`` and the module-level ``max_degree``; the
    padded tensor is returned under ``"deg_embds"``.
    """
    inbox = nodes.mailbox["emsg"]
    received = inbox.shape[1]
    missing = max_degree - received
    # F.pad lists pad widths from the last dim backwards: (0, 0) leaves the
    # last dim untouched, (0, missing) appends zero rows after the messages.
    padded = F.pad(inbox, (0, 0, 0, missing), "constant", 0)
    return {"deg_embds": padded}

In [16]:
# Switch the reduce function to the padding variant, rerun message passing and
# inspect the result. In the recorded run the stored tensor has shape
# (10, 10, 4): one zero-padded block of messages per node.
g.register_reduce_func(reduce_padding)
g.update_all()
print(g.ndata["deg_embds"].shape)
g.ndata["deg_embds"]

msg func2
torch.Size([10, 10, 4])


tensor([[[ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000]],

        [[ 0.7466, -2.3899, -1.1321,  0.9805],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000]],

        [[ 0.7466, -2.3899, -1.1321,  0.9805],
         

In [17]:
# Names of the node features currently stored on g.
list(g.ndata.keys())

['nfeat', 'nred', 'deg_embds']

In [18]:
# Take the subgraph on nodes 0, 1 and 2. Writing to the subgraph's features
# does not change the parent graph's features (checked in the next cells).
# The recorded repr shows only '_ID' fields, i.e. no parent features yet.
subg = g.subgraph([0, 1, 2])
subg

DGLGraph(num_nodes=3, num_edges=3,
         ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
         edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})

In [19]:
# NOTE(review): the value of this first expression is discarded; only the
# last expression of a cell is displayed.
list(subg.ndata.keys())
# Pull the parent's features into the subgraph, then overwrite row 0 of the
# subgraph's "nfeat" with ones and display the result.
subg.copy_from_parent()
subg.ndata["nfeat"][0] = torch.ones_like(subg.ndata["nfeat"][0])
subg.ndata["nfeat"]

tensor([[ 1.0000,  1.0000,  1.0000,  1.0000],
        [ 1.4682, -0.0885,  1.0883, -0.2492],
        [-0.4624, -0.5191,  0.5656, -0.1710]], grad_fn=<CopySlices>)

In [20]:
# Parent features for comparison: in the recorded run row 0 still holds its
# original values, so the subgraph write did not propagate back.
g.ndata["nfeat"]

Parameter containing:
tensor([[ 0.7466, -2.3899, -1.1321,  0.9805],
        [ 1.4682, -0.0885,  1.0883, -0.2492],
        [-0.4624, -0.5191,  0.5656, -0.1710],
        [-0.7000,  1.2897, -0.8471,  1.4051],
        [-0.9970,  0.8330, -0.0129, -2.2435],
        [ 0.3173, -1.4975,  1.1729,  0.9561],
        [ 0.1679,  1.3227, -1.3530,  1.4325],
        [-0.2166, -0.1543, -0.3290, -0.4450],
        [ 0.2871, -2.4716, -0.0759, -1.4017],
        [ 1.2816, -0.9382, -0.3057,  0.1287]], requires_grad=True)

In [21]:
def foo(g):
    """Probe ``local_var`` semantics: add a feature and write in place.

    On the local view of ``g`` this stores a new all-ones ``"foo"`` node
    feature (one entry per node) and overwrites row 0 of ``"nfeat"`` with
    zeros. Returns ``None``.
    """
    local_view = g.local_var()
    node_count = local_view.number_of_nodes()
    local_view.ndata["foo"] = torch.ones(node_count)
    first_row = local_view.ndata["nfeat"][0]
    # Item assignment writes into the existing "nfeat" tensor.
    local_view.ndata["nfeat"][0] = torch.zeros_like(first_row)
# In-place writes made through local_var() still modify the parent graph's
# feature tensor (row 0 of "nfeat" becomes zeros), whereas a feature key added
# inside foo ("foo") does not appear on the parent graph.
foo(g)
print(list(g.ndata.keys()))
g.ndata["nfeat"]

['nfeat', 'nred', 'deg_embds']


Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 1.4682, -0.0885,  1.0883, -0.2492],
        [-0.4624, -0.5191,  0.5656, -0.1710],
        [-0.7000,  1.2897, -0.8471,  1.4051],
        [-0.9970,  0.8330, -0.0129, -2.2435],
        [ 0.3173, -1.4975,  1.1729,  0.9561],
        [ 0.1679,  1.3227, -1.3530,  1.4325],
        [-0.2166, -0.1543, -0.3290, -0.4450],
        [ 0.2871, -2.4716, -0.0759, -1.4017],
        [ 1.2816, -0.9382, -0.3057,  0.1287]], grad_fn=<CopySlices>)

In [22]:
# Replace the edge feature with an all-ones tensor of the same shape and dtype;
# presumably so the sums computed in later cells are easy to check by eye.
g.edata["efeat"] = torch.ones_like(g.edata["efeat"])

In [27]:
def group_edges(edges):
    """Cumulatively sum edge features over each group's time-ordered edges.

    Written as the ``func`` argument of ``g.group_apply_edges``.
    ``edges.data["efeat"]`` is unpacked as ``(bucket_size, degree, dim)`` and
    ``edges.data["timestamp"]`` must already be in increasing order along
    dim 1 (asserted below).

    For edge ``i`` of a group the result is the sum of ``efeat`` over edges
    ``0..i`` of that group, multiplied by an all-ones ``(dim, 3)`` matrix.

    Returns:
        dict: ``{"next_feat": tensor of shape (bucket_size, degree, 3)}``.

    Raises:
        AssertionError: if argsort of a group's timestamps is not the identity
            order, i.e. the group is not sorted by time.
    """
    efeat = edges.data["efeat"]
    print(efeat.shape) # (bucket_size, degree, dim)
    _, deg, dim = efeat.shape
    # Alternative that is NOT used: build a per-bucket mask by comparing the
    # timestamps pairwise, ts.permute(0, 2, 1) <= ts with ts of shape
    # (bucket_size, degree, 1); that costs bucket_size * degree^2 comparisons.
    # Because each group is required to be time-ordered along dim 1, a single
    # lower-triangular (degree, degree) matrix serves every bucket instead.
    orders = torch.argsort(edges.data["timestamp"], dim=1)
    positions = torch.arange(deg, device=orders.device)
    assert torch.all(torch.eq(positions, orders)), \
        "timestamps must be increasing within each group"
    # Allocate mask and weight with efeat's dtype/device so the matmuls below
    # do not fail for non-float32 or non-CPU features.
    mask = torch.tril(efeat.new_ones((deg, deg)))
    # (deg, deg) @ (bucket_size, deg, dim) broadcasts over the bucket dim, so
    # a separate bmm path for a 3-D mask is not needed.
    mask_feat = torch.matmul(mask, efeat) # same as summing over all valid neighbors
    next_feat = torch.matmul(mask_feat, efeat.new_ones((dim, 3))) # (bucket_size, deg, 3)
    return {"next_feat": next_feat}
# group_edges unpacks each group's "efeat" into three dims, so a feature
# stored without a trailing feature axis gets one added first.
if g.edata["efeat"].dim() < 2:
    g.edata["efeat"] = g.edata["efeat"].unsqueeze(-1)
# Run group_edges once per bucket of edges that share a source node.
g.group_apply_edges(func=group_edges, group_by="src")
list(g.edata.keys())

torch.Size([1, 9, 1])
torch.Size([1, 8, 1])
torch.Size([1, 7, 1])
torch.Size([1, 6, 1])
torch.Size([1, 5, 1])
torch.Size([1, 4, 1])
torch.Size([1, 3, 1])
torch.Size([2, 2, 1])


['timestamp', 'efeat', 'next_feat']

In [28]:
# Per-edge result written back by group_apply_edges. With the all-ones edge
# feature the recorded values count up 1, 2, 3, ... within each source
# node's edges, repeated across the 3 output columns.
g.edata["next_feat"]

tensor([[1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [7., 7., 7.],
        [8., 8., 8.],
        [9., 9., 9.],
        [1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [7., 7., 7.],
        [8., 8., 8.],
        [1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [7., 7., 7.],
        [1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [6., 6., 6.],
        [1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [5., 5., 5.],
        [1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [4., 4., 4.],
        [1., 1., 1.],
        [2., 2., 2.],
        [3., 3., 3.],
        [1., 1., 1.],
        [2., 2., 2.],
        [1., 1., 1.],
        [2

In [42]:
def onehop_conv(g, current_layer=1):
    """Run one layer of time-masked neighbour aggregation on the edges of ``g``.

    Two edge features are written per call:

    * ``"src_feat{current_layer}"`` - edges grouped by source node,
      aggregating the destination-side inputs;
    * ``"dst_feat{current_layer}"`` - edges grouped by destination node,
      aggregating the source-side inputs.

    Layer 1 reads the endpoint node feature ``"nfeat"``. Layer ``k > 1`` reads
    the edge features ``"src_feat{k-1}"`` and ``"dst_feat{k-1}"``, so layers
    have to be computed in order.

    Within a group, edge ``i`` receives the sum of the neighbour inputs over
    edges ``0..i`` (lower-triangular mask), multiplied by an all-ones
    ``(dim, 3)`` matrix.

    Args:
        g: graph exposing ``group_apply_edges(group_by=..., func=...)`` and an
            edge feature ``"timestamp"``.
        current_layer: 1-based index of the layer to compute.

    Raises:
        AssertionError: if the two inputs of a group differ in shape, or if
            argsort of a group's timestamps along dim 1 is not the identity
            order (the group is not sorted by time).
    """
    previous_layer = current_layer - 1

    def _layer_inputs(edges):
        """Return the (source-side, destination-side) inputs of this layer."""
        if previous_layer <= 0:
            return edges.src["nfeat"], edges.dst["nfeat"]
        return (
            edges.data["src_feat{}".format(previous_layer)],
            edges.data["dst_feat{}".format(previous_layer)],
        )

    def _masked_aggregate(edges, h_self, h_neighs):
        """Sum ``h_neighs`` over each edge's predecessors in time, then project to 3 dims."""
        assert h_self.shape == h_neighs.shape # (bucket_size, deg, dim)
        _, deg, dim = h_self.shape
        # The triangular mask is only valid if each group is time-ordered.
        orders = torch.argsort(edges.data["timestamp"], dim=1)
        positions = torch.arange(deg, device=orders.device)
        assert torch.all(torch.eq(positions, orders)), \
            "timestamps must be increasing within each group"
        # Allocate mask and weight with the input's dtype/device so the
        # matmuls do not fail for non-float32 or non-CPU features.
        mask = torch.tril(h_neighs.new_ones((deg, deg)))
        # Sum aggregation: (deg, deg) @ (bucket_size, deg, dim) broadcasts over
        # the bucket dim. Other aggregators (mean/gcn/pooling/lstm) would go
        # here; e.g. dividing by torch.sum(mask, dim=1, keepdim=True) gives a mean.
        mask_feat = torch.matmul(mask, h_neighs)
        return torch.matmul(mask_feat, h_neighs.new_ones((dim, 3)))

    def src_feat_conv(edges):
        '''Group by source nodes, computing the aggregation of destination nodes.'''
        h_self, h_neighs = _layer_inputs(edges)
        next_feat = _masked_aggregate(edges, h_self, h_neighs)
        return {"src_feat{}".format(current_layer): next_feat}

    def dst_feat_conv(edges):
        '''Group by destination nodes, computing the aggregation of source nodes.'''
        # Roles are swapped relative to src_feat_conv: the source side is the neighbour.
        h_neighs, h_self = _layer_inputs(edges)
        next_feat = _masked_aggregate(edges, h_self, h_neighs)
        return {"dst_feat{}".format(current_layer): next_feat}

    g.group_apply_edges(group_by="src", func=src_feat_conv) # compute temporal embeddings for src nodes
    g.group_apply_edges(group_by="dst", func=dst_feat_conv) # compute temporal embeddings for dst nodes



In [31]:
# Print the name and shape of every node feature, then of every edge feature.
# Plain loops are used instead of list comprehensions so the cell does not
# build throwaway lists or echo a list of None values as its result.
for k in g.ndata.keys():
    print(k, g.ndata[k].shape)
for k in g.edata.keys():
    print(k, g.edata[k].shape)

nfeat torch.Size([10, 4])
nred torch.Size([10, 4])
deg_embds torch.Size([10, 10, 4])
timestamp torch.Size([46])
efeat torch.Size([46, 1])
next_feat torch.Size([46, 3])


[None, None, None]

In [43]:
# Replace the node features with an all-ones tensor, compute layer 1 and show
# both resulting edge features.
# NOTE(review): the recorded key list already contains src_feat2/dst_feat2, so
# this output comes from a re-run after later cells, not a fresh top-to-bottom run.
g.ndata["nfeat"] = torch.ones_like(g.ndata["nfeat"])
onehop_conv(g, current_layer=1)
print(list(g.edata.keys()))
g.edata["src_feat1"], g.edata["dst_feat1"]

['timestamp', 'efeat', 'src_feat1', 'dst_feat1', 'src_feat2', 'dst_feat2']


(tensor([[ 4.,  4.,  4.],
         [ 8.,  8.,  8.],
         [12., 12., 12.],
         [16., 16., 16.],
         [20., 20., 20.],
         [24., 24., 24.],
         [28., 28., 28.],
         [32., 32., 32.],
         [36., 36., 36.],
         [ 4.,  4.,  4.],
         [ 8.,  8.,  8.],
         [12., 12., 12.],
         [16., 16., 16.],
         [20., 20., 20.],
         [24., 24., 24.],
         [28., 28., 28.],
         [32., 32., 32.],
         [ 4.,  4.,  4.],
         [ 8.,  8.,  8.],
         [12., 12., 12.],
         [16., 16., 16.],
         [20., 20., 20.],
         [24., 24., 24.],
         [28., 28., 28.],
         [ 4.,  4.,  4.],
         [ 8.,  8.,  8.],
         [12., 12., 12.],
         [16., 16., 16.],
         [20., 20., 20.],
         [24., 24., 24.],
         [ 4.,  4.,  4.],
         [ 8.,  8.,  8.],
         [12., 12., 12.],
         [16., 16., 16.],
         [20., 20., 20.],
         [ 4.,  4.,  4.],
         [ 8.,  8.,  8.],
         [12., 12., 12.],
         [16

In [45]:
# Layer 2 reads the src_feat1/dst_feat1 edge features, so layer 1 must have
# been computed on g beforehand.
onehop_conv(g, current_layer=2)
print(list(g.edata.keys()))
g.edata["src_feat2"], g.edata["dst_feat2"]

['timestamp', 'efeat', 'src_feat1', 'dst_feat1', 'src_feat2', 'dst_feat2']


(tensor([[ 12.,  12.,  12.],
         [ 24.,  24.,  24.],
         [ 36.,  36.,  36.],
         [ 48.,  48.,  48.],
         [ 60.,  60.,  60.],
         [ 72.,  72.,  72.],
         [ 84.,  84.,  84.],
         [ 96.,  96.,  96.],
         [108., 108., 108.],
         [ 24.,  24.,  24.],
         [ 48.,  48.,  48.],
         [ 72.,  72.,  72.],
         [ 96.,  96.,  96.],
         [120., 120., 120.],
         [144., 144., 144.],
         [168., 168., 168.],
         [192., 192., 192.],
         [ 36.,  36.,  36.],
         [ 72.,  72.,  72.],
         [108., 108., 108.],
         [144., 144., 144.],
         [180., 180., 180.],
         [216., 216., 216.],
         [252., 252., 252.],
         [ 48.,  48.,  48.],
         [ 96.,  96.,  96.],
         [144., 144., 144.],
         [192., 192., 192.],
         [240., 240., 240.],
         [288., 288., 288.],
         [ 60.,  60.,  60.],
         [120., 120., 120.],
         [180., 180., 180.],
         [240., 240., 240.],
         [300.

In [46]:
# Layer 3 reads the src_feat2/dst_feat2 edge features, so layer 2 must have
# been computed on g beforehand.
onehop_conv(g, current_layer=3)
print(list(g.edata.keys()))
g.edata["src_feat3"], g.edata["dst_feat3"]

['timestamp', 'efeat', 'src_feat1', 'dst_feat1', 'src_feat2', 'dst_feat2', 'src_feat3', 'dst_feat3']


(tensor([[  36.,   36.,   36.],
         [ 108.,  108.,  108.],
         [ 216.,  216.,  216.],
         [ 360.,  360.,  360.],
         [ 540.,  540.,  540.],
         [ 756.,  756.,  756.],
         [1008., 1008., 1008.],
         [1296., 1296., 1296.],
         [1620., 1620., 1620.],
         [ 108.,  108.,  108.],
         [ 288.,  288.,  288.],
         [ 540.,  540.,  540.],
         [ 864.,  864.,  864.],
         [1260., 1260., 1260.],
         [1728., 1728., 1728.],
         [2268., 2268., 2268.],
         [2880., 2880., 2880.],
         [ 216.,  216.,  216.],
         [ 540.,  540.,  540.],
         [ 972.,  972.,  972.],
         [1512., 1512., 1512.],
         [2160., 2160., 2160.],
         [2916., 2916., 2916.],
         [3780., 3780., 3780.],
         [ 360.,  360.,  360.],
         [ 864.,  864.,  864.],
         [1512., 1512., 1512.],
         [2304., 2304., 2304.],
         [3240., 3240., 3240.],
         [4320., 4320., 4320.],
         [ 540.,  540.,  540.],
        