In [1]:
import numpy as np
import torch

In [3]:
data_file = np.load("data/DGraph/raw/dgraphfin.npz")
type(data_file)

numpy.lib.npyio.NpzFile

In [49]:
import tomli

In [65]:
with open("config.toml", "rb") as file:
    configs = tomli.load(file)
configs

{'model': 'tgat',
 'device': 'cuda:0',
 'num_classes': 2,
 'hidden_size': 64,
 'num_layers': 2,
 'dropout': 0.0,
 'lr': 0.01,
 'metrics': ['AUC', 'AP'],
 'log_path': './logs.txt',
 'models': {'gcn': {'weight_decay': 5e-07}, 'tgat': {'weight_decay': 0.0}}}

In [68]:
configs['models'][configs['model']]

{'weight_decay': 0.0}

In [5]:
data_file = np.load("data/DGraph/raw/dgraphfin.npz")
data_file.files

['x',
 'y',
 'edge_index',
 'edge_type',
 'edge_timestamp',
 'train_mask',
 'valid_mask',
 'test_mask']

In [26]:
data_file['y']

array([2, 3, 2, ..., 2, 2, 2], dtype=int64)

In [27]:
np.concatenate((data_file['edge_index'], data_file['edge_timestamp'][:, None], 
               data_file['y'][:, None]), axis=-1)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 4300999 and the array at index 2 has size 3700550

In [17]:
import data_processing
import cogdl
import pandas as pd

In [38]:
dataset = data_processing.DGraphDataset(to_undirected=False)[0]
sb = cogdl.datasets.build_dataset_from_name("cora")[0]
dataset, sb

(DynamicGraph(x=[3700550, 17], y=[3700550], train_mask=[3700550], val_mask=[3700550], test_mask=[3700550], edge_time=[4300999], edge_index=[2, 4300999]),
 Graph(x=[2708, 1433], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_index=[2, 10556]))

In [39]:
dataset.edge_index, dataset.edge_time.view(1, -1)

((tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])),
 tensor([[254, 266, 240,  ..., 378, 594, 112]]))

In [40]:
reshaped = torch.stack((dataset.edge_index[0], dataset.edge_index[1]))
reshaped, dataset.edge_index

(tensor([[ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795],
         [2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636]]),
 (tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])))

In [41]:
edge = torch.cat((reshaped, dataset.edge_time.view(1, -1)), dim=0)  # process node time
edge, dataset.edge_index

(tensor([[ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795],
         [2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636],
         [    254,     266,     240,  ...,     378,     594,     112]]),
 (tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])))

In [42]:
degree = pd.DataFrame(edge.T.numpy()).groupby(0).min().values
degree, dataset.edge_index

(array([[ 133034,     212],
        [2884152,     243],
        [3553283,     721],
        ...,
        [2327524,     763],
        [1798752,     763],
        [2080404,     763]], dtype=int64),
 (tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])))

In [43]:
ids = pd.DataFrame(reshaped.T.numpy()).groupby(0).count().index.values
ids, dataset.edge_index

(array([      3,      13,      14, ..., 3699081, 3699082, 3699087],
       dtype=int64),
 (tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])))

In [44]:
keys = { i: 0 for i in range(dataset.x.shape[0]) }
for i in range(len(ids)):
    keys[ids[i]] = degree[i][1]
node_time = np.array(list(keys.values()))
node_time[:10], dataset.edge_index

(array([  0,   0,   0, 212,   0,   0,   0,   0,   0,   0], dtype=int64),
 (tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])))

In [45]:
undir = torch.cat((reshaped, reshaped[[1, 0], :]), dim=1)
undir[0, :], undir[1, :], dataset.edge_index

(tensor([ 476699,  347800,  154317,  ..., 1147595, 1314434, 2072636]),
 tensor([2915516, 1271242, 2104635,  ..., 1894383, 1895741, 1206795]),
 (tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])))

In [46]:
torch.cat((dataset.edge_time, dataset.edge_time), dim=0)

tensor([254, 266, 240,  ..., 378, 594, 112])

In [44]:
torch.tensor(data_file['edge_index']).long().T, dataset.edge_index, torch.randint(0, 5, (2, 5))


(tensor([[ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795],
         [2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636]]),
 (tensor([ 476699,  347800,  154317,  ..., 1894383, 1895741, 1206795]),
  tensor([2915516, 1271242, 2104635,  ..., 1147595, 1314434, 2072636])),
 tensor([[3, 1, 3, 2, 0],
         [3, 2, 2, 3, 4]]))

In [None]:
model = GCN(in_feats=dataset.num_features, hidden_size=64,
            out_feats=2, dropout=0.0, num_layers=2)

In [5]:
model.train()
output = model(dataset)
output

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]], grad_fn=<ScatterAddBackward0>)

In [6]:
output[dataset.val_mask].shape

torch.Size([183862, 2])

In [7]:
dataset.y[dataset.val_mask].shape

torch.Size([183862])

In [8]:
dataset.x

tensor([[ 0.,  5., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [ 0.,  5., -1.,  ..., -1., -1., -1.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.]])

In [9]:
x = dataset.x
x = (x-x.mean(0))/x.std(0)
dataset.x = x
dataset.x[:5]

tensor([[-0.6881,  0.7298, -0.6371, -0.6409, -0.9152, -0.5998, -0.5686, -0.7932,
         -0.5690, -0.9180, -0.3009, -0.8426, -0.8469, -0.8494, -0.8061, -0.6192,
         -0.6086],
        [-2.0435, -1.5475, -0.6371, -0.6409, -0.9152, -0.5998, -0.5686, -0.7932,
         -0.5690, -0.9180, -0.3009, -0.8426, -0.8469, -0.8494, -0.8061, -0.6192,
         -0.6086],
        [-0.6881,  0.7298, -0.6371, -0.6409, -0.9152, -0.5998, -0.5686, -0.7932,
         -0.5690, -0.9180, -0.3009, -0.8426, -0.8469, -0.8494, -0.8061, -0.6192,
         -0.6086],
        [ 0.6673,  0.7298, -0.6371, -0.6409, -0.9152, -0.5998, -0.5686, -0.7932,
         -0.5690, -0.9180, -0.3009, -0.8426, -0.8469, -0.8494, -0.8061, -0.6192,
         -0.6086],
        [ 0.6673,  1.4890, -0.6371, -0.6409, -0.9152, -0.5998, -0.5686, -0.7932,
         -0.5690, -0.9180, -0.3009, -0.8426, -0.8469, -0.8494, -0.8061, -0.6192,
         -0.6086]])

In [10]:
dataset.y.dim()==2

False

In [16]:
split_idx = {'train':dataset.train_mask, 'valid':dataset.val_mask, 'test':dataset.test_mask}
split_idx['train'].shape[1]

IndexError: tuple index out of range

In [17]:
dataset.x.size(-1), dataset.num_features

(17, 17)

In [1]:
from models.gcn_mlp_drop import MlpDropGCN, AdaptiveBbGCN

In [11]:
from torch import Tensor
def _multi_dropout(x: Tensor, probability: Tensor) -> Tensor:
    assert x.shape[0] == probability.shape[0]
    mask: Tensor = torch.rand_like(x) > probability
    return mask * x / (1.0 - probability)

In [13]:
import torch
x = torch.ones((5,2), requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)

In [14]:
p = torch.rand(5,1)
p

tensor([[0.4638],
        [0.9735],
        [0.5411],
        [0.1210],
        [0.7201]])

In [16]:
y = _multi_dropout(x, p)

In [17]:
y

tensor([[1.8649, 1.8649],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 1.1377],
        [3.5724, 0.0000]], grad_fn=<DivBackward0>)

In [24]:
y.sum().backward()

In [25]:
x.grad

tensor([[1.8649, 1.8649],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 1.1377],
        [3.5724, 0.0000]])

In [26]:
y

tensor([[1.8649, 1.8649],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 1.1377],
        [3.5724, 0.0000]], grad_fn=<DivBackward0>)

In [27]:
Y = torch.nn.functional.dropout(x, p=0.5)
Y

tensor([[2., 0.],
        [2., 0.],
        [2., 0.],
        [2., 0.],
        [0., 2.]], grad_fn=<MulBackward0>)

In [28]:
x.grad.zero_()
Y.sum().backward()
x.grad

tensor([[2., 0.],
        [2., 0.],
        [2., 0.],
        [2., 0.],
        [0., 2.]])