In [1]:
import torch
import numpy as np
from torch_geometric.loader import TemporalDataLoader
from torch_geometric.data import TemporalData
from collections import defaultdict
from tgb.linkproppred.dataset_pyg import PyGLinkPropPredDataset

In [5]:
def sort_tensors_by_time(
    timestamps: torch.Tensor, src: torch.Tensor, dst: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Sort three aligned event tensors by ascending timestamp.

    The permutation that sorts ``timestamps`` is applied to ``src`` and ``dst``
    as well, so entry ``i`` of each returned tensor still describes the same
    event. The sort is stable: events sharing a timestamp keep their original
    relative order, which makes the result deterministic.

    Parameters:
        timestamps: The tensor containing time data (one entry per event).
        src: The tensor containing source data, aligned with ``timestamps``.
        dst: The tensor containing destination data, aligned with ``timestamps``.

    Returns:
        tuple: Sorted timestamps, src, dst tensors (new tensors; the inputs
        are not modified).

    Raises:
        ValueError: If the three tensors do not have the same length.
    """
    num_events = timestamps.size(0)
    if src.size(0) != num_events or dst.size(0) != num_events:
        # A longer src/dst would otherwise be silently truncated by the indexing below.
        raise ValueError(
            "timestamps, src and dst must have the same length, got "
            f"{num_events}, {src.size(0)} and {dst.size(0)}"
        )

    # stable=True keeps the incoming order among events with equal timestamps.
    order = torch.argsort(timestamps, stable=True)
    return timestamps[order], src[order], dst[order]

# Load the "tgbl-review" link-property-prediction dataset; files are stored under
# the relative "data" root (the recorded run below downloaded and processed them).
dataset = PyGLinkPropPredDataset(name="tgbl-review", root="data")
# Full event stream as a single TemporalData object; it is indexed with the
# train/val/test masks further down.
data = dataset.get_TemporalData()

Dataset tgbl-review version 2 not found.
Please download the latest version of the dataset.
Download started, this might take a while . . .
Dataset title: tgbl-review
Dataset directory is  /Users/shahrad/projs/TGB_baseline/tgb/data/tgbl_review
Download completed
Dataset directory is  /Users/shahrad/projs/TGB_baseline/tgb/data/tgbl_review
file not processed, generating processed file
number of lines counted 4873540
numpy allocated


4873541it [00:08, 559098.90it/s]


In [8]:

# The rest of the notebook assumes the dataset is evaluated with MRR.
assert dataset.eval_metric == "mrr"

# Masks provided by the dataset that select the events of each split.
train_mask = dataset.train_mask
val_mask = dataset.val_mask
test_mask = dataset.test_mask
train_data = data[train_mask]
val_data = data[val_mask]
test_data = data[test_mask]

# Put every split in chronological order. Indexing the TemporalData object with
# the sorting permutation reorders all per-event attributes together, whereas
# re-assigning only t/src/dst would leave any other per-event attribute (e.g.
# edge messages, if the dataset provides them) in the old order and silently
# misaligned with the edges. stable=True keeps the incoming order of events
# that share a timestamp.
train_data = train_data[torch.argsort(train_data.t, stable=True)]
val_data = val_data[torch.argsort(val_data.t, stable=True)]
test_data = test_data[torch.argsort(test_data.t, stable=True)]

# find the union of nodes in the train, val, and test sets
# (one vectorised unique() instead of millions of Python-level .item() calls)
all_endpoints = torch.cat(
    [
        train_data.src,
        train_data.dst,
        val_data.src,
        val_data.dst,
        test_data.src,
        test_data.dst,
    ]
)
nodes = set(torch.unique(all_endpoints).tolist())
print(f"Number of nodes: {len(nodes)}")


Number of nodes: 352637


In [10]:
# Largest node id in the union. The recorded result (352636) is exactly one less
# than the reported node count (352637), which suggests the ids are contiguous
# and start at 0 — TODO confirm.
max(nodes)

352636

In [None]:
from tqdm import tqdm

# Number of events per mini-batch, shared by all three loaders.
BATCH_SIZE = 200

train_loader = TemporalDataLoader(train_data, batch_size=BATCH_SIZE)
val_loader = TemporalDataLoader(val_data, batch_size=BATCH_SIZE)
test_loader = TemporalDataLoader(test_data, batch_size=BATCH_SIZE)

# Sanity check: every endpoint (source AND destination) yielded by the training
# loader must belong to the node set collected earlier. The check is done once
# per batch with set arithmetic; nothing is printed per edge, so the cell output
# stays readable even for millions of events.
for batch in tqdm(train_loader, desc="Training"):
    batch_nodes = set(batch.src.tolist()) | set(batch.dst.tolist())
    unknown_nodes = batch_nodes - nodes
    assert not unknown_nodes, (
        "training batch contains node ids missing from `nodes`, "
        f"e.g. {sorted(unknown_nodes)[:10]}"
    )

In [1]:
from temporal_collaborative import TCF
import torch
import numpy as np
from torch_geometric.loader import TemporalDataLoader
from torch_geometric.data import TemporalData
from collections import defaultdict
from tgb.linkproppred.dataset_pyg import PyGLinkPropPredDataset

In [2]:
# Instantiate the TCF model (imported from temporal_collaborative) for the
# "tgbl-review" dataset; the recorded output below shows the dataset being
# loaded from the already-processed file during construction.
temporal_collaborative_filtering = TCF("tgbl-review")

raw file found, skipping download
Dataset directory is  /Users/shahrad/projs/TGB_baseline/tgb/data/tgbl_review
loading processed file
done here


In [3]:
# Inspect `sim_track` before training: the recorded output is an empty (nnz=0)
# sparse COO tensor of size 352637 x 352637.
temporal_collaborative_filtering.sim_track

tensor(indices=tensor([], size=(2, 0)),
       values=tensor([], size=(0,)),
       size=(352637, 352637), nnz=0, layout=torch.sparse_coo)

In [4]:
# Read access by row index works on the sparse tensor: row 0 comes back as an
# empty sparse COO vector of length 352637.
temporal_collaborative_filtering.sim_track[0]

tensor(indices=tensor([], size=(1, 0)),
       values=tensor([], size=(0,)),
       size=(352637,), nnz=0, layout=torch.sparse_coo)

In [5]:
# Same inspection for `bank`: row 0 is likewise an empty sparse COO vector of
# length 352637 in the recorded output.
temporal_collaborative_filtering.bank[0]

tensor(indices=tensor([], size=(1, 0)),
       values=tensor([], size=(0,)),
       size=(352637,), nnz=0, layout=torch.sparse_coo)

In [3]:
# NOTE(review): in the recorded run this call aborts on the first batch with
# `TypeError: Cannot assign to a sparse tensor`. Presumably TCF.train() writes
# into an element/row of a sparse COO tensor (such as `sim_track` or `bank`),
# which PyTorch sparse tensors do not support — the fix belongs in
# temporal_collaborative.py; verify there.
temporal_collaborative_filtering.train()

Training the temporal collaborative filtering model for O(352637^2) edges.


Training:   0%|          | 0/17070 [00:00<?, ?it/s]

Training:   0%|          | 0/17070 [00:00<?, ?it/s]


TypeError: Cannot assign to a sparse tensor