# Import dependencies

In [56]:
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [68]:
import pickle
from typing import Dict

import numpy as np
import torch
from torch_geometric.loader import TemporalDataLoader
from tqdm import tqdm

from tgb.linkproppred.dataset_pyg import PyGLinkPropPredDataset
from tgb.linkproppred.evaluate import Evaluator

from inductive_baseline import *

# Load the review dataset

Note: the dataset consists of a bipartite graph. Hence, the Adamic Radar index does not work properly as a heuristic method

In [70]:
dataset_name = "tgbl-review"
initial_decay = 0.999

print(f"Running popularity baseline for {dataset_name} with decay {initial_decay}")
dataset = PyGLinkPropPredDataset(name=dataset_name, root="datasets")
train_mask = dataset.train_mask
val_mask = dataset.val_mask
test_mask = dataset.test_mask

data = dataset.get_TemporalData()
assert dataset.eval_metric == "mrr"

train_data = data[train_mask]
val_data = data[val_mask]
test_data = data[test_mask]

train_data.t, train_data.src, train_data.dst = sort_tensors_by_time(train_data.t , train_data.src , train_data.dst )
val_data.t, val_data.src, val_data.dst = sort_tensors_by_time(val_data.t , val_data.src , val_data.dst )
test_data.t, test_data.src, test_data.dst = sort_tensors_by_time(test_data.t , test_data.src , test_data.dst )

train_loader = TemporalDataLoader(train_data, batch_size=BATCH_SIZE)
val_loader = TemporalDataLoader(val_data, batch_size=BATCH_SIZE)
test_loader = TemporalDataLoader(test_data, batch_size=BATCH_SIZE)

neg_sampler = dataset.negative_sampler
dataset.load_val_ns()

best_mrr = 0.0
mrr_per_decay = {}

evaluator = Evaluator(name=dataset_name)
# Grid search over decay hyperparameter
decay = initial_decay
best_decay = decay

Running popularity baseline for tgbl-review with decay 0.999
raw file found, skipping download
Dataset directory is  /Users/shahrad/projs/TGB_baseline/tgb/datasets/tgbl_review
loading processed file


## Hyper-parameter tuning for the inductive model

In [66]:
while True:
    if decay > 1.0:
        break
    popularity = train(train_loader, num_nodes=data.num_nodes, decay=initial_decay)
    mrr = test(
        val_loader,
        neg_sampler,
        split_mode="val",
        popularity=popularity,
        decay=decay,
        evaluator=evaluator,
    )
    print(f"MRR: {mrr} for decay {decay}")
    mrr_per_decay[decay] = mrr
    if mrr > best_mrr:
        best_mrr = mrr
        best_decay = decay
    else:
        break
    if decay >= 0.99:
        decay += 0.001
    else:
        decay += 0.01
print(f"Best MRR: {best_mrr} for decay {best_decay}")

  0%|          | 0/17070 [00:00<?, ?it/s]

100%|██████████| 17070/17070 [00:15<00:00, 1099.54it/s]
100%|██████████| 3654/3654 [00:24<00:00, 151.94it/s]


Naive MRR: 0.29850074648857117
MRR: 0.29850074648857117 for decay 0.995


100%|██████████| 17070/17070 [00:16<00:00, 1060.41it/s]
100%|██████████| 3654/3654 [00:24<00:00, 147.69it/s]


Naive MRR: 0.3029438257217407
MRR: 0.3029438257217407 for decay 0.996


100%|██████████| 17070/17070 [00:15<00:00, 1083.00it/s]
100%|██████████| 3654/3654 [00:24<00:00, 149.46it/s]


Naive MRR: 0.3078403174877167
MRR: 0.3078403174877167 for decay 0.997


100%|██████████| 17070/17070 [00:16<00:00, 1057.70it/s]
100%|██████████| 3654/3654 [00:24<00:00, 148.41it/s]


Naive MRR: 0.3131766617298126
MRR: 0.3131766617298126 for decay 0.998


100%|██████████| 17070/17070 [00:15<00:00, 1072.13it/s]
100%|██████████| 3654/3654 [00:24<00:00, 150.69it/s]


Naive MRR: 0.31807658076286316
MRR: 0.31807658076286316 for decay 0.999


100%|██████████| 17070/17070 [00:15<00:00, 1073.84it/s]
100%|██████████| 3654/3654 [00:23<00:00, 152.77it/s]


Naive MRR: 0.317225843667984
MRR: 0.317225843667984 for decay 1.0
Best MRR: 0.31807658076286316 for decay 0.999


In [72]:
dataset.load_test_ns()
# Test set
train_val_data = data[train_mask | val_mask]
train_val_loader = TemporalDataLoader(train_val_data, batch_size=BATCH_SIZE)
popularity = train(train_val_loader, num_nodes=data.num_nodes, decay=0.997)
mrr = test(
    test_loader,
    neg_sampler,
    split_mode="test",
    popularity=popularity,
    decay=best_decay,
    evaluator=evaluator,
)
print(f"MRR on test set: {mrr}")

100%|██████████| 20724/20724 [00:18<00:00, 1100.40it/s]
100%|██████████| 3645/3645 [00:24<00:00, 148.29it/s]


Naive MRR: 0.38915905356407166
MRR on test set: 0.38915905356407166
