In [1]:
import os

# Send both HTTPS and HTTP traffic from this kernel through the local proxy
# (needed for the dataset download below when direct access is unavailable).
os.environ.update({
    'https_proxy': 'http://127.0.0.1:15777',
    'http_proxy': 'http://127.0.0.1:15777',
})

In [2]:
from ogb.nodeproppred import PygNodePropPredDataset
import pandas as pd
from dataset.dataloader import *
from dataset.embedding import *
from torch_geometric.loader import NeighborLoader, DataLoader
import torch_geometric.transforms as T

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [3]:
# Load ogbn-arxiv (stored under ./data) together with its predefined split.
dataset = PygNodePropPredDataset(root='./data', name='ogbn-arxiv')
split_idx = dataset.get_idx_split()
graph = dataset[0]

# Bundle the split indices and the graph object in one dictionary.
data = dict(
    train_idx=split_idx['train'],  # 90941
    valid_idx=split_idx['valid'],  # 29799
    test_idx=split_idx['test'],    # 48603
    graph=graph,
)

# Displayed as the cell result: shape of the label tensor.
graph.y.shape

torch.Size([169343, 1])

In [5]:
def naive_search(a, b=None, edge_index=None):
    """Print the positions of edges in an edge-index tensor that match a query.

    The tensor is read as two rows: row 0 is compared against ``a`` and
    row 1 against ``b``.

    Args:
        a: value to match in row 0 of the edge index.
        b: optional value to match in row 1. When given, only the position of
            the first edge matching both ``a`` and ``b`` is printed. When
            omitted, every edge whose row-0 entry equals ``a`` is printed as
            ``<position> <row-1 entry>``.
        edge_index: 2 x E tensor to search. Defaults to the notebook-level
            ``graph.edge_index``.

    Returns:
        None. Results are printed; nothing is printed when no edge matches.
    """
    if edge_index is None:
        edge_index = graph.edge_index
    src, dst = edge_index[0], edge_index[1]

    # One vectorised comparison instead of indexing the tensor once per edge
    # inside a Python loop.
    mask = src == a
    if b is not None:
        mask = mask & (dst == b)
    hits = torch.nonzero(mask).flatten().tolist()

    if b is not None:
        # Only the first matching edge is reported in pair-lookup mode.
        if hits:
            print(hits[0])
        return

    for i in hits:
        # dst[i] stays a 0-d tensor so the printed form is unchanged.
        print(i, dst[i])
        
# With b omitted, this prints the position and edge_index[1] entry of every
# edge whose edge_index[0] entry equals 13091.
naive_search(13091)


7499 tensor(42537)
7500 tensor(124512)


In [39]:
# NOTE(review): `batch` is not defined in any cell shown in this notebook;
# presumably it is one mini-batch taken from a loader in a cell that was
# removed — confirm, otherwise this cell fails on Restart & Run All.
batch.edge_index

EdgeIndex([[1024, 1025, 1026,  ..., 3291, 3292, 3293],
           [   1,    1,    2,  ..., 2459, 2461, 2462]],
          sparse_size=(3294, 3294), nnz=2374, sort_order=col)

In [40]:
import random
from typing import List, Tuple


class BiasedRandomWalker:
    """Second-order (node2vec-like) random walker over a graph database.

    The first step of a walk picks an out-neighbour uniformly. Every later
    step weights each candidate neighbour by its relation to the previously
    visited vertex:

    * ``1 / p`` if the candidate is the previous vertex (stepping back),
    * ``1``     if the candidate has an edge to the previous vertex,
    * ``1 / q`` otherwise.

    NOTE(review): ``db`` is assumed to expose ``CreateReadTxn()`` returning a
    transaction with ``GetVertexIterator([vid])`` and ``Commit()``, and
    iterators with ``IsValid/Next/GetId/GetNumOutEdges/ListDstVids/HasEdge/
    Goto`` — this is what the code calls; confirm against the database API.
    """

    def __init__(self, db, p: float = 1.2, q: float = 2.0):
        """Store the database handle and bias parameters, then index vertices.

        Args:
            db: graph database handle (see class note).
            p: return parameter; larger values make stepping back less likely.
            q: in-out parameter; larger values keep the walk near the
                previous vertex.
        """
        self.db = db
        self.ret_p = p
        self.io_q = q

        # Ids of all vertices that have at least one outgoing edge.
        self.connected_nodes = self._get_connected_nodes()

    def _get_connected_nodes(self):
        """Return the ids of all vertices with at least one outgoing edge."""
        txn = self.db.CreateReadTxn()
        try:
            vit = txn.GetVertexIterator()
            connected_nodes = []
            while vit.IsValid():
                # GetNumOutEdges() is indexed with [0] to get the count.
                if vit.GetNumOutEdges()[0] > 0:
                    connected_nodes.append(vit.GetId())
                vit.Next()
        finally:
            # End the read transaction even if the scan raised.
            txn.Commit()
        return connected_nodes

    def _normalize(self, weights):
        """Scale ``weights`` so they sum to 1 (an empty list stays empty)."""
        tot = sum(weights)
        return [w / tot for w in weights]

    def get_probs_uniform(self, txn, vit) -> Tuple[List[int], List[float]]:
        """Return the out-neighbours of ``vit`` with equal probabilities.

        Returns ``(nexts, [])`` with an empty ``nexts`` when the vertex has
        no outgoing edge, instead of dividing by zero.
        """
        nexts = vit.ListDstVids()[0]
        if not nexts:
            return nexts, []
        probs = [1 / len(nexts)] * len(nexts)
        return nexts, probs

    def get_probs_biased(self, txn, vit, prev: int) -> Tuple[List[int], List[float]]:
        """Return the out-neighbours of ``vit`` with p/q-biased probabilities.

        Args:
            txn: open read transaction, used to inspect each candidate.
            vit: iterator positioned on the current vertex.
            prev: id of the vertex visited immediately before the current one.

        Returns:
            ``(nexts, probs)`` where ``probs`` sums to 1; both are empty when
            the current vertex has no outgoing edge.
        """
        curr_nbrs = vit.ListDstVids()[0]

        nexts = []
        unnormalized_probs = []
        for nbr in curr_nbrs:
            nexts.append(nbr)
            if nbr == prev:
                # Stepping straight back to where we came from.
                unnormalized_probs.append(1 / self.ret_p)
            elif txn.GetVertexIterator(nbr).HasEdge(prev):
                # Candidate is itself connected to the previous vertex.
                unnormalized_probs.append(1)
            else:
                unnormalized_probs.append(1 / self.io_q)

        probs = self._normalize(unnormalized_probs)
        return nexts, probs

    def walk(self, start: int, length: int) -> List[int]:
        """Sample a walk of up to ``length`` vertices beginning at ``start``.

        Args:
            start: id of the first vertex.
            length: maximum number of vertices in the returned trace
                (including ``start``).

        Returns:
            The visited vertex ids in order. The trace is shorter than
            ``length`` when the walk reaches a vertex with no outgoing edge.
        """
        txn = self.db.CreateReadTxn()
        try:
            vit = txn.GetVertexIterator(start)

            trace = [vit.GetId()]
            prev = None

            while len(trace) < length:
                if prev is None:
                    nexts, probs = self.get_probs_uniform(txn, vit)
                else:
                    nexts, probs = self.get_probs_biased(txn, vit, prev)

                if not nexts:
                    # Dead end: nothing to sample from, stop early.
                    break

                target = random.choices(nexts, probs)[0]

                # Remember the vertex we are leaving so the next step is
                # biased; without this every step would be uniform.
                prev = trace[-1]
                trace.append(target)

                vit.Goto(vid=target, nearest=False)
        finally:
            # End the read transaction even if sampling raised.
            txn.Commit()
        return trace


ImportError: cannot import name 'random_walk' from 'torch_geometric.utils' (/home/loping151/anaconda3/envs/dm/lib/python3.10/site-packages/torch_geometric/utils/__init__.py)