In [6]:
import argparse
import os.path as osp

import torch
import torch.nn.functional as F

from torch_geometric.datasets import Flickr
from torch_geometric.loader import GraphSAINTRandomWalkSampler
from torch_geometric.nn import GraphConv
from torch_geometric.typing import WITH_TORCH_SPARSE
from torch_geometric.utils import degree

if not WITH_TORCH_SPARSE:
    quit("This example requires 'torch-sparse'")

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Flickr')
dataset = Flickr(path)
data = dataset[0]
row, col = data.edge_index
data.edge_weight = 1. / degree(col, data.num_nodes)[col]  # Norm by in-degree.

parser = argparse.ArgumentParser()
parser.add_argument('--use_normalization', action='store_true')
args = parser.parse_args()

loader = GraphSAINTRandomWalkSampler(data, batch_size=6000, walk_length=2,
                                     num_steps=5, sample_coverage=100,
                                     save_dir=dataset.processed_dir,
                                     num_workers=4)


class Net(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        in_channels = dataset.num_node_features
        out_channels = dataset.num_classes
        self.conv1 = GraphConv(in_channels, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = torch.nn.Linear(3 * hidden_channels, out_channels)

    def set_aggr(self, aggr):
        self.conv1.aggr = aggr
        self.conv2.aggr = aggr
        self.conv3.aggr = aggr

    def forward(self, x0, edge_index, edge_weight=None):
        x1 = F.relu(self.conv1(x0, edge_index, edge_weight))
        x1 = F.dropout(x1, p=0.2, training=self.training)
        x2 = F.relu(self.conv2(x1, edge_index, edge_weight))
        x2 = F.dropout(x2, p=0.2, training=self.training)
        x3 = F.relu(self.conv3(x2, edge_index, edge_weight))
        x3 = F.dropout(x3, p=0.2, training=self.training)
        x = torch.cat([x1, x2, x3], dim=-1)
        x = self.lin(x)
        return x.log_softmax(dim=-1)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(hidden_channels=256).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


def train():
    model.train()
    model.set_aggr('add' if args.use_normalization else 'mean')

    total_loss = total_examples = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()

        if args.use_normalization:
            edge_weight = data.edge_norm * data.edge_weight
            out = model(data.x, data.edge_index, edge_weight)
            loss = F.nll_loss(out, data.y, reduction='none')
            loss = (loss * data.node_norm)[data.train_mask].sum()
        else:
            out = model(data.x, data.edge_index)
            loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])

        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_nodes
        total_examples += data.num_nodes
    return total_loss / total_examples

@torch.no_grad()
def test():
    model.eval()
    model.set_aggr('mean')

    out = model(data.x.to(device), data.edge_index.to(device))
    pred = out.argmax(dim=-1)
    correct = pred.eq(data.y.to(device))

    accs = []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        accs.append(correct[mask].sum().item() / mask.sum().item())
    return accs


for epoch in range(1, 51):
    loss = train()
    accs = test()
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {accs[0]:.4f}, '
          f'Val: {accs[1]:.4f}, Test: {accs[2]:.4f}')

ImportError: cannot import name 'WITH_TORCH_SPARSE' from 'torch_geometric.typing' (/home/jacklu/anaconda3/envs/pyg-gpu/lib/python3.8/site-packages/torch_geometric/typing.py)

In [1]:
from data import get_dataset

import time
import utils
import random
import argparse
import numpy as np
import torch
import torch.nn.functional as F
from early_stop import EarlyStopping, Stop_args
from model import TransformerModel
from lr import PolynomialDecayLR
import os.path
import torch.utils.data as Data
import argparse
import networkx as nx
import pandas as pd
import dgl

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
import utils
import dgl
import torch
from ogb.nodeproppred import DglNodePropPredDataset
import scipy.sparse as sp
import os.path
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.data import  CoraFullDataset, AmazonCoBuyComputerDataset, AmazonCoBuyPhotoDataset,CoauthorCSDataset,CoauthorPhysicsDataset
import random
# from cache_sample import cache_sample_rand_csr

def get_dataset(dataset, pe_dim):
    if dataset in {"ogbn-arxiv", "ogbn-products", "ogbn-proteins", "ogbn-papers100M", "ogbn-mag"}:
        if dataset == "ogbn-arxiv":
            dataset = DglNodePropPredDataset(name="ogbn-arxiv")
        elif dataset == "ogbn-products":
            dataset = DglNodePropPredDataset(name="ogbn-products")
        elif dataset == "ogbn-proteins":
            dataset = DglNodePropPredDataset(name="ogbn-proteins")
        elif dataset == "ogbn-papers100M":
            dataset = DglNodePropPredDataset(name="ogbn-papers100M")
        elif dataset == "ogbn-mag":
            dataset = DglNodePropPredDataset(name="ogbn-mag")
        split_idx = dataset.get_idx_split()
        graph, labels = dataset[0]
        features = graph.ndata['feat']
        adj = graph.adj(scipy_fmt="csr")
        # adj = cache_sample_rand_csr(adj, s_len)
        # print(labels)

        idx_train = split_idx['train']
        idx_val = split_idx['valid']
        idx_test = split_idx['test']

        graph = dgl.from_scipy(adj)
        lpe = utils.laplacian_positional_encoding(graph, pe_dim) 
        features = torch.cat((features, lpe), dim=1)
        adj = utils.sparse_mx_to_torch_sparse_tensor(adj)
        # print(adj)
        print(labels)
        # labels = torch.argmax(labels, 1)
        labels = labels.reshape(-1)
        print(labels)

    elif dataset in {"pubmed", "corafull", "computer", "photo", "cs", "physics","cora", "citeseer"}:



        file_path = "dataset/"+dataset+".pt"

        data_list = torch.load(file_path)

        # data_list = [adj, features, labels, idx_train, idx_val, idx_test]
        adj = data_list[0]
        print(adj)
        features = data_list[1]
        labels = data_list[2]

        idx_train = data_list[3]
        idx_val = data_list[4]
        idx_test = data_list[5]

        if dataset == "pubmed":
            graph = PubmedGraphDataset()[0]
        elif dataset == "corafull":
            graph = CoraFullDataset()[0]
        elif dataset == "computer":
            graph = AmazonCoBuyComputerDataset()[0]
        elif dataset == "photo":
            graph = AmazonCoBuyPhotoDataset()[0]
        elif dataset == "cs":
            graph = CoauthorCSDataset()[0]
        elif dataset == "physics":
            graph = CoauthorPhysicsDataset()[0]
        elif dataset == "cora":
            graph = CoraGraphDataset()[0]
        elif dataset == "citeseer":
            graph = CiteseerGraphDataset()[0]

        graph = dgl.to_bidirected(graph)
        # graph = dgl.graph(graph)

        lpe = utils.laplacian_positional_encoding(graph, pe_dim) 
        # lpe = utils.randomwalk_positional_encoding(graph)
        
        features = torch.cat((features, lpe), dim=1)



    elif dataset in {"aminer", "reddit", "Amazon2M"}:

 
        file_path = './dataset/'+dataset+'.pt'

        data_list = torch.load(file_path)

        #adj, features, labels, idx_train, idx_val, idx_test

        adj = data_list[0]
        
        #print(type(adj))
        features = torch.tensor(data_list[1], dtype=torch.float32)
        labels = torch.tensor(data_list[2])
        idx_train = torch.tensor(data_list[3])
        idx_val = torch.tensor(data_list[4])
        idx_test = torch.tensor(data_list[5])

        graph = dgl.from_scipy(adj)


        lpe = utils.laplacian_positional_encoding(graph, pe_dim) 
       
        features = torch.cat((features, lpe), dim=1)

        adj = utils.sparse_mx_to_torch_sparse_tensor(adj)
        

        labels = torch.argmax(labels, -1)
        

    return adj, features, labels, idx_train, idx_val, idx_test

In [10]:
adj, features, labels, idx_train, idx_val, idx_test = get_dataset('cora', 3)

tensor(indices=tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
                       [ 633, 1862, 2582,  ...,  598, 1473, 2706]]),
       values=tensor([0.3333, 0.2887, 0.3333,  ..., 0.0870, 0.2500, 0.2500]),
       size=(2708, 2708), nnz=10556, layout=torch.sparse_coo)
  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.



	DGLGraph.adjacency_matrix(transpose, scipy_fmt="csr").

  lap_pos_enc = torch.from_numpy(EigVec[:, 1:pos_enc_dim+1]).float()


In [26]:
path = "/home/public/tokenize/Token_net/cora_t_num=100_w_len=5.pt"
pt = torch.load(path)

In [27]:
with open('cora_t_num=100_w_len=5.txt', 'w') as fp: 
    for node in range(features.shape[0]):
        walk = pt[node].tolist()
        walk.sort()
        print(walk)
        for sub_list in walk:
            fp.write(str(sub_list)+"\n")
    print('done')

[[0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, -1, -1, -1, -1, -1], [0, 633, -1, -1, -1, -1], [0, 633, -1, -1, -1, -1], [0, 633, -1, -1, -1, -1], [0, 633, -1, -1, -1, -1], [0, 633, -1, -1, -1, -1], [0, 633, -1, -1, -1, -1], [0, 633, -1, -1, -1, -1], [0, 633, 0, -1, -1, -1], [0, 633, 0, -1, -1, -1], [0, 633, 0, 633, -1, -1], [0, 633, 0, 633, 0, 633], [0, 633, 0, 633, 1701, -1], [0, 633, 0, 633, 1701, 232], [0, 633, 0, 633, 1866, 633], [0, 633, 0, 1862, 926, 1862], [0, 633, 0, 1862, 926, 1862], [0, 633, 0, 1862, 1701, -1], [0, 633, 0, 2582, 0, 2582], [0, 633, 0, 2582, 1166, 2582], [0, 633, 1701, 226, 1701, 1820], [0, 633, 1701, 231, 232

In [11]:
edge_index = adj._indices()
edge_index[0]
g = dgl.graph((edge_index[0], edge_index[1]))

[874,
 1445,
 2606,
 2008,
 1853,
 1986,
 2570,
 607,
 930,
 2358,
 622,
 2125,
 1860,
 30,
 433,
 952,
 1371,
 561,
 2625,
 48,
 1567,
 1937,
 1938,
 2276,
 2565,
 338,
 914,
 1472,
 143,
 477,
 1463,
 1019,
 1691,
 570,
 0,
 2162,
 2438,
 788,
 1272,
 1017,
 981,
 1939,
 2607,
 681,
 869,
 1977,
 401,
 1016,
 1126,
 575,
 1741,
 1432,
 2320,
 2244,
 1192,
 1424,
 2245,
 313,
 2543,
 811,
 2605,
 916,
 795,
 2296,
 2574,
 2349,
 2495,
 182,
 1603,
 351,
 1708,
 327,
 1270,
 1933,
 1119,
 374,
 1364,
 1940,
 590,
 1673,
 1159,
 1636,
 386,
 482,
 2243,
 2247,
 546,
 436,
 1558,
 2696,
 1057,
 2242,
 1496,
 1000,
 205,
 2412,
 982,
 285,
 1807,
 230,
 1439,
 1811,
 2529,
 673,
 267,
 2555,
 2212,
 398,
 1645,
 2668,
 171,
 579,
 1548,
 938,
 229,
 923,
 1929,
 967,
 598,
 2213,
 1321,
 172,
 1861,
 932,
 1403,
 1099,
 1820,
 2669,
 1928,
 2659,
 792,
 1400,
 2425,
 1870,
 1024,
 2038,
 1608,
 2065,
 1107,
 1868,
 2629,
 1434,
 1814,
 892,
 710,
 382,
 1066,
 2108,
 2078,
 411,
 1950,
 1