In [6]:
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid,HGBDataset,AttributedGraphDataset
from torch_geometric.utils import to_undirected,add_self_loops
import scipy.sparse as sp
# Choose the attributed-graph benchmark by name and take the first graph of the
# dataset, with T.NormalizeFeatures() applied as its transform.
data_name = 'Cora'
data = AttributedGraphDataset(
    root='./data/attr-graphs/',
    name=data_name,
    transform=T.NormalizeFeatures(),
)[0]
# Alternative loader kept for reference (heterogeneous ACM graph from HGB); note
# that it yields a dataset object rather than a single graph.
# data = HGBDataset('../data/', 'ACM', transform=T.NormalizeFeatures())

In [9]:
# NOTE(review): the recorded output `ACM()` matches the commented-out
# HGBDataset('ACM') loader, not the active AttributedGraphDataset(...)[0] line;
# re-run from a fresh kernel to refresh it.
data

ACM()

In [11]:
# NOTE(review): the HeteroData output recorded for this cell comes from the
# HGBDataset variant, where `data` is a dataset object. With the active loader
# `data` is already `dataset[0]`, so `.data` presumably no longer applies —
# confirm before re-running.
data.data


HeteroData(
  [1mpaper[0m={
    x=[3025, 1902],
    y=[3025],
    train_mask=[3025],
    test_mask=[3025]
  },
  [1mauthor[0m={ x=[5959, 1902] },
  [1msubject[0m={ x=[56, 1902] },
  [1mterm[0m={ num_nodes=1902 },
  [1m(paper, cite, paper)[0m={ edge_index=[2, 5343] },
  [1m(paper, ref, paper)[0m={ edge_index=[2, 5343] },
  [1m(paper, to, author)[0m={ edge_index=[2, 9949] },
  [1m(author, to, paper)[0m={ edge_index=[2, 9949] },
  [1m(paper, to, subject)[0m={ edge_index=[2, 3025] },
  [1m(subject, to, paper)[0m={ edge_index=[2, 3025] },
  [1m(paper, to, term)[0m={ edge_index=[2, 255619] },
  [1m(term, to, paper)[0m={ edge_index=[2, 255619] }
)

In [155]:
import torch
from pygod.utils import gen_attribute_outliers, gen_structure_outliers

# Inject synthetic anomalies so the detector can be scored against known labels.
# A fixed seed makes the injected outliers (and every metric computed from them)
# reproducible. It is deliberately non-zero: some PyGOD releases appear to treat
# a falsy seed as "unseeded" — TODO confirm for the installed version.
# NOTE: this cell rebinds `data`, so re-running it without reloading the graph
# injects outliers on top of the already-modified graph.
OUTLIER_SEED = 42

data, ya = gen_attribute_outliers(data, n=100, k=50, random_state=OUTLIER_SEED)
data, ys = gen_structure_outliers(data, m=10, n=10, random_state=OUTLIER_SEED)
# A node is labelled 1 when either generator flagged it; this replaces whatever
# was previously stored in `data.y`.
data.y = torch.logical_or(ys, ya).int()

In [156]:
from pygod.models import DOMINANT

# Build the detector with library defaults; the repr recorded after fit() lists
# the effective settings (e.g. epoch=5, hid_dim=64, contamination=0.1).
model = DOMINANT()

In [157]:
# Train the detector on `data`; the recorded cell output is the repr of the
# DOMINANT estimator with its settings.
model.fit(data)

DOMINANT(act=<function relu at 0x7ff800830b00>, alpha=0.8, contamination=0.1,
     dropout=0.3, epoch=5, gpu=None, hid_dim=64, lr=0.005, num_layers=4,
     verbose=False, weight_decay=0.0)

In [158]:
# Score the graph with the fitted detector and print the raw score array under
# a heading line.
outlier_scores = model.decision_function(data)
print('Raw scores:', outlier_scores, sep='\n')

Raw scores:
[0.5421635  0.23841503 0.22310963 ... 0.24928817 0.60058576 0.54586416]


In [159]:
from pygod.utils.metric import \
    eval_roc_auc, \
    eval_recall_at_k, \
    eval_precision_at_k

# Cut-off rank used by the top-k metrics.
k = 200

# Convert the injected-outlier labels to NumPy once and reuse them for every metric.
y_true = data.y.numpy()
# Both top-k metrics share the same rank cut-off and fitted decision threshold.
topk_kwargs = dict(k=k, threshold=model.threshold_)

auc_score = eval_roc_auc(y_true, outlier_scores)
recall_at_k = eval_recall_at_k(y_true, outlier_scores, **topk_kwargs)
precision_at_k = eval_precision_at_k(y_true, outlier_scores, **topk_kwargs)

print('AUC Score:', auc_score)
print(f'Recall@{k}:', recall_at_k)
print(f'Precision@{k}:', precision_at_k)

AUC Score: 0.916250880039181
Recall@200: 0.5025641025641026
Precision@200: 0.49


In [131]:
# NOTE(review): the recorded output (x=[5196, 8189], execution count 131)
# predates the surrounding cells and does not match the 2708 x 1433 graph
# inspected further down — presumably left over from a run on another dataset.
# Re-run to refresh.
data

Data(x=[5196, 8189], edge_index=[2, 343486], y=[5196])

In [163]:
# Replace the edge list with its undirected version (the later is_undirected()
# check reports True).
# NOTE(review): by execution count this runs after the model was fitted and
# evaluated, so the reported metrics were computed on the graph as it was
# before this call — confirm the intended order.
data.edge_index=to_undirected(data.edge_index)

In [164]:
# data.edge_index=add_self_loops(data.edge_index)[0]

In [165]:
# Dimensions of the feature tensor stored in `data.x` (recorded: 2708 x 1433).
data.x.size()

torch.Size([2708, 1433])

In [166]:
# Sanity check after symmetrising the edge list: does the graph contain
# isolated nodes?
data.has_isolated_nodes()

False

In [167]:
# Confirm that the to_undirected() call above left the graph undirected.
data.is_undirected()

True

In [168]:
# Summary of the graph after outlier injection and edge symmetrisation.
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708])

In [136]:
from ogb.nodeproppred import PygNodePropPredDataset

# Load ogbn-arxiv in PyG format and pull out its train/valid/test index split.
dataset = PygNodePropPredDataset(name='ogbn-arxiv')

split_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = (
    split_idx[part] for part in ("train", "valid", "test")
)
# First entry of the dataset: the PyG graph object.
graph = dataset[0]



In [137]:
# Summary of the PyG ogbn-arxiv graph: node/edge counts and stored attributes.
graph

Data(num_nodes=169343, edge_index=[2, 1166243], x=[169343, 128], node_year=[169343, 1], y=[169343, 1])

In [138]:
# Does the raw ogbn-arxiv graph contain isolated nodes? (recorded result: True)
graph.has_isolated_nodes()

True

In [139]:
# Is the raw ogbn-arxiv edge list symmetric? (recorded result: False)
graph.is_undirected()

False

In [142]:
from ogb.nodeproppred import DglNodePropPredDataset
import dgl
# Load the same ogbn-arxiv benchmark through the DGL loader.
# NOTE: this rebinds `dataset`, `split_idx`, the three index variables and
# `graph`, replacing the PyG objects of the same names created earlier.
dataset = DglNodePropPredDataset(name='ogbn-arxiv')

split_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = (
    split_idx[part] for part in ("train", "valid", "test")
)
# graph: dgl graph object, label: torch tensor of shape (num_nodes, num_tasks)
graph, label = dataset[0]

In [143]:
# `graph` now refers to the DGL graph loaded above, not the earlier PyG object.
graph

Graph(num_nodes=169343, num_edges=1166243,
      ndata_schemes={'year': Scheme(shape=(1,), dtype=torch.int64), 'feat': Scheme(shape=(128,), dtype=torch.float32)}
      edata_schemes={})

In [145]:
# Preview only: the returned graph is displayed but not assigned to a name, so
# later cells keep using the existing `graph` binding.
dgl.add_reverse_edges(graph)

Graph(num_nodes=169343, num_edges=2332486,
      ndata_schemes={'year': Scheme(shape=(1,), dtype=torch.int64), 'feat': Scheme(shape=(128,), dtype=torch.float32)}
      edata_schemes={})

In [147]:
# Preview only: the result is displayed, not stored. The recorded edge count
# (2315598) is lower than that of add_reverse_edges (2332486) — presumably
# because duplicate edges are merged here; confirm against the DGL docs.
dgl.to_bidirected(graph,copy_ndata=True)

Graph(num_nodes=169343, num_edges=2315598,
      ndata_schemes={'year': Scheme(shape=(1,), dtype=torch.int64), 'feat': Scheme(shape=(128,), dtype=torch.float32)}
      edata_schemes={})

# Load the ACM dataset from text files and save it as a `.mat` file

In [21]:
import scipy.io as sio
import scipy.sparse as sp
import numpy as np

In [20]:
# Edge-list file for the ACM graph; a later cell reads it with np.genfromtxt
# as rows of integer node-id pairs.
path = '../data/acm/acm_graph.txt'

In [22]:
# Load the ACM matrix from text; it is wrapped as the 'Attributes' entry later.
# NOTE: this rebinds `data`, which earlier cells used for a graph object.
data = np.loadtxt('../data/acm/acm.txt')
# Row count `n` sizes the adjacency matrix below; the unpack requires a 2-D array.
# NOTE(review): binding `_` at notebook top level may interfere with IPython's
# last-output variable — confirm, or consider `data.shape[0]`.
n, _ = data.shape

In [26]:
# (rows, columns) of the loaded ACM matrix; `n` is the row count.
data.shape

(3025, 1870)

In [27]:
# Node ids are taken to be 0..n-1, so the id -> row-position lookup built here
# is the identity mapping.
idx = np.arange(n, dtype=np.int32)
idx_map = {node_id: position for position, node_id in enumerate(idx)}
# Raw edge list exactly as stored in the file at `path`.
edges_unordered = np.genfromtxt(path, dtype=np.int32)

In [32]:
# Peek at the raw edge list: one integer pair per row.
edges_unordered

array([[   0,    8],
       [   0,   20],
       [   0,   51],
       ...,
       [3024, 2948],
       [3024, 2983],
       [3024, 2991]], dtype=int32)

In [31]:
# The same ids as a flat 1-D sequence — the form passed through idx_map in the
# next cell.
edges_unordered.flatten()

array([   0,    8,    0, ..., 2983, 3024, 2991], dtype=int32)

In [34]:
# Translate every endpoint through idx_map, then restore the original
# edge-list shape.
edges = np.array(
    [idx_map.get(node_id) for node_id in edges_unordered.flatten()],
    dtype=np.int32,
).reshape(edges_unordered.shape)


In [35]:
# n x n sparse adjacency in COO form: a value of 1 at each (row, column) pair
# taken from the two columns of `edges`.
adj = sp.coo_matrix(
    (np.ones(len(edges)), (edges[:, 0], edges[:, 1])),
    shape=(n, n),
    dtype=np.float32,
)

In [40]:
# Integer labels read from text; stored under the 'Label' key further below.
# Presumably one label per row of the ACM matrix — verify the lengths match.
label=np.loadtxt('../data/acm/acm_label.txt', dtype=int)

In [42]:
# Store the loaded matrix in CSR sparse form and bundle the three arrays under
# the keys that get written to the .mat file.
feat = sp.csr_matrix(data)
mat_dict = {
    'Network': adj,
    'Attributes': feat,
    'Label': label,
}

In [44]:
# Write the bundled arrays to a MATLAB-format file.
# NOTE(review): if `label` is 1-D, savemat's default layout for 1-D arrays
# applies — confirm that consumers of the file expect that shape.
save_path='../data/ACM.mat'
sio.savemat(save_path,mat_dict)