In [2]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.utils as pyg_utils
import networkx as nx
import matplotlib.pyplot as plt

import torch_geometric.transforms as T

In [3]:
# Load Cora with the NormalizeFeatures transform applied; indexing with [0]
# returns the graph object whose attributes are used below.
# NOTE(review): `data` is rebound by the next cell, which reloads Cora without
# this transform — confirm which variant is intended.
data = Planetoid(
    root='./data/Cora',
    name='Cora',
    transform=T.NormalizeFeatures(),
)[0]

In [4]:
# Planetoid benchmark to load: one of 'Cora', 'CiteSeer', or 'PubMed'.
dataset_name = 'Cora'

# Build the dataset, using ./data as its root directory.
dataset = Planetoid('./data', dataset_name)

# The graph itself is the first item of the dataset.
# NOTE(review): no transform is passed here, unlike the earlier load that used
# NormalizeFeatures; this `data` is the one the later cells operate on.
data = dataset[0]

In [5]:
# Gather the headline statistics of the graph (`data`) and of the dataset it
# came from (`dataset`) into a single dictionary for display.
data_details = dict(
    num_nodes=data.num_nodes,
    num_edges=data.num_edges,
    num_edge_features=dataset.num_edge_features,
    num_node_features=dataset.num_node_features,
    num_node_classes=dataset.num_classes,
)

print(data_details)

{'num_nodes': 2708, 'num_edges': 10556, 'num_edge_features': 0, 'num_node_features': 1433, 'num_node_classes': 7}


In [6]:
############ Visualization ####################
# Convert PyTorch Geometric data to NetworkX graph ---

# graph = pyg_utils.to_networkx(data, to_undirected=True)

# Draws the graph ----

# plt.figure(figsize=(8, 8))
# nx.draw(graph, node_size=10, node_color='b', edge_color='gray', with_labels=False)
# plt.title('CORA Dataset Graph')
# plt.show()

# AnomalyDAE (dual autoencoder for anomaly detection on attributed networks) model


In [7]:
# 1) Data preprocessing: Convert the PyTorch Geometric Data object into a format suitable for training an autoencoder. 

# 2) Define the Autoencoder Architecture: Define the architecture of the AnomalyDAE model. This typically involves defining the encoder and decoder networks.

# 3) Loss function: Define a suitable loss function for training the AnomalyDAE model. 

# 4) Training: Train the AnomalyDAE model using the CORA dataset. 

# 5) Anomaly Detection: Use the trained AnomalyDAE model to detect anomalies in the dataset

In [8]:
from pygod.generator import gen_contextual_outlier, gen_structural_outlier

# Fixed seeds so the same outliers are injected on every run; without them the
# ground-truth labels (and every downstream result) change between runs.
# The two generators get distinct seeds so they do not replay the same random
# draws (both are asked for 100 outliers: n=100 and m*n=10*10).
CONTEXTUAL_OUTLIER_SEED = 42
STRUCTURAL_OUTLIER_SEED = 43

# NOTE: `data` is rebound to the perturbed graph, so re-running this cell
# without reloading `data` first stacks new outliers on top of the old ones.

# Inject contextual outliers; `ya` marks the nodes that were modified.
data, ya = gen_contextual_outlier(data, n=100, k=50, seed=CONTEXTUAL_OUTLIER_SEED)
# Inject structural outliers; `ys` marks the nodes that were modified.
data, ys = gen_structural_outlier(data, m=10, n=10, seed=STRUCTURAL_OUTLIER_SEED)
# Ground-truth label: 1 if a node is an outlier of either kind, 0 otherwise.
# This overwrites whatever `data.y` held before.
data.y = torch.logical_or(ys, ya).long()


In [9]:
from torch.nn import Sigmoid
from pygod import detector
from pygod.detector import AnomalyDAE

import torch.nn.functional as F
from pygod.detector import DeepDetector
from pygod.nn import AnomalyDAEBase

In [10]:
from pygod.detector import AnomalyDAE
import torch.sparse

# Seed torch's global RNG so training starts from the same random state on
# every run; without this the fitted model (and its predictions) vary between
# executions of the notebook.
torch.manual_seed(42)

# Detector with its default hyper-parameters.
model = AnomalyDAE()

# Train on the graph with injected outliers. Kept as the last expression so the
# cell still displays the fitted detector's configuration.
model.fit(data=data)

AnomalyDAE(act=<function relu at 0x000001FD4EE21280>, alpha=0.5,
           backbone=None, batch_size=2708, compile_model=False,
           contamination=0.1, dropout=0.0, emb_dim=64, epoch=5, eta=1.0,
           gpu=None, hid_dim=64, lr=0.004, num_layers=4,
           num_neigh=[-1, -1, -1, -1], save_emb=False, theta=1.0,
           verbose=0, weight_decay=0.0)

In [12]:
# Run the fitted AnomalyDAE detector on the graph and request every output it
# can return; the results come back in the order pred, score, prob, conf.
pred, score, prob, conf = model.predict(
    data,
    return_pred=True,
    return_score=True,
    return_prob=True,
    return_conf=True,
)

In [20]:
# Show the detector's verdict for a single node: the predicted label
# (1 = outlier), the probability of it being an outlier, and the confidence.
inspected_node = 11
print(pred[inspected_node], prob[inspected_node], conf[inspected_node])

tensor(1) tensor(0.8265) tensor(1.)
