# imports

In [121]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.utils as pyg_utils
import networkx as nx
import matplotlib.pyplot as plt

import torch_geometric.transforms as T

In [122]:
# A Planetoid dataset is indexable; indexing with [0] yields the graph object
# whose attributes (x, edge_index, ...) are used below. The NormalizeFeatures
# transform is attached at load time.
data = Planetoid(
    root='./data/Cora',
    name='Cora',
    transform=T.NormalizeFeatures(),
)[0]



In [123]:
# Which Planetoid citation graph to load: 'Cora', 'CiteSeer', or 'PubMed'.
dataset_name = 'Cora'

# Load the chosen dataset under ./data. No transform is passed here.
dataset = Planetoid('./data', dataset_name)

# NOTE(review): this rebinds `data`, replacing the graph loaded earlier with
# T.NormalizeFeatures(); confirm that un-normalized features are intended.
data = dataset[0]

In [124]:
# Summarise the loaded graph in a single dictionary and print it.
# Node and edge counts are read from the graph object (`data`); feature and
# class counts are read from the dataset object (`dataset`).
data_details = dict(
    num_nodes=data.num_nodes,
    num_edges=data.num_edges,
    num_edge_features=dataset.num_edge_features,
    num_node_features=dataset.num_node_features,
    num_node_classes=dataset.num_classes,
)

print(data_details)

{'num_nodes': 2708, 'num_edges': 10556, 'num_edge_features': 0, 'num_node_features': 1433, 'num_node_classes': 7}


In [125]:
############ Visualization ####################
# Convert PyTorch Geometric data to NetworkX graph ---

# graph = pyg_utils.to_networkx(data, to_undirected=True)

# Draws the graph ----

# plt.figure(figsize=(8, 8))
# nx.draw(graph, node_size=10, node_color='b', edge_color='gray', with_labels=False)
# plt.title('CORA Dataset Graph')
# plt.show()

# AnomalyDAE (Dual Autoencoder for Anomaly Detection on Attributed Networks) model


In [126]:
# 1) Data preprocessing: Convert the PyTorch Geometric Data object into a format suitable for training an autoencoder. 

# 2) Define the Autoencoder Architecture: Define the architecture of the AnomalyDAE model. This typically involves defining the encoder and decoder networks.

# 3) Loss function: Define a suitable loss function for training the AnomalyDAE model. 

# 4) Training: Train the AnomalyDAE model using the CORA dataset. 

# 5) Anomaly Detection: Use the trained AnomalyDAE model to detect anomalies in the dataset

In [127]:
from pygod.generator import gen_contextual_outlier, gen_structural_outlier

# Inject synthetic outliers into the graph. Each generator returns the
# modified graph together with a per-node outlier indicator tensor.
# Contextual outliers first (n=100, k=50), then structural ones (m=10, n=10);
# see the PyGOD generator docs for the exact meaning of these arguments.
data, ya = gen_contextual_outlier(data, n=100, k=50)
data, ys = gen_structural_outlier(data, m=10, n=10)

# A node is labelled anomalous (1) if either generator marked it.
outlier_mask = torch.logical_or(ys, ya)
data.y = outlier_mask.long()


In [131]:
from torch.nn import Sigmoid
from pygod import detector
from pygod.detector import AnomalyDAE

import torch.nn.functional as F
from pygod.detector import DeepDetector
from pygod.nn import AnomalyDAEBase

class AnomalyDAE(DeepDetector):
    """AnomalyDAE detector built on PyGOD's ``DeepDetector`` and ``AnomalyDAEBase``.

    The network itself is ``AnomalyDAEBase``; this class stores the
    hyperparameters, builds the network in :meth:`init_model`, and computes
    the per-node score and training loss in :meth:`forward_model`.

    NOTE: defining this class rebinds the name ``AnomalyDAE``, shadowing the
    class of the same name imported from ``pygod.detector`` earlier in this
    file.

    Parameters
    ----------
    emb_dim : int
        Passed to ``AnomalyDAEBase`` as ``emb_dim``.
    hid_dim : int
        Passed to ``AnomalyDAEBase`` as ``hid_dim``; also the width of the
        buffer allocated when ``save_emb`` is true.
    num_layers, backbone
        Forwarded to ``DeepDetector`` only. A warning is issued when either
        differs from its default, because this detector does not use them.
    alpha : float
        The loss is called with ``weight = 1 - alpha``.
    theta : float
        Converted to ``theta / (1 + theta)`` and passed to the loss as the
        structure positive weight.
    eta : float
        Converted to ``eta / (1 + eta)`` and passed to the loss as the
        attribute positive weight.
    dropout, weight_decay, act, contamination, lr, epoch, gpu, batch_size,
    num_neigh, verbose, save_emb, compile_model, **kwargs
        Forwarded unchanged to ``DeepDetector.__init__``.
    """

    def __init__(self,
                 emb_dim=64,
                 hid_dim=64,
                 num_layers=4,
                 dropout=0.,
                 weight_decay=0.,
                 act=F.relu,
                 backbone=None,
                 alpha=0.5,
                 theta=1.,
                 eta=1.,
                 contamination=0.1,
                 lr=0.004,
                 epoch=5,
                 gpu=-1,
                 batch_size=0,
                 num_neigh=-1,
                 verbose=0,
                 save_emb=False,
                 compile_model=False,
                 **kwargs):
        # Imported here because the file never imports `warnings` at the top;
        # without it the warning below would raise NameError instead.
        import warnings

        if backbone is not None or num_layers != 4:
            warnings.warn("Backbone and num_layers are not used in AnomalyDAE")

        super().__init__(hid_dim=hid_dim,
                         num_layers=num_layers,
                         dropout=dropout,
                         weight_decay=weight_decay,
                         act=act,
                         backbone=backbone,
                         contamination=contamination,
                         lr=lr,
                         epoch=epoch,
                         gpu=gpu,
                         batch_size=batch_size,
                         num_neigh=num_neigh,
                         verbose=verbose,
                         save_emb=save_emb,
                         compile_model=compile_model,
                         **kwargs)

        # Hyperparameters specific to this detector (not handled by the base).
        self.emb_dim = emb_dim
        self.alpha = alpha
        self.theta = theta
        self.eta = eta

    def process_graph(self, data):
        """Preprocess ``data`` by delegating to ``AnomalyDAEBase.process_graph``.

        Nothing is returned, so any preprocessing result must be attached to
        ``data`` by the delegate. Presumably this is where ``data.s`` (read in
        :meth:`forward_model`) is set -- confirm against the PyGOD source.
        """
        AnomalyDAEBase.process_graph(data)

    def init_model(self, **kwargs):
        """Build the ``AnomalyDAEBase`` network and move it to ``self.device``.

        When ``self.save_emb`` is true, a zero-filled
        ``(num_nodes, hid_dim)`` tensor is first stored in ``self.emb``.

        Parameters
        ----------
        **kwargs
            Extra keyword arguments passed through to ``AnomalyDAEBase``.

        Returns
        -------
        The constructed ``AnomalyDAEBase`` instance on ``self.device``.
        """
        if self.save_emb:
            self.emb = torch.zeros(self.num_nodes,
                                   self.hid_dim)

        return AnomalyDAEBase(in_dim=self.in_dim,
                              num_nodes=self.num_nodes,
                              emb_dim=self.emb_dim,
                              hid_dim=self.hid_dim,
                              dropout=self.dropout,
                              act=self.act,
                              **kwargs).to(self.device)

    def forward_model(self, data):
        """Run the network on one batch and compute its loss and scores.

        Parameters
        ----------
        data
            Batch object providing ``batch_size``, ``n_id``, ``x``, ``s`` and
            ``edge_index``.

        Returns
        -------
        loss : torch.Tensor
            Mean of the scores returned by ``self.model.loss_func``.
        score : torch.Tensor
            The same scores, detached from the graph and moved to the CPU.
        """
        batch_size = data.batch_size
        node_idx = data.n_id

        x = data.x.to(self.device)
        s = data.s.to(self.device)
        edge_index = data.edge_index.to(self.device)

        x_, s_ = self.model(x, edge_index, batch_size)

        # Convert the user-facing hyperparameters into the weights expected
        # by the loss function.
        weight = 1 - self.alpha
        pos_weight_a = self.eta / (1 + self.eta)
        pos_weight_s = self.theta / (1 + self.theta)

        # Only the first `batch_size` rows are scored; the columns of `s`
        # are restricted to the node ids present in this batch.
        score = self.model.loss_func(x[:batch_size],
                                     x_[:batch_size],
                                     s[:batch_size, node_idx],
                                     s_[:batch_size],
                                     weight,
                                     pos_weight_a,
                                     pos_weight_s)

        loss = torch.mean(score)

        return loss, score.detach().cpu()

In [133]:
# Build the detector with every hyperparameter left at its default value.
res = AnomalyDAE()

# Printing the detector shows its configured hyperparameters.
print(res)

AnomalyDAE(act=<function relu at 0x000002659D874180>, alpha=0.5,
           backbone=None, batch_size=0, compile_model=False,
           contamination=0.1, dropout=0.0, emb_dim=64, epoch=5, eta=1.0,
           gpu=None, hid_dim=64, lr=0.004, num_layers=4,
           num_neigh=[-1, -1, -1, -1], save_emb=False, theta=1.0,
           verbose=0, weight_decay=0.0)
