In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv
import random,os
import numpy as np
import scipy.io as sio
import scipy.sparse as sp
import networkx as nx
import dgl
import pandas as pd
import math

import argparse
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from torch_geometric.utils import to_dense_adj
from torch_geometric.nn import GCN
from pygod.nn.decoder import DotProductDecoder
from pygod.nn.functional import double_recon_loss


In [None]:
def set_seed(seed=3407):
    """Seed the Python, NumPy and PyTorch random generators.

    Args:
        seed: Integer seed applied to every generator (default 3407).

    Also exports ``PYTHONHASHSEED``. When CUDA is available the CUDA
    generators are seeded as well and cuDNN is switched to deterministic,
    non-benchmark mode.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    # GPU path: seed the current device and every device, then pin cuDNN.
    for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        cuda_seed_fn(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed()

In [None]:
import math

import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

import torch.nn as nn
import torch.nn.functional as F

# Re-apply the default seed at the start of this cell (set_seed seeds Python, NumPy and PyTorch).
set_seed()
class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907

    Computes ``adj @ (input @ weight)`` and adds ``bias`` when one is present.
    """

    def __init__(self, in_features, out_features, bias=True):
        """Create the weight (and optional bias) and initialise them.

        Args:
            in_features: Number of input columns.
            out_features: Number of output columns.
            bias: When False, ``bias`` is registered as ``None``.
        """
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(
            torch.empty(in_features, out_features, dtype=torch.float32))
        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.empty(out_features, dtype=torch.float32))
        self.reset_parameters()

    def reset_parameters(self):
        """Draw weight, then bias, uniformly from ``[-b, b)`` with ``b = 1/sqrt(out_features)``."""
        bound = 1. / math.sqrt(self.weight.size(1))
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Project ``input`` with the weight, then aggregate with ``adj``."""
        projected = torch.mm(input, self.weight)
        aggregated = torch.spmm(adj, projected)
        return aggregated if self.bias is None else aggregated + self.bias

    def __repr__(self):
        return '{} ({} -> {})'.format(
            self.__class__.__name__, self.in_features, self.out_features)

In [None]:
# Re-apply the default seed before the model classes in this cell are defined.
set_seed()

class Encoder(nn.Module):
    """Two stacked graph convolutions (nfeat -> nhid -> nhid), each followed by ReLU.

    Dropout with probability ``dropout`` is applied between the two layers
    while the module is in training mode.
    """

    def __init__(self, nfeat, nhid, dropout):
        super().__init__()
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return the ReLU-activated output of the second graph convolution."""
        hidden = F.dropout(
            F.relu(self.gc1(x, adj)), self.dropout, training=self.training)
        return F.relu(self.gc2(hidden, adj))

class Attribute_Decoder(nn.Module):
    """Two stacked graph convolutions (nhid -> nhid -> nfeat), each followed by ReLU.

    Dropout with probability ``dropout`` is applied between the two layers
    while the module is in training mode. Because of the final ReLU the
    output is never negative.
    """

    def __init__(self, nfeat, nhid, dropout):
        super().__init__()
        self.gc1 = GraphConvolution(nhid, nhid)
        self.gc2 = GraphConvolution(nhid, nfeat)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return the ReLU-activated output of the second graph convolution."""
        hidden = F.dropout(
            F.relu(self.gc1(x, adj)), self.dropout, training=self.training)
        return F.relu(self.gc2(hidden, adj))

class Structure_Decoder(nn.Module):
    """Decode node embeddings into a dense node-by-node cosine-similarity matrix.

    One graph convolution + ReLU (+ dropout in training mode) produces an
    embedding per node; each row is L2-normalised and the result is
    ``x @ x.T``.
    """

    def __init__(self, nhid, dropout):
        super().__init__()
        self.gc1 = GraphConvolution(nhid, nhid)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return the (num_rows x num_rows) matrix of row-wise cosine similarities.

        Rows whose embedding is all zeros stay all zeros, so their
        similarity to every node is 0.
        """
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        # F.normalize divides by max(norm, eps), so an all-zero row maps to
        # zeros without ever producing 0/0. The previous "divide, then
        # overwrite NaNs in place" gave the same forward values but left NaN
        # gradients for such rows (e.g. when dropout zeroes a whole row),
        # which then contaminated every weight after optimizer.step().
        x = F.normalize(x, p=2, dim=1)
        return x @ x.T

class RE_Reconstruction(nn.Module):
    """Dual-branch graph autoencoder: one branch rebuilds attributes, one rebuilds structure.

    Args:
        feat_size: Number of input feature columns.
        hidden_size: Width of the hidden embeddings.
        dropout: Dropout probability forwarded to every sub-module.
    """

    def __init__(self, feat_size, hidden_size, dropout):
        # Was super(Dominant, self): `Dominant` is not defined anywhere in this
        # notebook, so constructing the model raised NameError.
        super().__init__()

        self.attr_encoder = Encoder(feat_size, hidden_size, dropout)
        self.struct_encoder = Encoder(feat_size, hidden_size, dropout)
        self.attr_decoder = Attribute_Decoder(feat_size, hidden_size, dropout)
        self.struct_decoder = Structure_Decoder(hidden_size, dropout)

    def forward(self, x, adj):
        """Encode ``x`` twice and decode both reconstructions.

        Returns:
            ``(struct_reconstructed, x_hat, x)`` - the structure decoder
            output, the attribute decoder output and the input ``x`` itself.
        """
        # Encode: independent encoders for the two branches.
        attr_latent = self.attr_encoder(x, adj)
        struct_latent = self.struct_encoder(x, adj)
        # Decode the feature matrix.
        x_hat = self.attr_decoder(attr_latent, adj)
        # Decode the adjacency matrix.
        struct_reconstructed = self.struct_decoder(struct_latent, adj)
        # NOTE(review): the third value is the unmodified input, not an
        # embedding, although callers unpack it as `latent_x` - confirm intent.
        return struct_reconstructed, x_hat, x

In [None]:
import dgl

# Dataset name; later cells reuse it to build checkpoint and output file names.
ds = 'reddit'
set_seed()
# NOTE(review): machine-specific absolute path - consider a configurable data directory.
graph = dgl.load_graphs('/data/zhengling/datasets/{}/{}'.format(ds, ds))[0][0]
# Fall back to the CPU when no GPU is visible instead of failing on the first .to(device).
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Node features as float32 on the compute device.
x = graph.ndata['feature'].to(torch.float32).to(device)
num_nodes = x.shape[0]
# Labels as a host NumPy array (a later cell rebinds `y` to a device tensor).
y = graph.ndata['label'].detach().numpy()
# Stack the (src, dst) tensors returned by graph.edges() into one 2 x E index
# tensor, without a round-trip through Python lists.
edges = torch.stack(graph.edges()).long().to(device)
# Dense num_nodes x num_nodes adjacency built from the edge index.
adj_true = to_dense_adj(edges, max_num_nodes=num_nodes)[0].to(device)

In [None]:
from pygod.nn.functional import double_recon_loss
# Re-apply the default seed before the loss function in this cell is defined.
set_seed()

def _edge_cosine_similarity(feats, src_nodes, dst_nodes):
    """Cosine similarity between the rows of ``feats`` at each (src, dst) pair."""
    src = feats[src_nodes]
    dst = feats[dst_nodes]
    # The 1e-8 keeps all-zero rows from dividing by zero.
    src = src / (torch.norm(src, dim=1, keepdim=True) + 1e-8)
    dst = dst / (torch.norm(dst, dim=1, keepdim=True) + 1e-8)
    return torch.sum(src * dst, dim=1)


def loss_func(adj, A_hat, attrs, X_hat, src_nodes, dst_nodes, norm_index, anom_index, alpha, beta):
    """Combine attribute, structure and edge-similarity reconstruction errors.

    Args:
        adj: Target adjacency matrix (compared element-wise with ``A_hat``).
        A_hat: Reconstructed adjacency matrix.
        attrs: Target feature matrix.
        X_hat: Reconstructed feature matrix.
        src_nodes, dst_nodes: Index tensors selecting the two endpoints of each edge.
        norm_index, anom_index: Unused; kept so existing call sites keep working.
        alpha: Weight of the attribute term.
        beta: Weight of the structure term; the similarity term gets
            ``1 - alpha - beta``.

    Returns:
        ``(cost, structure_cost, attribute_cost, sim_cost, diff_feats)`` where
        ``cost`` has one entry per row, the three ``*_cost`` values are
        scalars, and ``diff_feats`` is ``X_hat - attrs`` concatenated with
        ``A_hat - adj`` along dim 1.
    """
    # Attribute term: per-row L2 norm of the feature residual.
    x_diff = X_hat - attrs
    attribute_reconstruction_errors = torch.sqrt(torch.sum(torch.pow(x_diff, 2), 1))
    attribute_cost = torch.mean(attribute_reconstruction_errors)

    # Structure term: per-row L2 norm of the adjacency residual.
    a_diff = A_hat - adj
    structure_reconstruction_errors = torch.sqrt(torch.sum(torch.pow(a_diff, 2), 1))
    structure_cost = torch.mean(structure_reconstruction_errors)

    # Similarity term: how well per-edge cosine similarity is preserved.
    cos_sim_x = _edge_cosine_similarity(attrs, src_nodes, dst_nodes)
    cos_sim_hat = _edge_cosine_similarity(X_hat, src_nodes, dst_nodes)
    # NOTE: this is a single scalar over all edges, so in `cost` below it adds
    # the same constant to every row and does not change their ranking.
    sim_reconstruction_errors = torch.sqrt(torch.sum(torch.pow(cos_sim_x - cos_sim_hat, 2)))
    # Already 0-dimensional, so no extra mean is needed (the original computed
    # this term twice and averaged the scalar).
    sim_cost = sim_reconstruction_errors

    cost = (alpha * attribute_reconstruction_errors
            + beta * structure_reconstruction_errors
            + (1 - alpha - beta) * sim_reconstruction_errors)
    diff_feats = torch.cat((x_diff, a_diff), dim=1)

    return cost, structure_cost, attribute_cost, sim_cost, diff_feats

In [None]:
# Column 0 of each stored mask matrix, as boolean tensors on the compute device.
# torch.as_tensor avoids the copy and the "To copy construct from a tensor"
# UserWarning that torch.tensor(<tensor>) emits.
train_mask = torch.as_tensor(graph.ndata['train_masks'][:, 0]).bool().to(device)
print(train_mask.shape)
valid_mask = torch.as_tensor(graph.ndata['val_masks'][:, 0]).bool().to(device)
test_mask = torch.as_tensor(graph.ndata['test_masks'][:, 0]).bool().to(device)

In [None]:
# Row 0 of the edge index holds source node ids, row 1 the destination ids.
src_nodes, dst_nodes = edges[0], edges[1]

In [None]:
# Aliases used by the training and analysis cells. `adj` is not consumed by
# the loop below (the model is fed adj_true); it is kept for compatibility.
adj = edges
adj_label = adj_true
attrs = x
set_seed()

epoch_num = 100
# Grid of loss weights; combinations with alpha + beta > 1 are skipped.
alphas = [i * 0.1 for i in range(11)]
y = graph.ndata['label'].to(device)
betas = [i * 0.1 for i in range(11)]
losses = []
diff_norm_feats = []
diff_norm_adjs = []
diff_anom_feats = []
diff_anom_adjs = []
# Best validation AUC over ALL (alpha, beta) runs; drives checkpointing.
best_valid_auc = -1
# Weights and epoch that produced the saved checkpoint.
best_alpha, best_beta, best_epoch = None, None, None
max_aucs = []

# Labels and split masks never change: move them to the host once instead of
# on every epoch.
y_np = y.detach().cpu().numpy()
train_mask_np = train_mask.detach().cpu().numpy()
valid_mask_np = valid_mask.detach().cpu().numpy()
test_mask_np = test_mask.detach().cpu().numpy()

for alpha in alphas:
    for beta in betas:
        if alpha + beta > 1: continue
        model = RE_Reconstruction(feat_size = attrs.size(1), hidden_size = 256, dropout = 0.3).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr = 5e-4)
        train_aucs = []
        valid_aucs = []
        test_aucs = []
        all_aucs = []
        for epoch in range(epoch_num):
            # ---- one full-batch optimisation step ----
            model.train()
            optimizer.zero_grad()
            A_hat, X_hat, latent_x = model(attrs, adj_true)
            loss, struct_loss, feat_loss, sim_loss, diff_feats = loss_func(adj_label, A_hat, attrs, X_hat, src_nodes, dst_nodes, y==0, y==1, alpha, beta)
            l = torch.mean(loss)
            l.backward()
            optimizer.step()
            losses.append(l.detach().cpu())

            if (epoch + 1) % 10 == 0 or epoch == epoch_num-1:
                print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(l.item()), "train/struct_loss=", "{:.5f}".format(struct_loss.item()),"train/feat_loss=", "{:.5f}".format(feat_loss.item()), "train/sim_loss=", "{:.5f}".format(sim_loss.item()))

            # ---- evaluate after every epoch; per-node loss is the anomaly score ----
            model.eval()
            # no_grad: scoring needs no autograd graph, and building one over
            # the dense num_nodes x num_nodes reconstruction wastes memory.
            with torch.no_grad():
                A_hat, X_hat, latent_x = model(attrs, adj_true)
                loss, struct_loss, feat_loss, sim_loss, diff_feats = loss_func(adj_label, A_hat, attrs, X_hat, src_nodes, dst_nodes, y==0, y==1, alpha, beta)
            scores_np = loss.detach().cpu().numpy()

            auc = roc_auc_score(y_np[train_mask_np], scores_np[train_mask_np])
            train_aucs.append(auc)
            v_a = roc_auc_score(y_np[valid_mask_np], scores_np[valid_mask_np])
            valid_aucs.append(v_a)
            if v_a > best_valid_auc:
                best_valid_auc = v_a
                best_alpha, best_beta, best_epoch = alpha, beta, epoch + 1
                torch.save(model.state_dict(), 'checkpoint_' + ds + '.pt')
            test_auc = roc_auc_score(y_np[test_mask_np], scores_np[test_mask_np])
            all_auc = roc_auc_score(y_np, scores_np)
            all_aucs.append(all_auc)
            test_aucs.append(test_auc)
        print('alpha : ', alpha, 'beta: ', beta,  max(train_aucs), max(valid_aucs), max(test_aucs), max(all_aucs))
        print('valid_aucs: ', valid_aucs)
        print('test_aucs: ', test_aucs)
        max_aucs.append([alpha, beta, max(train_aucs), max(valid_aucs), max(test_aucs), max(all_aucs)])
  
    

In [None]:
# Each max_aucs row is [alpha, beta, train, valid, test, all]; pull out the
# four AUC columns and report the best value of each across the whole grid.
train_aucs, valid_aucs, test_aucs, all_aucs = (
    [row[col] for row in max_aucs] for col in range(2, 6)
)
print(max(train_aucs), max(valid_aucs), max(test_aucs), max(all_aucs))

In [None]:
set_seed()
# Rebuild the architecture and restore the weights saved during the grid search.
model = RE_Reconstruction(feat_size = attrs.size(1), hidden_size = 256, dropout = 0.3).to(device)
# map_location lets a checkpoint written on one device load on another
# (e.g. saved on GPU, reloaded on a CPU-only machine).
model.load_state_dict(torch.load('checkpoint_' + ds + '.pt', map_location=device))
model.eval()
# Inference only: skip autograd bookkeeping for the dense reconstruction.
with torch.no_grad():
    A_hat, X_hat, latent_x = model(attrs, adj_true)

In [None]:
set_seed()
# Score every node with alpha=0.8, beta=0 and print the ROC-AUC on the test split.
# NOTE(review): these weights are hard-coded; confirm they match the saved checkpoint.
loss, struct_loss, feat_loss, sim_loss, diff_feats = loss_func(
    adj_label, A_hat, attrs, X_hat,
    src_nodes, dst_nodes,
    y==0, y==1,
    0.8, 0,
)
print(roc_auc_score(
    y[test_mask].detach().cpu().numpy(),
    loss[test_mask].detach().cpu().numpy(),
))

In [None]:
# Min-max scale the per-node loss. The bounds are named loss_min / loss_max so
# the builtins min() and max() are not shadowed: rebinding them to tensors
# broke every later (or re-run) cell that calls max(...) on a list.
loss_min = torch.min(loss)
loss_max = torch.max(loss)

# clamp_min guards against 0/0 when every node has the same loss; detach makes
# the scores plain data so they can be stored on a graph later.
loss_score = ((loss - loss_min) / (loss_max - loss_min).clamp_min(1e-12)).detach()
print(loss_score)

In [None]:
# Idea: cluster on latent_x, X_hat - x, A_hat - A and sim_hat - sim,
# or simply rank the nodes by their loss instead?
new_sim_matrix = np.zeros((num_nodes, num_nodes))
print(new_sim_matrix.shape)

In [None]:
# Per-node anomaly score on the host, with the ascending order of node ids
# and the scores rearranged into that ascending order.
anomaly_scores = loss.detach().cpu().numpy()
anomaly_scores_index = np.argsort(anomaly_scores)
anomaly_scores_sorted = anomaly_scores[anomaly_scores_index]
print(anomaly_scores_sorted)

In [None]:
import networkx as nx
# One (src, dst) row per edge, as a host array, for networkx.
edges_for_nx = edges.t().detach().cpu().numpy()

# Undirected graph over the original edges.
G = nx.Graph()
G.add_edges_from(edges_for_nx)
print(G.number_of_edges(), G.number_of_nodes())

In [None]:
# Having a similar anomaly score is not enough to call two nodes alike;
# compare them through the similarity of latent_x and of the residual pattern.
latent_x_sim = torch.matmul(latent_x, latent_x.T)

In [None]:
# Feature residual per node and the pairwise dot products between residuals.
diff_feats = torch.sub(X_hat, attrs)
diff_feats_sim = torch.matmul(diff_feats, diff_feats.T)
print(diff_feats_sim.shape)

In [None]:
# For each mixing ratio r, link every node to its most similar OTHER node
# (similarity = r * latent_x_sim + (1 - r) * diff_feats_sim) and save the
# augmented graph.
labels_np = y.detach().cpu().numpy()
# save_graphs does not create missing directories.
os.makedirs('datasets', exist_ok=True)

for a in range(11):
    r = a / 10
    tmp_sim_matrix = r * latent_x_sim + (1-r) * diff_feats_sim
    print(tmp_sim_matrix.shape)
    tmp_sim_matrix = tmp_sim_matrix.detach().cpu().numpy()

    # A node must not pick itself as its nearest neighbour.
    np.fill_diagonal(tmp_sim_matrix, -np.inf)
    # Index of the most similar node per row. The previous
    # np.where(np.argsort(row) == num_nodes - 1) returned the rank position of
    # node num_nodes-1 in the sorted order, not the nearest neighbour.
    nearest_index = tmp_sim_matrix.argmax(axis=1)

    # cnt: nodes whose nearest neighbour has the same label.
    cnt = int((labels_np == labels_np[nearest_index]).sum())
    # cnt1: nodes whose nearest neighbour is already adjacent in G. The old
    # check passed the neighbour's LABEL to has_edge instead of its node id.
    cnt1 = sum(1 for i, j in enumerate(nearest_index) if G.has_edge(i, int(j)))
    print(cnt, cnt1)

    # One new directed edge i -> nearest(i) per node, appended to the originals.
    new_edges = torch.stack([
        torch.arange(num_nodes, dtype=torch.long),
        torch.from_numpy(nearest_index).long(),
    ]).to(device)
    final_edges = torch.cat([edges, new_edges], dim=1)
    # num_nodes is passed explicitly so the node count always matches the
    # per-node tensors assigned below.
    new_graph = dgl.graph(
        (final_edges[0].detach().cpu().numpy(), final_edges[1].detach().cpu().numpy()),
        num_nodes=num_nodes,
    )
    new_graph.ndata['train_mask'] = graph.ndata['train_masks']
    new_graph.ndata['val_mask'] = graph.ndata['val_masks']
    new_graph.ndata['test_mask'] = graph.ndata['test_masks']
    new_graph.ndata['label'] = y.cpu()
    new_graph.ndata['feature'] = attrs.cpu()
    # Detach so the stored scores carry no autograd history.
    new_graph.ndata['label_1'] = loss_score.detach().cpu()

    # Use the integer step for the file suffix rather than int(r * 10).
    dgl.save_graphs('datasets/new_' + ds + '_' + str(a), [new_graph])