<a href="https://colab.research.google.com/github/SecretPasta/DAGFCN/blob/main/DAGFCN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
from __future__ import print_function
from __future__ import division

#from .layers import *
#from .models import *
#from .utils import *

import math

import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

import torch.nn as nn
import torch.nn.functional as F
#from layers import GraphConvolution

import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim
#from utils import load_data
#from models import GCN
from sklearn.metrics import roc_curve, auc

import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
import sys
import torch
import sklearn

# Mount Google Drive at /content/drive; load_data() below reads the dataset
# files from a folder under this mount point.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime - confirm before running this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Layers

In [None]:
class GraphConvolution(Module):
    """
    Graph convolution layer in the spirit of https://arxiv.org/abs/1609.02907.

    With an adjacency matrix the layer computes ``adj @ (input @ weight)``;
    without one it reduces to the plain linear map ``input @ weight``. The
    bias, when enabled, is added to the result in both cases.
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if not bias:
            # Keep a ``bias`` attribute (set to None) so forward() can test it.
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill weight (and bias) uniformly in [-b, b], b = 1/sqrt(out_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)
        if self.bias is None:
            return
        self.bias.data.uniform_(-bound, bound)

    def forward(self, input, adj=None):
        """Apply the layer to ``input``; ``adj`` (sparse) is optional."""
        projected = torch.mm(input, self.weight)
        if adj is not None:
            # Propagate the projected features along the graph edges.
            projected = torch.spmm(adj, projected)
        return projected if self.bias is None else projected + self.bias

    def __repr__(self):
        return '{} ({} -> {})'.format(
            self.__class__.__name__, self.in_features, self.out_features)

# Models

In [None]:
class GCN(nn.Module):
    """
    Two-layer graph network with linear skip connections.

    ``gc1`` and ``gc3`` are applied with the adjacency matrix; ``gc2`` and
    ``gc4`` are applied to the raw input features without it (plain linear
    maps) and act as dropout-regularised skip paths into the hidden layer
    and the output layer respectively.

    :param nfeat: Number of input features per node.
    :param nhid: Width of the hidden layer.
    :param nclass: Number of output classes (logit columns).
    :param dropout: Dropout probability applied to both skip paths.
    """

    def __init__(self, nfeat, nhid , nclass, dropout):
        super(GCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nfeat, nhid)
        self.gc3 = GraphConvolution(nhid, nclass)
        self.gc4 = GraphConvolution(nfeat, nclass)
        self.dropout = dropout
        # Registered as a submodule so that model.train()/model.eval() reach
        # it: RReLU samples a random negative slope in training mode and uses
        # a fixed slope in evaluation mode. Creating it inside forward() left
        # it permanently in training mode, making evaluation outputs random.
        self.act = nn.RReLU()

    def forward(self, x, adj):
        """
        Run the network on node features ``x`` and adjacency matrix ``adj``.

        :return: Tuple ``(logits, w1, w2, w3, w4)`` where ``w1``..``w4`` are
            the weight parameters of ``gc1``..``gc4``, returned so the caller
            can add its own regularisation term.
        """
        # Skip paths: linear projections of the raw input (no adjacency).
        a_skip0 = self.gc2(x)
        a_skip0 = F.dropout(a_skip0, self.dropout, training=self.training)
        a_skip1 = self.gc4(x)
        a_skip1 = F.dropout(a_skip1, self.dropout, training=self.training)

        x = self.act(self.gc1(x, adj) + a_skip0)
        x = self.gc3(x, adj) + a_skip1

        # Kept for backward compatibility with code reading model.w1..w4.
        # NOTE: assigning a Parameter to a module attribute also registers it
        # under that name, so these aliases appear in state_dict() after the
        # first forward pass.
        self.w1 = self.gc1.weight
        self.w2 = self.gc2.weight
        self.w3 = self.gc3.weight
        self.w4 = self.gc4.weight

        return x, self.w1, self.w2, self.w3, self.w4

# Utils

In [None]:

def parse_index_file(filename):
    """Parse index file.

    Reads ``filename`` as text with one integer per line and returns the
    integers in file order. Blank lines are skipped.

    :param filename: Path of the index file.
    :return: List of ints, one per non-blank line.
    :raises ValueError: If a non-blank line is not a valid integer.
    """
    # The context manager closes the handle; the original left it open.
    with open(filename) as index_file:
        return [int(line) for line in index_file if line.strip()]

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor.

    :param sparse_mx: Any scipy sparse matrix; it is converted to COO format
        and its values are cast to float32.
    :return: A float32 sparse COO tensor with the same shape and entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    # COO indices are stacked as a 2 x nnz int64 array: row ids, then col ids.
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is its supported replacement.
    return torch.sparse_coo_tensor(indices, values, shape)

def normalize(mx):
    """Symmetrically normalize a sparse matrix: D^-1/2 * mx * D^-1/2.

    ``D`` is the diagonal matrix holding the row sums of ``mx``. Rows whose
    sum is zero get a scaling factor of 0 instead of infinity, so they stay
    all-zero in the result.

    :param mx: Square scipy sparse matrix.
    :return: The normalized sparse matrix.
    """
    rowsum = np.array(mx.sum(1))
    # Zero row sums produce inf here; they are reset to 0 on the next line,
    # so the divide-by-zero RuntimeWarning would only be noise.
    with np.errstate(divide='ignore'):
        r_inv = np.power(rowsum, -0.5).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    # Scale rows first, then columns.
    return r_mat_inv.dot(mx).dot(r_mat_inv)

def load_data(dataset_str, data_dir="/content/drive/MyDrive/Final Project/GFCN-master/GFCN/data"):
    """
    Loads the ``ind.<dataset_str>.*`` input files from ``data_dir`` and builds
    a binary node-classification task from them.

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using python pickle module.

    The class with the fewest members becomes label 0 and every other class
    becomes label 1. Features are standardized per column and the adjacency
    matrix is passed through ``normalize``.

    :param dataset_str: Dataset name
    :param data_dir: Directory containing the ``ind.*`` files.
    :return: Tuple ``(adj, features, labels, idx_train, idx_test, idx_val)``:
        sparse float adjacency tensor, dense float feature tensor, long label
        tensor and three long index tensors (note that the test indices come
        before the validation indices).
    """
    import os

    # Imported explicitly: ``import sklearn`` alone does not guarantee that
    # the ``sklearn.preprocessing`` submodule has been loaded.
    from sklearn.preprocessing import scale

    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for name in names:
        path = os.path.join(data_dir, "ind.{}.{}".format(dataset_str, name))
        with open(path, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code; only load
            # dataset files from a trusted source.
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        os.path.join(data_dir, "ind.{}.test.index".format(dataset_str)))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # The citeseer test indices have gaps: pad the test features/labels
        # with all-zero rows so every index in the full range has a row.
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    # Stack training and test rows, then move the test rows to the positions
    # given by the index file.
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    features = scale(np.array(features.todense()))

    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    adj = normalize(adj)

    labels = np.vstack((ally, ty)).astype(np.int32)
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    # The smallest class is the "abnormal" one: it maps to 0, the rest to 1.
    size_classes = np.sum(labels, axis=0)
    abnormal_class = np.argmin(size_classes)

    labels = np.argmax(labels, axis=1)
    labels = np.where(labels == abnormal_class, 0, 1)

    # NOTE(review): this shuffled permutation is never used below, so the
    # split is simply the first 10% / next 1% / remainder of the node order.
    # The call is kept because it advances the global NumPy RNG state;
    # confirm whether a shuffled split was intended.
    idx = np.array(range(len(labels)))
    np.random.shuffle(idx)

    num_node = adj.shape[0]
    num_train = int(num_node * 0.1)
    num_val = int(num_node * 0.01)
    all_idx = list(range(num_node))
    idx_train = all_idx[:num_train]
    idx_val = all_idx[num_train:num_train + num_val]
    idx_test = all_idx[num_train + num_val:]

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    idx_train = torch.LongTensor(idx_train)
    idx_test = torch.LongTensor(idx_test)
    idx_val = torch.LongTensor(idx_val)

    return adj, features, labels, idx_train, idx_test, idx_val


# Train

In [None]:

# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=123, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.1,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=0,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=128,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--alpha', type=float, default=5,
                    help='class-balanced parameter.')
parser.add_argument('--beta', type=float, default=1e-1,
                    help='l2 regularization parameter).')

# A notebook kernel puts its own entries in sys.argv. parse_known_args
# ignores anything the parser does not define, while still accepting both
# "--epochs 100" and "--epochs=100". The previous token filter dropped the
# value of space-separated options, which made argparse fail.
args, _unknown_args = parser.parse_known_args(sys.argv[1:])
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed every RNG in use so runs are repeatable.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_test, idx_val = load_data("pubmed")  #cora, citeseer, pubmed

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=labels.max().item() + 1,
            dropout=args.dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# Per-class loss weights: class 0 is weighted by alpha, class 1 by 1.
weights = [args.alpha, 1]
class_weights = torch.FloatTensor(weights)

if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
    # The loss weights must live on the same device as the logits.
    class_weights = class_weights.cuda()

def train(epoch):
    """Run one full-batch training step and print train/validation loss.

    The training loss is the class-weighted cross-entropy on ``idx_train``
    plus ``args.beta`` times the sum of squared norms of the four layer
    weight matrices returned by the model. Unless ``args.fastmode`` is set,
    the validation loss is computed from a second forward pass in eval mode;
    in fast mode it reuses the training-mode output.

    :param epoch: Zero-based epoch number (printed as ``epoch + 1``).
    """
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output, w1, w2, w3, w4 = model(features, adj)

    # Squared norm of each weight matrix, summed over the four layers.
    l2_reg = sum(torch.pow(torch.norm(w), 2) for w in (w1, w2, w3, w4))

    # Keep the class weights on the same device as the logits; a CPU weight
    # tensor with CUDA logits makes CrossEntropyLoss raise.
    loss = torch.nn.CrossEntropyLoss(weight=class_weights.to(output.device))
    loss_train = loss(output[idx_train], labels[idx_train]) + args.beta*l2_reg
    loss_train.backward()
    optimizer.step()

    if not args.fastmode:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        # The validation loss is only printed, so no autograd graph is needed.
        with torch.no_grad():
            output, _, _, _, _ = model(features, adj)

    # l2_reg is the value computed before the optimizer step.
    loss_val = loss(output[idx_val], labels[idx_val]) + args.beta*l2_reg
    print('Epoch: {:04d}'.format(epoch + 1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'time: {:.4f}s'.format(time.time() - t))

def test():
    """Evaluate the model on ``idx_test`` and return the ROC AUC.

    Class 0 is treated as the positive class: the score of a node is its
    softmax probability for class 0.

    :return: Area under the ROC curve on the test nodes.
    """
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output, _, _, _, _ = model(features, adj)

    scores = F.softmax(output, dim=1)
    # .cpu() is required before .numpy() when the tensors live on the GPU.
    y_true = labels[idx_test].detach().cpu().numpy()
    y_score = scores[idx_test, 0].detach().cpu().numpy()
    fpr, tpr, _ = roc_curve(y_true, y_score, pos_label=0)
    return auc(fpr, tpr)

# Train model
t_total = time.time()
max_auc = 0
for epoch in range(args.epochs):
    train(epoch)
    roc_auc = test()
    # NOTE(review): the reported figure is the best test-set AUC over all
    # epochs, i.e. the epoch is chosen using the test set - confirm this is
    # the intended protocol.
    max_auc = max(max_auc, roc_auc)
AUC = max_auc
# '{:04f}' only set a minimum width and printed six decimals; use four
# decimals like the per-epoch loss output.
print('AUC: {:.4f}'.format(AUC))


  objects.append(pkl.load(f, encoding='latin1'))
  return torch.sparse.FloatTensor(indices, values, shape)


Epoch: 0001 loss_train: 62.2663 loss_val: 71.3841 time: 1.0726s
Epoch: 0002 loss_train: 118.2968 loss_val: 113.6076 time: 1.0804s
Epoch: 0003 loss_train: 85.8701 loss_val: 80.6220 time: 1.1385s
Epoch: 0004 loss_train: 48.2872 loss_val: 46.4118 time: 1.1745s
Epoch: 0005 loss_train: 36.7624 loss_val: 37.0893 time: 0.9658s
Epoch: 0006 loss_train: 36.9681 loss_val: 37.8715 time: 0.8581s
Epoch: 0007 loss_train: 35.5215 loss_val: 35.4922 time: 1.1151s
Epoch: 0008 loss_train: 30.2127 loss_val: 30.2288 time: 0.9429s
Epoch: 0009 loss_train: 25.1002 loss_val: 25.4411 time: 0.8694s
Epoch: 0010 loss_train: 21.9964 loss_val: 22.4839 time: 0.6892s
Epoch: 0011 loss_train: 20.2803 loss_val: 19.9656 time: 0.6421s
Epoch: 0012 loss_train: 18.4419 loss_val: 18.7817 time: 0.5717s
Epoch: 0013 loss_train: 17.5624 loss_val: 18.0375 time: 0.5570s
Epoch: 0014 loss_train: 17.0243 loss_val: 16.8227 time: 0.5456s
Epoch: 0015 loss_train: 15.3597 loss_val: 15.5449 time: 0.5496s
Epoch: 0016 loss_train: 13.8409 loss_v