In [1]:
import networkx
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import *
from torch_geometric.loader import NeighborSampler, NeighborLoader
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GATConv, ResGatedGraphConv, GATv2Conv, SAGEConv, GENConv, DeepGCNLayer, PairNorm, GINConv,GCNConv
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import torch.nn.functional as F
from imblearn.under_sampling import RandomUnderSampler
pd.options.mode.use_inf_as_na = True
from collections import Counter
from sklearn.feature_selection import SelectFromModel
import torch.nn as nn
import time
import pickle
from torch.nn import LayerNorm, Linear, ReLU
from torch_scatter import scatter
from tqdm import tqdm
from torch_geometric.loader import RandomNodeSampler
import math
import copy
from sklearn.metrics import f1_score
from torch.optim import lr_scheduler
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

In [2]:
import dgl
import dgl.nn as dglnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data.utils import *

In [3]:
import warnings
# The `action` argument can be set to 'ignore' (never show a warning) or 'once' (show each warning only once).
warnings.filterwarnings(action='ignore')

In [4]:
class Transition_layer(torch.nn.Module):
    """Apply a caller-supplied normalisation, activation and linear map in sequence.

    Args:
        act: Activation module applied after ``norm``.
        norm: Normalisation module applied first to the input.
        lin: Projection module applied last.
    """

    def __init__(self, act, norm, lin):
        super().__init__()
        # Assigned in the order act, norm, lin so the sub-modules are
        # registered (and appear in the state_dict) in that same order.
        self.act, self.norm, self.lin = act, norm, lin

    def forward(self, x):
        """Return ``lin(act(norm(x)))``."""
        return self.lin(self.act(self.norm(x)))
        
class DenseGAT(torch.nn.Module):
    """Stack of densely connected graph-convolution blocks with a linear classifier head.

    Despite the class name, every convolution is a ``SAGEConv`` with mean
    aggregation. The network consists of ``num_blocks`` blocks; each block
    holds ``num_layers`` ``DeepGCNLayer(block='dense')`` layers and is
    followed by a ``Transition_layer`` that shrinks the channel count by the
    factor ``theta``. A final linear layer maps to ``num_class`` outputs.

    The channel arithmetic assumes each dense layer widens its input by the
    block's growth rate (i.e. the layer concatenates its input with the
    convolution output) -- this matches the ``in_channels + (j - 1) * growth``
    input size given to every convolution.

    Args:
        in_channels: Number of input node features.
        num_class: Number of output classes (size of the final linear layer).
        num_layers: Dense layers per block.
        num_blocks: Number of dense blocks.
        growth_rate: Base growth rate; block ``i`` (zero-based) uses
            ``2 ** i * growth_rate`` output channels per layer.
        theta: Compression factor applied by each transition layer.

    Notes:
        * ``self.growth_rate`` is overwritten while the blocks are built, so
          after construction it holds the growth rate of the LAST block;
          ``self.base_rate`` keeps the value passed in.
        * ``self.node_encoder`` (also stored as ``linear_layers[0]``) is
          created but not applied in ``forward``. It is kept so the parameter
          layout and state_dict keys stay the same.
    """

    def __init__(self, in_channels, num_class, num_layers=6, num_blocks=3, growth_rate=10, theta=0.5):
        super().__init__()
        self.num_layers = num_layers
        self.blocks = num_blocks
        self.theta = theta
        self.growth_rate = growth_rate
        self.base_rate = growth_rate
        self.in_channels = in_channels
        self.out_channels = num_class
        self.linear_layers = torch.nn.ModuleList()
        self.transition_layers = torch.nn.ModuleList()
        self.block_layers = torch.nn.ModuleList()
        # Registered for state_dict compatibility; forward() does not use it.
        self.node_encoder = Linear(in_channels, growth_rate)
        self.linear_layers.append(self.node_encoder)

        for i in range(self.blocks):
            # Dense block i: the per-layer growth rate doubles with every
            # block, i.e. 2^i * base_rate for zero-based i.
            layers = torch.nn.ModuleList()
            self.growth_rate = int(math.pow(2, i) * self.base_rate)
            for j in range(1, self.num_layers + 1):
                # Layer j receives the block input plus the outputs of the
                # (j - 1) previous layers of this block.
                conv = SAGEConv(in_channels + (j - 1) * self.growth_rate, self.growth_rate, aggr='mean')
                norm = LayerNorm(self.growth_rate)
                act = ReLU()
                layers.append(DeepGCNLayer(conv, norm, act, block='dense'))
            self.block_layers.append(layers)

            # Transition: normalise, activate, then compress the accumulated
            # channels by theta before the next block.
            hidden_channels = in_channels + self.num_layers * self.growth_rate
            out_channels = int(hidden_channels * self.theta)
            transition_norm = LayerNorm(hidden_channels, elementwise_affine=True)
            transition_act = ReLU()
            transition_lin = Linear(hidden_channels, out_channels)
            self.transition_layers.append(
                Transition_layer(transition_act, transition_norm, transition_lin)
            )
            # The compressed width is the input width of the next block.
            in_channels = out_channels

        self.lin_last = Linear(in_channels, self.out_channels)
        self.linear_layers.append(self.lin_last)

    def forward(self, x, edge_index):
        """Run all dense blocks and transitions, then the classifier head.

        Args:
            x: Node feature matrix whose last dimension is ``in_channels``.
            edge_index: Graph connectivity forwarded to every dense layer.

        Returns:
            The output of the final linear layer (``num_class`` values per node).
        """
        # The node encoder (linear_layers[0]) is not applied; the raw
        # features are fed directly to the first block.
        for block, transition in zip(self.block_layers, self.transition_layers):
            for layer in block:
                x = layer(x, edge_index)
            x = transition(x)
        # linear_layers[-1] is lin_last, the classifier head.
        return self.linear_layers[-1](x)

In [5]:
from sklearn.utils import class_weight
def train():
    """Run one optimisation pass over the notebook-level ``train_loader``.

    Uses the globals ``model``, ``optimizer``, ``train_loader`` and
    ``device``. For every batch only the first ``batch.batch_size`` rows of
    the model output and of ``batch.y`` enter the loss and the accuracy
    (presumably the seed nodes of a neighbour-sampled batch -- confirm
    against the loader in use).

    Returns:
        tuple[float, float]: ``(mean cross-entropy loss per example,
        fraction of correctly classified examples)``.

    Raises:
        ZeroDivisionError: If the loader yields no examples.
    """
    # Make sure the model is in training mode even if an evaluation step
    # switched it to eval mode earlier.
    model.train()

    total_loss = total_correct = total_examples = 0
    for batch in train_loader:
        # Moving the batch moves its tensors, so x / edge_index need no
        # additional transfer below.
        batch = batch.to(device)
        y = batch.y[:batch.batch_size]
        optimizer.zero_grad()
        y_hat = model(batch.x, batch.edge_index)[:batch.batch_size]
        loss = F.cross_entropy(y_hat, y)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by the batch size so the final value is
        # a per-example average.
        total_loss += float(loss) * batch.batch_size
        total_correct += int((y_hat.argmax(dim=-1) == y).sum())
        total_examples += batch.batch_size

    return total_loss / total_examples, total_correct / total_examples

def inferrence(model, subgraph_loader):
    """Predict labels for every batch of ``subgraph_loader`` and print metrics.

    The model is run in eval mode with gradient tracking disabled, so no
    autograd graph is kept for the collected outputs. The model's previous
    train/eval mode is restored afterwards. Node features and the edge index
    are both moved to the global ``device`` before the forward pass. Only
    the first ``batch.batch_size`` rows of each batch are evaluated.

    Args:
        model: Module called as ``model(x, edge_index)``.
        subgraph_loader: Iterable of batches exposing ``x``, ``y``,
            ``edge_index`` and ``batch_size``.

    Returns:
        tuple: ``(y, y_hat)`` where ``y`` is a CPU tensor of true labels and
        ``y_hat`` is a NumPy array with the arg-max class per example.
    """
    was_training = model.training
    model.eval()
    outputs = []
    labels = []
    try:
        with torch.no_grad():
            # tqdm wraps the loader directly; it closes the bar even if a
            # batch raises.
            for batch in tqdm(subgraph_loader):
                scores = model(batch.x.to(device), batch.edge_index.to(device))[:batch.batch_size]
                outputs.append(scores.cpu())
                labels.append(batch.y[:batch.batch_size].cpu())
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    y_hat = np.argmax(torch.cat(outputs, 0).numpy(), -1)
    y = torch.cat(labels, 0)
    cr1 = classification_report(y, y_hat, digits=4)
    cf = confusion_matrix(y, y_hat)
    print(cr1)
    print(cf)
    return y, y_hat

def get_metrics(y_true, y_pred):
    """Print the per-class classification report table and the confusion matrix.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        None. Both results are only printed.
    """
    # One row per report entry: the report dict is keyed by entry, so the
    # frame is transposed after construction.
    report_table = pd.DataFrame(
        classification_report(y_true, y_pred, output_dict=True, digits=4)
    ).T
    confusion = confusion_matrix(y_true, y_pred)
    for item in (report_table, confusion):
        print(item)

In [6]:
# Load the UNSW-NB15 training file and keep the first graph of the first
# value returned by load_graphs.
# NOTE(review): `train_lg` is reassigned by later cells that load the
# CIC-2017 files -- confirm which dataset this notebook is meant to use.
train_lg = load_graphs("./mydata/unswnb15_train_data_lg.bin")[0][0]

In [7]:
# Load the UNSW-NB15 test file and keep the first graph of the first value
# returned by load_graphs.
# NOTE(review): `test_lg` is reassigned by later cells that load the
# CIC-2017 files -- confirm which dataset this notebook is meant to use.
test_lg = load_graphs("./mydata/unswnb15_test_data_lg.bin")[0][0]

In [6]:
# Load the graphs stored in the CIC-2017 "50w" file; the second return value
# of load_graphs is discarded.
data, _ = load_graphs("dgl_cic2017data50w.bin")

In [None]:
# The first loaded graph is used as the training graph, the second as the
# test graph.
train_lg, test_lg = data[0], data[1]

In [None]:
# Replace each graph with the result of dgl.add_self_loop.
# NOTE(review): the cell rebinds its own inputs, so running it twice applies
# add_self_loop twice -- presumably that duplicates the loops; run it once.
train_lg = dgl.add_self_loop(train_lg)
test_lg = dgl.add_self_loop(test_lg)

In [11]:
# Display the summaries (node/edge counts and data schemes) of both graphs.
train_lg, test_lg

(Graph(num_nodes=358047, num_edges=916065844,
       ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'h': Scheme(shape=(40,), dtype=torch.float32)}
       edata_schemes={}),
 Graph(num_nodes=153454, num_edges=169209818,
       ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'h': Scheme(shape=(40,), dtype=torch.float32)}
       edata_schemes={}))

In [12]:
# Display the number of nodes in the training graph.
train_lg.num_nodes()

358047

In [5]:
# Load the graphs stored in "dgl_cic2017data3.bin"; the second return value
# of load_graphs is discarded.
# NOTE(review): this rebinds `data`, which another cell fills from
# "dgl_cic2017data50w.bin" -- confirm which file the conversion below expects.
data, _ = load_graphs("dgl_cic2017data3.bin")

In [7]:
# The first loaded graph is used as the training graph, the second as the
# test graph.
train_lg, test_lg = data[0], data[1]

In [8]:
# Build the [2, num_edges] int64 edge index of the test graph.
# edges() is queried once, and the two endpoint arrays are stacked in NumPy
# and wrapped with torch.from_numpy: constructing a tensor from a Python list
# of ndarrays is very slow on graphs with many edges.
src_index, dst_index = (endpoints.numpy() for endpoints in test_lg.edges())
edge_index = torch.from_numpy(np.stack([src_index, dst_index])).long()

In [9]:
# Collect the test graph's node features, node labels and edge index in a
# PyG Data object. `edge_index` must still hold the tensor built from
# test_lg here, because the same variable name is reused for the training
# graph further down.
test_data = Data()
test_data.x, test_data.y = test_lg.ndata['x'], test_lg.ndata['y']
test_data.edge_index = edge_index

In [11]:
# Build the [2, num_edges] int64 edge index of the training graph.
# edges() is queried once, and the two endpoint arrays are stacked in NumPy
# and wrapped with torch.from_numpy: constructing a tensor from a Python list
# of ndarrays is very slow on graphs with many edges.
src_index, dst_index = (endpoints.numpy() for endpoints in train_lg.edges())
edge_index = torch.from_numpy(np.stack([src_index, dst_index])).long()

In [12]:
# Collect the training graph's node features, node labels and edge index in
# a PyG Data object. `edge_index` must hold the tensor built from train_lg
# here, because the same variable name is also used for the test graph.
train_data = Data()
train_data.x, train_data.y = train_lg.ndata['x'], train_lg.ndata['y']
train_data.edge_index = edge_index

In [19]:
# Display the shapes of the tensors stored in both converted Data objects.
train_data, test_data

(Data(x=[459589, 78], y=[459589], edge_index=[2, 339778283]),
 Data(x=[196967, 78], y=[196967], edge_index=[2, 60788929]))

In [20]:
# Set the node count of each Data object explicitly from its source DGL graph.
train_data.num_nodes = train_lg.num_nodes()
test_data.num_nodes = test_lg.num_nodes()

In [21]:
# NOTE(review): identical to the first statement of the previous cell, so
# this cell looks redundant -- confirm and remove if so.
train_data.num_nodes = train_lg.num_nodes()

In [13]:
# Serialize the converted training Data object with torch.save.
# NOTE(review): the file name ends in "50", but the notebook loads both
# "dgl_cic2017data50w.bin" and "dgl_cic2017data3.bin" -- confirm which source
# file produced the object being saved.
torch.save(train_data, 'cic2017_dgl_to_pyg_train_data50')

In [10]:
# Serialize the converted test Data object with torch.save.
# NOTE(review): the file name ends in "50", but the notebook loads both
# "dgl_cic2017data50w.bin" and "dgl_cic2017data3.bin" -- confirm which source
# file produced the object being saved.
torch.save(test_data, 'cic2017_dgl_to_pyg_test_data50')