In [11]:
import sys
sys.path.append("..")
import torch
import pandas as pd
import numpy as np
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob
from tqdm import tqdm
from reserch_utils_HT import network_to_image
from models.set_model import GNN_base
from data.data_loader import gnn_data_loader_cv
from torch_geometric.loader import DataLoader

In [12]:
# Stand-alone GNN_base instance built with the "GIN" model name. The training loops
# further down rebind `model` to the networks returned by train_10.
# NOTE(review): the positional 4 is presumably the number of output classes (the label
# maps in this notebook list four graph kinds) — confirm against GNN_base's signature.
model = GNN_base("GIN", 4, num_node_feature=1)

In [None]:
from experiments.cross_validation import split_data, train
from sklearn.model_selection import KFold
from torch.utils.data.dataset import Subset
import torch.nn as nn
import torch.optim as optim
import torch
from experiments.gnn_train_utils import gnn_train_val_1epoch

def train_10(model_name, dataset_name, num_epoch):
    """Train a fresh ``GNN_base`` on the first fold of a 10-fold split and return it.

    Despite the name, only the first of the ten cross-validation folds is used:
    the model is fitted on that fold's training indices and monitored on its
    validation indices. The split is seeded (``random_state=0``), so every call
    sees the same partition of a given dataset.

    Args:
        model_name: Model name forwarded to ``GNN_base`` and stored on the
            config object handed to ``split_data``.
        dataset_name: Dataset identifier; the data is loaded from
            ``../graph_data/{dataset_name}/graph_tensor/``.
        num_epoch: Number of epochs to run. ``0`` skips training and returns
            the freshly initialised model.

    Returns:
        The model after training, moved to ``cuda:0`` when available, else CPU.
    """
    class Config():
        """Minimal settings object read by ``split_data``."""
        def __init__(self, model_name):
            self.model_name = model_name
            self.batchsize = 20

    model = GNN_base(model_name, 4, num_node_feature=1)
    data, _ = gnn_data_loader_cv(dataset_name, f"../graph_data/{dataset_name}/graph_tensor/")

    # Cross-validation split; only the first fold is consumed.
    fold = KFold(n_splits=10, shuffle=True, random_state=0)
    train_idx, valid_idx = next(iter(fold.split(data)))

    config = Config(model_name)
    train_data_loader, valid_data_loader = split_data(data, train_idx, valid_idx, config)

    # Move the model first so the optimizer is constructed over the parameters
    # that live on the training device.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    last_epoch = num_epoch - 1
    for epoch in range(num_epoch):
        _, train_acc, _, val_acc = gnn_train_val_1epoch(
            model, train_data_loader, valid_data_loader, device, optimizer, criterion
        )
        # Report every fifth epoch and always the final one. Folding the final
        # report into the loop avoids printing it twice and avoids touching
        # unbound loop variables when num_epoch is 0.
        if epoch % 5 == 0 or epoch == last_epoch:
            print(f"epoch={epoch} : trainAcc={train_acc}, valAcc={val_acc}")
    return model

In [None]:
# Sweep grid used by the robustness evaluation: parameter[kind][n] holds the values
# iterated over for graphs of kind `kind` with `n` nodes. The number of values per
# node count comes in three tiers: 10 (n < 100), 14 (100 <= n < 1000), 17 (n >= 1000).
_NODE_COUNTS = (20, 30, 50, 70, 100, 200, 300, 500, 700, 1000, 2000)


def _sweep_size(n):
    """Return how many parameter values are listed for graphs with ``n`` nodes."""
    if n < 100:
        return 10
    if n < 1000:
        return 14
    return 17


def _integer_steps(n):
    """Return a new list of integer settings (shared shape for "BA" and "Growth")."""
    steps = list(range(1, 11))
    if _sweep_size(n) >= 14:
        steps += [15, 20, 25, 30]
    if _sweep_size(n) >= 17:
        steps += [50, 100, 200]
    return steps


def _attach_steps(n):
    """Return ``n`` scaled by evenly spaced factors between 0.5 and 25."""
    return [n * i for i in np.linspace(0.5, 25, _sweep_size(n))]


def _random_steps(n):
    """Return log-spaced values whose exponent range depends on the size tier."""
    count = _sweep_size(n)
    if count == 10:
        return np.logspace(-1.3, -0.1, count)
    if count == 14:
        return np.logspace(-2, -0.7, count)
    return np.logspace(-3., -0.9, count)


parameter = {
    "BA": {n: _integer_steps(n) for n in _NODE_COUNTS},
    "Growth": {n: _integer_steps(n) for n in _NODE_COUNTS},
    "Attach": {n: _attach_steps(n) for n in _NODE_COUNTS},
    "Random": {n: _random_steps(n) for n in _NODE_COUNTS},
}

nodes = list(_NODE_COUNTS)

In [71]:
def make_pred_df(model, data, kind, n, p, cnt):
    """Build a table of predicted labels and their softmax scores for one batch.

    Args:
        model: Callable mapping ``data`` to a 2-D tensor of per-class scores
            (one row per graph). When it is a ``torch.nn.Module`` it is put in
            eval mode for the forward pass and its previous mode is restored.
        data: Batch passed unchanged to ``model``.
        kind: Ground-truth graph kind; one of "BA", "Attach", "Growth", "Random".
        n: Node count recorded in the "node" column.
        p: Parameter value recorded in the "parameter" column.
        cnt: Offset of this batch; row ``i`` gets seed ``10000 + cnt + i``.

    Returns:
        A DataFrame with one row per graph in the batch and the columns
        seed, node, parameter, kind, true_label, pred, probablility.

    Raises:
        KeyError: If ``kind`` is not one of the four known kinds.
    """
    kind_to_label = {"BA": 0, "Attach": 1, "Growth": 2, "Random": 3}

    # Dropout / batch-norm layers must not run in training mode during
    # inference. Plain callables are accepted too and are left untouched.
    is_module = isinstance(model, torch.nn.Module)
    was_training = is_module and model.training
    if is_module:
        model.eval()
    try:
        with torch.no_grad():
            pred = torch.softmax(model(data), dim=1)
    finally:
        if was_training:
            model.train()

    index = pred.argmax(dim=1)  # predicted class per graph
    # Score of the predicted class, picked for all rows at once.
    confidence = pred.gather(1, index.unsqueeze(1)).squeeze(1)
    # Size the table from the batch itself so a short (e.g. final) batch works.
    batch_size = index.shape[0]

    return pd.DataFrame({
        "seed": np.arange(batch_size) + 10000 + cnt,
        "node": n,
        "parameter": p,
        "kind": kind,
        "true_label": kind_to_label[kind],
        "pred": index.cpu().numpy(),
        # Column name keeps its historical spelling so existing CSV consumers still match.
        "probablility": confidence.cpu().tolist(),
    })


def robust_acc_df(model, resize, kind, parameter):
    """Run ``model`` over every stored robustness graph of one kind.

    For each node count and each value in ``parameter[kind][n]`` the tensors
    under ``./robustness_data_tensor/{kind}/{n}/{p}/`` are loaded, batched, and
    passed through ``make_pred_df``.

    Args:
        model: Trained network forwarded to ``make_pred_df``.
        resize: Unused; kept so existing call sites keep working.
        kind: Graph kind, used both as a key into ``parameter`` and as a path segment.
        parameter: Nested mapping ``kind -> node count -> iterable of values``.

    Returns:
        One DataFrame with the rows of every batch, re-indexed from 0. An empty
        DataFrame is returned when no batch was produced.
    """
    batch_size = 20
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Frames are collected and concatenated once at the end: DataFrame.append
    # no longer exists in pandas 2.x and copied the whole table on every call.
    frames = []

    for n in tqdm([20,30,50,70,100,200,300,500,700,1000,2000]):
        for p in parameter[kind][n]:
            # "Attach" values are floats in the grid; the path segment uses the truncated integer.
            # NOTE(review): presumably the directories were written with int(p) as well — confirm.
            if kind == "Attach":
                p = int(p)
            data, _ = gnn_data_loader_cv("a", path=f"./robustness_data_tensor/{kind}/{n}/{p}/")
            data_loader = DataLoader(data, batch_size=batch_size)

            for batch_index, input_data in enumerate(data_loader):
                # The offset advances by one full batch per step, which is what
                # make_pred_df turns into per-graph seed values.
                cnt = batch_index * batch_size
                frames.append(make_pred_df(model, input_data.to(device), kind, n, p, cnt))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

def make_heatmap(dataset_name, resize, model_name, model, parameter, epoch, gpu=True):
    """Collect robustness predictions for all four graph kinds and save them as CSV.

    No figure is drawn here; the function only writes
    ``./robustness_plot/{model_name}_{dataset_name}_{epoch}.csv``.

    Args:
        dataset_name: Used in the output file name.
        resize: Forwarded to ``robust_acc_df``.
        model_name: Used in the output file name.
        model: Trained network to evaluate.
        parameter: Sweep grid forwarded to ``robust_acc_df``.
        epoch: Used in the output file name.
        gpu: Unused; kept so existing call sites keep working.

    Returns:
        None.
    """
    import os

    # to_csv does not create missing directories.
    os.makedirs("./robustness_plot", exist_ok=True)

    per_kind = []
    for kind in ["BA", "Attach", "Growth", "Random"]:
        per_kind.append(robust_acc_df(model, resize, kind, parameter))
        # Rewritten after every kind so partial results survive an interrupted run.
        # The index is intentionally not reset: each kind keeps its own 0..N
        # numbering, matching the CSV layout produced before.
        pd.concat(per_kind).to_csv(f"./robustness_plot/{model_name}_{dataset_name}_{epoch}.csv")

In [None]:
# Train DGCNN and GIN for 20 epochs on each dataset, then export the robustness
# predictions of every trained model.
resize = 100
epoch = 20
_runs = [
    (m, d)
    for m in ["DGCNN", "GIN"]
    for d in ["subset1", "poisson", "new_poisson", "new_parete"]
]
for model_name, dataset_name in _runs:
    print(model_name, dataset_name)
    model = train_10(model_name, dataset_name, epoch)
    make_heatmap(dataset_name, resize, model_name, model, parameter, epoch, gpu=True)

DGCNN subset1
epoch=0 : trainAcc=0.42194444444444446, valAcc=0.6825
epoch=5 : trainAcc=0.8575, valAcc=0.8625
epoch=10 : trainAcc=0.8577777777777778, valAcc=0.93
epoch=15 : trainAcc=0.8777777777777778, valAcc=0.92


100%|██████████| 11/11 [00:21<00:00,  1.91s/it]
100%|██████████| 11/11 [00:18<00:00,  1.72s/it]
100%|██████████| 11/11 [00:21<00:00,  1.94s/it]
100%|██████████| 11/11 [00:21<00:00,  1.91s/it]


DGCNN poisson
epoch=0 : trainAcc=0.27, valAcc=0.2625
epoch=5 : trainAcc=0.7061111111111111, valAcc=0.91
epoch=10 : trainAcc=0.8672222222222222, valAcc=0.87
epoch=15 : trainAcc=0.9444444444444444, valAcc=0.9425


100%|██████████| 11/11 [00:21<00:00,  1.91s/it]
100%|██████████| 11/11 [00:18<00:00,  1.71s/it]
100%|██████████| 11/11 [00:21<00:00,  1.93s/it]
100%|██████████| 11/11 [00:20<00:00,  1.90s/it]


DGCNN new_poisson
epoch=0 : trainAcc=0.35638888888888887, valAcc=0.67
epoch=5 : trainAcc=0.7480555555555556, valAcc=0.78
epoch=10 : trainAcc=0.7530555555555556, valAcc=0.795
epoch=15 : trainAcc=0.7655555555555555, valAcc=0.81


100%|██████████| 11/11 [00:21<00:00,  1.92s/it]
100%|██████████| 11/11 [00:18<00:00,  1.71s/it]
100%|██████████| 11/11 [00:21<00:00,  1.94s/it]
100%|██████████| 11/11 [00:21<00:00,  1.91s/it]


DGCNN new_parete
epoch=0 : trainAcc=0.3680555555555556, valAcc=0.42
epoch=5 : trainAcc=0.6430555555555556, valAcc=0.6775
epoch=10 : trainAcc=0.7605555555555555, valAcc=0.79
epoch=15 : trainAcc=0.8261111111111111, valAcc=0.8825


100%|██████████| 11/11 [00:21<00:00,  1.93s/it]
100%|██████████| 11/11 [00:19<00:00,  1.74s/it]
100%|██████████| 11/11 [00:21<00:00,  1.96s/it]
100%|██████████| 11/11 [00:21<00:00,  1.94s/it]


GIN subset1
epoch=0 : trainAcc=0.8816666666666667, valAcc=0.9575
epoch=5 : trainAcc=0.9441666666666667, valAcc=0.9725
epoch=10 : trainAcc=0.9741666666666666, valAcc=0.9825
epoch=15 : trainAcc=0.9694444444444444, valAcc=0.9625


100%|██████████| 11/11 [00:20<00:00,  1.82s/it]
100%|██████████| 11/11 [00:17<00:00,  1.62s/it]
100%|██████████| 11/11 [00:20<00:00,  1.84s/it]
100%|██████████| 11/11 [00:19<00:00,  1.80s/it]


GIN poisson
epoch=0 : trainAcc=0.7291666666666666, valAcc=0.9325
epoch=5 : trainAcc=0.9116666666666666, valAcc=0.985
epoch=10 : trainAcc=0.9255555555555556, valAcc=0.9675
epoch=15 : trainAcc=0.9363888888888889, valAcc=0.985


100%|██████████| 11/11 [00:19<00:00,  1.81s/it]
100%|██████████| 11/11 [00:17<00:00,  1.61s/it]
100%|██████████| 11/11 [00:20<00:00,  1.84s/it]
100%|██████████| 11/11 [00:20<00:00,  1.84s/it]


GIN new_poisson
epoch=0 : trainAcc=0.6569444444444444, valAcc=0.8725
epoch=5 : trainAcc=0.8422222222222222, valAcc=0.945
epoch=10 : trainAcc=0.8666666666666667, valAcc=0.9625
epoch=15 : trainAcc=0.8952777777777777, valAcc=0.975


100%|██████████| 11/11 [00:20<00:00,  1.89s/it]
100%|██████████| 11/11 [00:18<00:00,  1.67s/it]
100%|██████████| 11/11 [00:20<00:00,  1.89s/it]
100%|██████████| 11/11 [00:20<00:00,  1.85s/it]


GIN new_parete
epoch=0 : trainAcc=0.5213888888888889, valAcc=0.6175
epoch=5 : trainAcc=0.6413888888888889, valAcc=0.4475
epoch=10 : trainAcc=0.7094444444444444, valAcc=0.8
epoch=15 : trainAcc=0.7027777777777777, valAcc=0.1925


In [None]:
# Same train-then-export sweep as for the other models, here for Deepsets with 50 epochs.
resize = 100
epoch = 50
_runs = [
    (m, d)
    for m in ["Deepsets"]
    for d in ["subset1", "poisson", "new_poisson", "new_parete"]
]
for model_name, dataset_name in _runs:
    print(model_name, dataset_name)
    model = train_10(model_name, dataset_name, epoch)
    make_heatmap(dataset_name, resize, model_name, model, parameter, epoch, gpu=True)

# heatmap data info #

In [28]:
from test_utils import network_info

def robustness_graph_info(kind):
    """Compute ``network_info`` for every stored robustness graph of one kind.

    Adjacency-list files are read from
    ``./robustness_data_graph/{kind}/{n}/{p}/*`` for each node count ``n`` and
    each parameter value ``p`` belonging to ``kind``.

    Args:
        kind: One of "BA", "Growth", "Attach", "Random".

    Returns:
        A DataFrame with one row per graph file: the entries returned by
        ``network_info`` plus the columns kind, parameter, node and param
        (a human-readable label for "Attach", ``None`` otherwise). Rows are
        numbered from 0 in processing order. Empty when no file matched.

    Raises:
        KeyError: If ``kind`` is not one of the four known kinds.
    """
    # NOTE(review): ba_growth_param and attach_params are not defined in the visible
    # part of this notebook; they must already exist in the kernel. Wrapping them in
    # lambdas means only the helper needed for `kind` is ever looked up and called.
    param_sources = {
        "BA": lambda n: ba_growth_param(n),
        "Growth": lambda n: ba_growth_param(n),
        "Attach": lambda n: attach_params(n),
        "Random": lambda n: [0.01, 0.02, 0.05, 0.07, 0.1, 0.15, 0.2],
    }
    params_for = param_sources[kind]

    # Labels for the "Attach" parameter positions; indexed by position in the sweep.
    param_name = ["node*0.1","node*0.25", "node*0.5", "node*0.75", "node", "node*2", "node*3", "node*4", "node*8", "node*10", "node*15"]

    # Rows are collected and turned into a frame once: DataFrame.append no
    # longer exists in pandas 2.x and copied the whole table on every call.
    rows = []
    for n in tqdm([20,30,50,70,100,130,200,300,500,1000,2000]):
        for param_index, p in enumerate(params_for(n)):
            # glob order depends on the file system; sort for a reproducible row order.
            for path in sorted(glob(f"./robustness_data_graph/{kind}/{n}/{p}/*")):
                G = nx.read_adjlist(path)
                info = network_info(G)
                info["kind"] = kind
                info["parameter"] = p
                info["node"] = n
                info["param"] = param_name[param_index] if kind == "Attach" else None

                rows.append(pd.Series(info, name=len(rows)))

    return pd.DataFrame(rows)

In [None]:
# Inspection cell: shows the repr of whatever `model` is currently bound to in the kernel.
model

In [29]:
# Export the per-graph statistics of each kind to its own CSV file. `df` is left
# holding the frame of the last kind processed ("Random").
for kind, csv_path in (
    ("BA", "./robustness_plot/BA.csv"),
    ("Attach", "./robustness_plot/Attach.csv"),
    ("Growth", "./robustness_plot/Growth.csv"),
    ("Random", "./robustness_plot/Random.csv"),
):
    df = robustness_graph_info(kind)
    df.to_csv(csv_path)

100%|██████████| 11/11 [1:00:53<00:00, 332.14s/it]
100%|██████████| 11/11 [17:17<00:00, 94.34s/it] 
100%|██████████| 11/11 [1:03:03<00:00, 343.99s/it] 
100%|██████████| 11/11 [1:41:02<00:00, 551.14s/it] 


In [30]:
# Inspection cell: `df` is the frame returned by the last robustness_graph_info(...) call
# in the cell above, i.e. the "Random" statistics.
df

Unnamed: 0,average,edges,max_degree,min_degree,Skewness,Kurtosis,average_cluster,average_shortest_path,kind,parameter,node,param
0,0.200,2.0,1.0,0.0,1.500000,0.250000,0.000000,,Random,0.01,20.0,
1,0.400,4.0,1.0,0.0,0.408248,-1.833333,0.000000,,Random,0.01,20.0,
2,0.200,2.0,1.0,0.0,1.500000,0.250000,0.000000,,Random,0.01,20.0,
3,0.100,1.0,1.0,0.0,2.666667,5.111111,0.000000,,Random,0.01,20.0,
4,0.000,0.0,0.0,0.0,0.000000,-3.000000,0.000000,,Random,0.01,20.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
3845,399.906,399906.0,459.0,347.0,0.103130,-0.006832,0.200006,1.799947,Random,0.20,2000.0,
3846,399.483,399483.0,460.0,336.0,0.005913,-0.096902,0.199782,1.800159,Random,0.20,2000.0,
3847,398.799,398799.0,458.0,338.0,-0.069766,0.037134,0.199493,1.800501,Random,0.20,2000.0,
3848,398.379,398379.0,470.0,340.0,0.091161,0.097604,0.199282,1.800711,Random,0.20,2000.0,
