# 各種統一実験

## optimizer
- Adam

## learning rate
- 0.01
- 0.0001

## aggregator
- sum
- mean

## batch size
- 32

## criterion
- Cross entropy

In [1]:
import sys
sys.path.append("../codes")

import numpy as np
import pandas as pd
import torch
import torch.optim as optim

from functools import partial
from torch_geometric.data import DataLoader
from sklearn.model_selection import KFold
from torch.utils.data.dataset import Subset

from optuna_cv_utils import cv_train, cv_test, make_datasets
from set_data_folder import make_train_data

from model import GCN
from DGCNN import DGCNN_Model

In [2]:
def CV(data_list, num_epoch, model_name, adam_lr, pool="mean"):
    """Run 10-fold cross-validation and collect one validation score per fold.

    For every fold a fresh model, Adam optimizer and cross-entropy criterion
    are created, the model is trained for ``num_epoch`` epochs with
    ``cv_train`` and evaluated with ``cv_test`` after each epoch. Only the
    score from the last epoch of each fold is kept.

    Args:
        data_list: Dataset to split; passed to ``KFold.split`` and wrapped in
            ``Subset`` using the resulting index arrays.
        num_epoch: Number of training epochs per fold. Must be at least 1.
        model_name: ``"GCN"`` or ``"DGCNN"``.
        adam_lr: Learning rate passed to ``optim.Adam``.
        pool: Forwarded to ``GCN`` as ``pooling``; not used for ``"DGCNN"``.

    Returns:
        A list with one entry per fold: the value returned by ``cv_test``
        after the final epoch of that fold.

    Raises:
        ValueError: If ``model_name`` is not supported or ``num_epoch`` is
            smaller than 1.
    """
    # Fail fast with a clear message. Without these checks an unknown model
    # name or a zero epoch count surfaces later as an UnboundLocalError.
    if model_name not in ("GCN", "DGCNN"):
        raise ValueError(
            f"unsupported model_name {model_name!r}; expected 'GCN' or 'DGCNN'"
        )
    if num_epoch < 1:
        raise ValueError(f"num_epoch must be >= 1, got {num_epoch!r}")

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # cross-validation (fixed seed so every configuration sees the same folds)
    fold = KFold(n_splits=10, shuffle=True, random_state=0)

    valid_accs = []
    for train_idx, valid_idx in fold.split(data_list):
        # set model: a new, untrained model for every fold
        if model_name == "GCN":
            model = GCN(
                hidden_channels=64,
                num_classes=4,
                num_node_feature=1,
                pooling=pool,
            ).to(device)
        else:  # "DGCNN" (validated above)
            model = DGCNN_Model(num_features=1, num_classes=4).to(device)

        optimizer = optim.Adam(model.parameters(), lr=adam_lr)
        criterion = torch.nn.CrossEntropyLoss()

        # split data
        train_loader = DataLoader(
            Subset(data_list, train_idx),
            shuffle=True,
            batch_size=32,
        )
        valid_loader = DataLoader(
            Subset(data_list, valid_idx),
            shuffle=False,
            batch_size=32,
        )

        for _ in range(num_epoch):
            # train
            cv_train(model, train_loader, device, criterion, optimizer, model_name)
            # valid: evaluated every epoch as before; only the last value is kept
            valid_acc = cv_test(model, valid_loader, device, model_name)

        valid_accs.append(valid_acc)

    return valid_accs

In [3]:
# Parameter grids for the subset datasets, keyed by node count (as a string).
# Entry i of m_list, p_list[node] and step_list[node] belong together.
m_list = [2, 4, 6, 8, 10]
p_list = {"100": [0.04, 0.08, 0.11, 0.15, 0.19], "1000":[0.004, 0.008, 0.012, 0.016, 0.02]}
step_list = {"100": [200, 384, 564, 736, 900], "1000":[1996, 3984, 5964, 7936, 9900]}

RESULT_COLUMNS = ["model", "node", "p_s", "aggre", "adam_lr", "ave_acc", "std_acc"]


def _run_grid(data_list, node, p_s_label):
    """Cross-validate every model / learning-rate / aggregator combination.

    Runs ``CV`` with 10 epochs for GCN (with "mean" and "sum" pooling) and
    DGCNN at learning rates 0.01 and 0.0001, prints the rounded mean score of
    each run and returns one result dict per run.

    Args:
        data_list: Dataset passed through to ``CV``.
        node: Value stored in the "node" column.
        p_s_label: Value stored in the "p_s" column (dataset identifier).

    Returns:
        List of dicts keyed by ``RESULT_COLUMNS``.
    """
    results = []
    for model_name in ["GCN", "DGCNN"]:
        for adam_lr in [0.01, 0.0001]:
            # Only GCN takes a pooling argument; DGCNN runs once and is
            # recorded with aggre "None".
            pools = ["mean", "sum"] if model_name == "GCN" else [None]
            for pool in pools:
                if pool is None:
                    valid_accs = CV(data_list, 10, model_name, adam_lr)
                else:
                    valid_accs = CV(data_list, 10, model_name, adam_lr, pool=pool)

                ave_acc = np.average(valid_accs)
                results.append({
                    "model": model_name,
                    "node": node,
                    "p_s": p_s_label,
                    "aggre": "None" if pool is None else pool,
                    "adam_lr": adam_lr,
                    "ave_acc": ave_acc,
                    "std_acc": np.std(valid_accs),
                })
                print(round(ave_acc, 2))
    return results


# Rows are collected in a list and turned into a DataFrame once at the end;
# DataFrame.append was removed in pandas 2.0.
rows = []

# poisson dataset
data_list = make_datasets()
rows.extend(_run_grid(data_list, "100", "poisson"))

# subset dataset
for node in ["100"]:
    for m, p, step in zip(m_list, p_list[node], step_list[node]):
        # make train data folder
        p_s = [
            {"kind": "barabasi", "node": [node], "p": [str(m)]},
            {"kind": "noGrowth", "node": [node], "p": [str(step)]},
            {"kind": "noAttach", "node": [node], "p": [str(m)]},
            {"kind": "random", "node": [node], "p": [str(p)]}
        ]

        # create the train data folder
        make_train_data(p_s, "../train_data/net").copy_data()
        # build the dataset
        data_list = make_datasets()

        # The label uses the random-graph probability ``p``. The pooling loop
        # now lives in _run_grid under a different name, so it no longer
        # overwrites ``p`` before the label is built.
        rows.extend(_run_grid(data_list, node, f"{m}_{p}_{step}"))

df = pd.DataFrame(rows, columns=RESULT_COLUMNS)
# Kept for compatibility with any later cell that reads the row counter.
cnt = len(df)

100%|██████████| 5000/5000 [00:19<00:00, 250.27it/s]
100%|██████████| 5000/5000 [00:18<00:00, 267.69it/s]
100%|██████████| 5000/5000 [00:19<00:00, 255.97it/s]
100%|██████████| 5000/5000 [00:18<00:00, 267.65it/s]


0.36
0.3
0.32
0.33
0.25
0.92


100%|██████████| 5000/5000 [00:09<00:00, 522.33it/s]
100%|██████████| 5000/5000 [00:09<00:00, 521.35it/s]
100%|██████████| 5000/5000 [00:09<00:00, 524.89it/s]
100%|██████████| 5000/5000 [00:09<00:00, 510.58it/s]


0.97
0.88
0.86
0.94
0.52
0.92


100%|██████████| 5000/5000 [00:15<00:00, 315.83it/s]
100%|██████████| 5000/5000 [00:15<00:00, 326.66it/s]
100%|██████████| 5000/5000 [00:15<00:00, 314.14it/s]
100%|██████████| 5000/5000 [00:16<00:00, 302.04it/s]


0.95
0.48
0.57
0.81
0.32
0.92


100%|██████████| 5000/5000 [00:21<00:00, 229.88it/s]
100%|██████████| 5000/5000 [00:20<00:00, 245.48it/s]
100%|██████████| 5000/5000 [00:21<00:00, 229.49it/s]
100%|██████████| 5000/5000 [00:21<00:00, 233.46it/s]


0.41
0.26
0.72
0.9
0.25
0.98


100%|██████████| 5000/5000 [00:26<00:00, 186.65it/s]
100%|██████████| 5000/5000 [00:24<00:00, 201.71it/s]
100%|██████████| 5000/5000 [00:26<00:00, 187.21it/s]
100%|██████████| 5000/5000 [00:26<00:00, 185.64it/s]


0.25
0.25
0.75
0.76
0.25
0.98


100%|██████████| 5000/5000 [00:31<00:00, 156.51it/s]
100%|██████████| 5000/5000 [00:28<00:00, 172.85it/s]
100%|██████████| 5000/5000 [00:31<00:00, 157.69it/s]
100%|██████████| 5000/5000 [00:33<00:00, 151.12it/s]


0.36
0.25
0.47
0.54
0.25
0.99


In [5]:
# Save the result table. The output directory is created first so that the
# results of the long experiment run are not lost to a missing-folder error.
from pathlib import Path

result_path = Path("paper_result/re_result.csv")
result_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(result_path, index=False)