# 4class 分類でハイパラチューニング

In [3]:
import sys
sys.path.append("../codes")

import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import optuna


from functools import partial
from torch_geometric.data import DataLoader
from sklearn.model_selection import KFold
from torch.utils.data.dataset import Subset

from optuna_cv_utils import cv_train, cv_test, make_datasets
from set_data_folder import make_train_data

from model import GCN
from DGCNN import DGCNN_Model

In [4]:
def get_optimizer(trial, model):
    """Build a torch optimizer from hyperparameters sampled by an Optuna trial.

    Values are drawn from ``trial`` in this order:

    1. ``'optimizer'``    - categorical choice between ``'Adam'`` and ``'Adagrad'``
    2. ``'weight_decay'`` - log-uniform in [1e-10, 1e-3]
    3. ``'adam_lr'``      - log-uniform in [1e-5, 1e-1]

    The learning rate is stored under the key ``'adam_lr'`` for BOTH optimizer
    kinds (the key name is kept so existing result files stay comparable).

    Args:
        trial: object exposing ``suggest_categorical`` and ``suggest_loguniform``.
        model: module whose ``parameters()`` are handed to the optimizer.

    Returns:
        An ``optim.Adam`` or ``optim.Adagrad`` instance.

    Raises:
        ValueError: if the trial returns an optimizer name that is not one of
            the offered choices.
    """
    # Search space for the optimizer kind: Adam vs. Adagrad.
    optimizer_classes = {'Adam': optim.Adam, "Adagrad": optim.Adagrad}
    optimizer_name = trial.suggest_categorical('optimizer', list(optimizer_classes))
    if optimizer_name not in optimizer_classes:
        # Fail loudly instead of falling through to an unbound local variable.
        raise ValueError(
            "unsupported optimizer {!r}; expected one of {}".format(
                optimizer_name, list(optimizer_classes)
            )
        )

    # Search space for weight decay.
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)

    # Both optimizers share the same learning-rate search space and key.
    # NOTE(review): newer Optuna releases deprecate suggest_loguniform in favour
    # of suggest_float(..., log=True) - switch once the Optuna version is confirmed.
    learning_rate = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)

    return optimizer_classes[optimizer_name](
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )

In [5]:
def objective(data_list, num_epoch, model_name, trial):
    """Optuna objective: cross-validated error of a 4-class graph classifier.

    Runs a shuffled 10-fold split (fixed ``random_state=0``) over ``data_list``.
    For every fold a fresh model and optimizer are created, the model is trained
    for ``num_epoch`` epochs, and the value returned by ``cv_test`` after the
    final epoch is recorded for that fold.

    Args:
        data_list: indexable dataset passed to ``KFold.split`` and ``Subset``.
        num_epoch: number of training epochs per fold; must be at least 1.
        model_name: ``"GCN"`` or ``"DGCNN"``.
        trial: Optuna trial used by ``get_optimizer`` to sample hyperparameters.

    Returns:
        ``1.0 - mean(per-fold validation score)``; Optuna minimises this value.

    Raises:
        ValueError: if ``model_name`` is not supported or ``num_epoch < 1``.
    """
    # Validate up front: otherwise an unknown model name or a zero epoch count
    # only surfaces later as an unbound local variable inside the fold loop.
    if model_name not in ("GCN", "DGCNN"):
        raise ValueError(
            "unsupported model_name {!r}; expected 'GCN' or 'DGCNN'".format(model_name)
        )
    if num_epoch < 1:
        raise ValueError("num_epoch must be >= 1, got {!r}".format(num_epoch))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Cross-validation splitter; the fixed seed gives every trial the same folds.
    fold = KFold(n_splits=10, shuffle=True, random_state=0)

    valid_accs = []
    for train_idx, valid_idx in fold.split(data_list):
        # A fresh model per fold so no weights leak between folds.
        if model_name == "GCN":
            model = GCN(hidden_channels=64, num_classes=4, num_node_feature=1).to(device)
        else:  # "DGCNN" (guaranteed by the validation above)
            model = DGCNN_Model(num_features=1, num_classes=4).to(device)

        optimizer = get_optimizer(trial, model)
        criterion = torch.nn.CrossEntropyLoss()

        # Split the data into this fold's training and validation loaders.
        train_loader = DataLoader(
            Subset(data_list, train_idx),
            shuffle=True,
            batch_size=50,
        )
        valid_loader = DataLoader(
            Subset(data_list, valid_idx),
            shuffle=False,
            batch_size=50,
        )

        for _ in range(num_epoch):
            # One training pass followed by one validation pass.
            cv_train(model, train_loader, device, criterion, optimizer, model_name)
            valid_acc = cv_test(model, valid_loader, device, model_name)

        # Only the score after the last epoch counts for this fold.
        valid_accs.append(valid_acc)

    # Hyperparameter tuning minimises the returned value, so return the error.
    return 1.0 - np.average(valid_accs)

In [4]:
# Sweep definition. For each node count, the i-th entries of m_list,
# p_list[node] and step_list[node] together form one dataset configuration.
m_list = [2, 4, 6, 8, 10]
p_list = {"100": [0.04, 0.08, 0.11, 0.15, 0.19], "1000":[0.004, 0.008, 0.012, 0.016, 0.02]}
step_list = {"100": [200, 384, 564, 736, 900], "1000":[1996, 3984, 5964, 7936, 9900]}

summary_columns = ["model", "node", "p_s", "optimizer", "weight_decay", "adam_lr", "best_acc"]

# Collect one record per (configuration, model) and build the DataFrame once
# at the end; DataFrame.append was removed in pandas 2.0 and is quadratic in a loop.
records = []

for node in ["100", "1000"]:
    for m, p, step in zip(m_list, p_list[node], step_list[node]):
        # Parameter sets for the four network kinds (the four classes).
        p_s = [
            {"kind": "barabasi", "node": [node], "p": [str(m)]},
            {"kind": "noGrowth", "node": [node], "p": [str(step)]},
            {"kind": "noAttach", "node": [node], "p": [str(m)]},
            {"kind": "random", "node": [node], "p": [str(p)]}
        ]

        # Create the training-data folder for this configuration.
        make_train_data(p_s, "../train_data/net").copy_data()
        # Build the dataset.
        data_list = make_datasets()
        for name in ["GCN", "DGCNN"]:
            # Tuning: 10 trials, each a cross-validation with 10 epochs per fold.
            study = optuna.create_study()
            f = partial(objective, data_list, 10, name)
            study.optimize(f, n_trials=10)
            study.trials_dataframe().to_csv(f"./paper_result/optuna/{name}_{node}_{m},{p},{step}.csv")

            # Save the best parameters and the best accuracy.
            # The objective returns 1 - accuracy, so invert it here.
            params = dict(study.best_params)
            params["model"] = name
            params["node"] = node
            params["best_acc"] = 1 - study.best_value
            params["p_s"] = f"{m}_{p}_{step}"
            records.append(params)

df = pd.DataFrame(records, columns=summary_columns)
df.to_csv("paper_result/tuning_subset.csv")

100%|██████████| 5000/5000 [00:09<00:00, 505.36it/s]
100%|██████████| 5000/5000 [00:10<00:00, 490.02it/s]
100%|██████████| 5000/5000 [00:09<00:00, 511.85it/s]
100%|██████████| 5000/5000 [00:09<00:00, 513.06it/s]
[32m[I 2021-08-14 15:43:17,373][0m A new study created in memory with name: no-name-5bd03fc2-d1ec-4177-8ed9-5522381b24a6[0m
[32m[I 2021-08-14 15:45:58,026][0m Trial 0 finished with value: 0.72155 and parameters: {'optimizer': 'Adagrad', 'weight_decay': 0.00030609801422693726, 'adam_lr': 0.0008431232604630925}. Best is trial 0 with value: 0.72155.[0m
[32m[I 2021-08-14 15:48:45,816][0m Trial 1 finished with value: 0.5080500000000001 and parameters: {'optimizer': 'Adam', 'weight_decay': 3.6758389996665257e-06, 'adam_lr': 0.05012042217407817}. Best is trial 1 with value: 0.5080500000000001.[0m
[32m[I 2021-08-14 15:51:34,773][0m Trial 2 finished with value: 0.1894499999999999 and parameters: {'optimizer': 'Adam', 'weight_decay': 8.15871110411704e-07, 'adam_lr': 0.01945896

In [7]:
summary_columns = ["model", "node", "p_s", "optimizer", "weight_decay", "adam_lr", "best_acc"]

# Collect one record per model and build the DataFrame once at the end;
# DataFrame.append was removed in pandas 2.0.
records = []

# NOTE(review): the table displayed below this cell shows node "100" and the
# result is saved as poisson100.csv, while this loop says "1000" - the cell
# appears to have been edited after it was last run; confirm the intended value.
for node in ["1000"]:
    # Build the dataset.
    # NOTE(review): this cell does not stage any training data itself, so
    # make_datasets() presumably reads whatever is currently in the
    # training-data folder - confirm the folder holds the "poisson" data
    # before running.
    data_list = make_datasets()
    for name in ["GCN", "DGCNN"]:
        # Tuning: 10 trials, each a cross-validation with 10 epochs per fold.
        study = optuna.create_study()
        f = partial(objective, data_list, 10, name)
        study.optimize(f, n_trials=10)
        study.trials_dataframe().to_csv(f"./paper_result/optuna/{name}_{node}_poisson.csv")

        # Save the best parameters and the best accuracy.
        # The objective returns 1 - accuracy, so invert it here.
        params = dict(study.best_params)
        params["model"] = name
        params["node"] = node
        params["best_acc"] = 1 - study.best_value
        params["p_s"] = "poisson"
        records.append(params)

df = pd.DataFrame(records, columns=summary_columns)

100%|██████████| 5000/5000 [00:19<00:00, 255.83it/s]
100%|██████████| 5000/5000 [00:18<00:00, 267.45it/s]
100%|██████████| 5000/5000 [00:20<00:00, 248.02it/s]
100%|██████████| 5000/5000 [00:18<00:00, 269.81it/s]
[32m[I 2021-08-19 04:45:13,732][0m A new study created in memory with name: no-name-90ad4b7c-68ee-4d54-9ce5-f5048f9aaabf[0m
[32m[I 2021-08-19 04:48:22,155][0m Trial 0 finished with value: 0.6625 and parameters: {'optimizer': 'Adam', 'weight_decay': 7.397578534529133e-09, 'adam_lr': 0.00014095925290184784}. Best is trial 0 with value: 0.6625.[0m
[32m[I 2021-08-19 04:51:31,327][0m Trial 1 finished with value: 0.53735 and parameters: {'optimizer': 'Adam', 'weight_decay': 6.408172583369478e-10, 'adam_lr': 0.011448533338693107}. Best is trial 1 with value: 0.53735.[0m
[32m[I 2021-08-19 04:54:37,238][0m Trial 2 finished with value: 0.6691 and parameters: {'optimizer': 'Adam', 'weight_decay': 2.4895308817004086e-10, 'adam_lr': 0.00011957639111531239}. Best is trial 1 with v

In [8]:
# Show the summary table of the best hyperparameters per model.
df

Unnamed: 0,model,node,p_s,optimizer,weight_decay,adam_lr,best_acc
0,GCN,100,poisson,Adam,6.408173e-10,0.011449,0.46265
1,DGCNN,100,poisson,Adagrad,8.467073e-09,0.002991,0.9145


In [9]:
# Persist the summary table to CSV.
# NOTE(review): the file name says "100" while the tuning cell in this notebook
# iterates over node "1000" - confirm which node count this file belongs to.
df.to_csv("paper_result/poisson100.csv")