In [1]:
import data_preparation
import util_data
import util_config

from maskgan.model import MaskGAN
from msg_id_dictionary import MSGIDDictionary
from maskgan.mask import StochasticMask
from maskgan.dataset import MSGIDSequence

from itertools import product
import torch
import os
import numpy as np

# MaskGAN Data Generation

In [6]:
def generate_data(epoch_idx: int):
    """Sample sequences from a saved MaskGAN generator and store them as ``.npy``.

    The function reloads the parameter/path configuration, loads the training
    data, rebuilds the MaskGAN model, restores the generator weights saved for
    ``epoch_idx`` and runs the generator over the training dataloader. Rows that
    contain any value ``>= len(util_data.INT_TO_MSG_ID_CONVERTER)`` are dropped,
    the remainder is passed through ``data_preparation._filter_unique`` and
    written with ``np.save``.

    Args:
        epoch_idx: Epoch index used to build the generator checkpoint file name;
            must be greater than 0.

    Raises:
        AssertionError: If ``epoch_idx`` is not positive.
    """
    assert epoch_idx > 0
    util_config.CONFIG.reload_param_config()
    util_config.CONFIG.reload_path_config()
    param_config = util_config.CONFIG.get_param_config()
    path_config = util_config.CONFIG.get_path_config()

    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    seq_len = 128
    num_to_generate = param_config['maskgan']['num_of_generated_data']
    label_info_list = param_config['label_info_list']

    train_data, train_label, _, _, _, _, _ \
        = data_preparation.load_data(seq_len, label_info_list, 'before_aug', '', -1)

    # Label used only for the log line and the output file name.
    _failure_type = 'request'
    print('Train Data Info.')
    print(f'Attack-free: {(train_label == 0).sum()}')
    print(f'Heartbeat: {(train_label == 1).sum()}')
    print(f'Ping: {(train_label == 2).sum()}')
    print(f'Request: {(train_label == 3).sum()}')

    # One-hot encode the integer class labels into 4 columns (vectorised
    # replacement for the per-row loop; row i of the identity matrix is the
    # one-hot vector of class i).
    label_indices = np.asarray(train_label).astype(int).ravel()
    train_label = np.eye(4)[label_indices]

    msg_id_dict = MSGIDDictionary()
    masker = StochasticMask(0.3)
    train_dataset = MSGIDSequence(train_data, train_label, masker, msg_id_dict, DEVICE)

    # Hyper-parameters handed to the MaskGAN constructor.
    batch_size = 512
    gen_hidden_size = 64
    dis_hidden_size = 64
    gen_lr = 0.0001
    dis_lr = 0.005
    n_vocabs = msg_id_dict.get_n_vocabs()
    save_path = ''

    # Model
    mask_gan = MaskGAN(train_dataset, len(label_info_list), n_vocabs, batch_size, gen_hidden_size, dis_hidden_size,
                       gen_lr, dis_lr, seq_len, save_path, DEVICE)

    # Model load
    gan_type = f'Unrolled-0_wasserstein-False{param_config["exp_id"]}'
    maskgan_save_path = path_config['maskgan_model_save_dir_path_template'].format(f'{seq_len}', gan_type)
    maskgan_generator_save_file_name = path_config['maskgan_generator_save_file_name'].format('all', epoch_idx)
    generator_save_path = os.path.join(maskgan_save_path, maskgan_generator_save_file_name)
    # map_location lets a checkpoint saved on a GPU be restored on the CPU fallback.
    mask_gan.generator.load_state_dict(torch.load(generator_save_path, map_location=DEVICE))
    print(f'{maskgan_generator_save_file_name} has been loaded.')

    # Values at or above this bound have no entry in INT_TO_MSG_ID_CONVERTER.
    valid_n_vocabs = len(util_data.INT_TO_MSG_ID_CONVERTER)

    # Sample
    generated_data = []
    counter = 0
    with torch.no_grad():  # inference only; no autograd graph is needed
        for input_tensor, target_tensor, mask_tensor, label_tensor in mask_gan.train_dataloader:
            generated_tensor, _ \
                = mask_gan.generator.sample_data(input_tensor, target_tensor, mask_tensor, label_tensor)

            generated_npy = generated_tensor.to('cpu').numpy()

            # Keep only rows in which every value is below the valid vocabulary bound.
            generated_npy = generated_npy[~(generated_npy >= valid_n_vocabs).any(axis=1)]
            generated_data.append(generated_npy)

            counter += generated_npy.shape[0]
            if counter >= num_to_generate:
                break
        else:
            # The dataloader ran out before the requested amount was reached.
            print(f'Warning: only {counter} of {num_to_generate} requested samples were generated.')
    generated_data = np.vstack(generated_data)
    generated_data = data_preparation._filter_unique(generated_data)
    print(f'# of generated data ({_failure_type}): {len(generated_data)}')

    # Save the generated data
    additional_info = f'({epoch_idx})'
    data_type = f"{_failure_type}_{num_to_generate}{additional_info}.npy"
    generated_data_path = path_config['maskgan_generated_data_file_path_template'].format(seq_len, gan_type, data_type)
    util_config.make_dirs(generated_data_path)
    np.save(generated_data_path, generated_data)


# Run MaskGAN sampling with the generator checkpoint saved for epoch index 19.
generate_data(19)

Train Data Info.
Attack-free: 0
Heartbeat: 0
Ping: 0
Request: 149797
adv_trained_gen_all_19 has been loaded.
# of generated data (request): 50075


# RankGAN

In [4]:
from rankgan.model import RankGAN
from rankgan.dataset import MSGIDSequence

import util_config
import util_data
import data_preparation

import numpy as np
import torch


def generate_data(epoch_idx: int):
    """Sample sequences from a saved RankGAN generator and store them as ``.npy``.

    The function reloads the parameter/path configuration, loads the training
    data, rebuilds the RankGAN model, restores the generator weights saved for
    ``epoch_idx`` and repeatedly draws batches from the generator until the
    running count reaches ``param_config['rankgan']['num_of_generated_data']``.
    Every batch and the final stacked array are passed through
    ``data_preparation._filter_unique`` before the result is written with
    ``np.save``.

    Args:
        epoch_idx: Epoch index used to build the generator checkpoint file name;
            must be greater than 0.

    Raises:
        AssertionError: If ``epoch_idx`` is not positive.
    """
    assert epoch_idx > 0
    util_config.CONFIG.reload_param_config()
    util_config.CONFIG.reload_path_config()
    param_config = util_config.CONFIG.get_param_config()
    path_config = util_config.CONFIG.get_path_config()

    seq_len = 128
    num_to_generate = param_config['rankgan']['num_of_generated_data']
    label_info_list = param_config['label_info_list']

    train_data, train_label, _, _, _, _, _ \
        = data_preparation.load_data(seq_len, label_info_list, 'before_aug', '', -1)

    print('Train Data Info.')
    print(f'Attack-free: {(train_label == 0).sum()}')
    print(f'Heartbeat: {(train_label == 1).sum()}')
    print(f'Ping: {(train_label == 2).sum()}')
    print(f'Request: {(train_label == 3).sum()}')

    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = MSGIDSequence(train_data, DEVICE)

    # Hyper-parameters handed to the RankGAN constructor.
    n_vocabs = len(util_data.INT_TO_MSG_ID_CONVERTER)
    batch_size = 512
    ref_batch_size = 16
    gen_hidden_size = 64
    dis_hidden_size = 64
    gen_lr = 0.0001
    ran_lr = 0.001
    save_path = ''
    n_rollouts = 4

    # Empirical frequency of every distinct value found in the training data.
    msg_set, msg_id_counts = np.unique(train_data, return_counts=True)
    msg_id_prob_dist = msg_id_counts / msg_id_counts.sum()

    print(msg_set)
    print(msg_id_prob_dist)

    rank_gan = RankGAN(train_dataset, n_vocabs, batch_size, ref_batch_size,
                       gen_hidden_size, dis_hidden_size, n_rollouts, seq_len,
                       gen_lr, ran_lr, save_path,
                       msg_set, msg_id_prob_dist, DEVICE)

    # Model load
    gan_type = f'Unrolled-0_wasserstein-False{param_config["exp_id"]}'
    rankgan_save_path = path_config['rankgan_model_save_dir_path_template'].format(f'{seq_len}', gan_type)
    rankgan_generator_save_file_name = path_config['rankgan_generator_save_file_name'].format('all', epoch_idx)
    generator_save_path = os.path.join(rankgan_save_path, rankgan_generator_save_file_name)
    # map_location lets a checkpoint saved on a GPU be restored on the CPU fallback.
    rank_gan.generator.load_state_dict(torch.load(generator_save_path, map_location=DEVICE))
    print(f'{rankgan_generator_save_file_name} has been loaded.')

    # Sample
    # Label used only for the log line and the output file name.
    _failure_type = 'request'
    samples_per_call = 20000
    generated_data = []
    counter = 0
    with torch.no_grad():  # inference only; no autograd graph is needed
        while True:
            generated_tensor, _ = rank_gan.generator.generate_samples(samples_per_call)
            # Swap the two axes before converting to NumPy.
            generated_tensor = generated_tensor.permute(1, 0)
            generated_npy = generated_tensor.to('cpu').numpy()
            generated_npy = data_preparation._filter_unique(generated_npy)
            generated_data.append(generated_npy)
            counter += generated_npy.shape[0]
            # Rewrite a single progress line instead of printing one line per batch.
            print(f'\r{counter}/{num_to_generate}', end='', flush=True)
            if counter >= num_to_generate:
                break
    print()
    generated_data = np.vstack(generated_data)
    # NOTE(review): `counter` sums per-batch counts, so this second
    # _filter_unique pass over the stacked array may leave fewer rows than
    # num_to_generate — confirm whether that is acceptable.
    generated_data = data_preparation._filter_unique(generated_data)
    print(f'# of generated data ({_failure_type}): {len(generated_data)}')

    # Save the generated data
    additional_info = f'({epoch_idx})'
    data_type = f"{_failure_type}_{num_to_generate}{additional_info}.npy"
    generated_data_path = path_config['rankgan_generated_data_file_path_template'].format(seq_len, gan_type, data_type)
    util_config.make_dirs(generated_data_path)
    np.save(generated_data_path, generated_data)


# Run RankGAN sampling with the generator checkpoint saved for epoch index 19.
generate_data(19)

Train Data Info.
Attack-free: 0
Heartbeat: 159795
Ping: 0
Request: 0
[ 0  1  2  3  5  6  7  8  9 10 11 12 14 18 19 20 21 22 23 24 25 26 27]
[0.00479633 0.00478269 0.00478289 0.00479623 0.03796666 0.05147709
 0.13030328 0.13030724 0.06867642 0.02741633 0.04452912 0.04628332
 0.05672243 0.04721548 0.13034557 0.04720037 0.05798944 0.04722256
 0.00240318 0.02140604 0.02140936 0.00239472 0.00957325]
adv_trained_gen_all_19 has been loaded.
80
162
243
324
414
494
589
674
766
849
927
1014
1109
1190
1278
1357
1444
1520
1601
1693
1768
1849
1932
2021
2103
2187
2280
2355
2451
2540
2627
2704
2790
2876
2961
3048
3139
3227
3316
3400
3479
3566
3650
3737
3822
3903
3985
4056
4130
4205
4302
4382
4469
4550
4634
4717
4803
4900
4993
5077
5156
5240
5317
5396
5468
5537
5621
5709
5793
5880
5970
6050
6138
6229
6315
6407
6489
6569
6658
6746
6840
6913
6996
7068
7155
7240
7331
7425
7501
7579
7658
7747
7818
7903
7987
8076
8160
8254
8339
8422
8515
8604
8688
8771
8857
8939
9029
9124
9209
9295
9387
9469
9558
9643
9727

# StepGAN

In [7]:
from stepgan.dataset import MSGIDSequence, X_LEN, Y_LEN
from stepgan.model import StepGAN

import util_data
import util_config
import data_preparation

import torch
import numpy as np

# Use the CUDA device when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def generate_data(epoch_idx: int):
    """Sample sequences from a saved StepGAN generator and store them as ``.npy``.

    The function reloads the parameter/path configuration, loads the training
    data, rebuilds the StepGAN model, restores the generator weights saved for
    ``epoch_idx`` and runs the generator over the training dataloader. Each
    generated tensor is appended to its ``x_tensor`` with ``torch.hstack``, the
    stacked result is passed through ``data_preparation._filter_unique`` and
    written with ``np.save``.

    Args:
        epoch_idx: Epoch index used to build the generator checkpoint file name;
            must be greater than 0.

    Raises:
        AssertionError: If ``epoch_idx`` is not positive.
    """
    assert epoch_idx > 0
    util_config.CONFIG.reload_param_config()
    util_config.CONFIG.reload_path_config()
    param_config = util_config.CONFIG.get_param_config()
    path_config = util_config.CONFIG.get_path_config()

    seq_len = 128
    num_to_generate = param_config['stepgan_model']['num_of_generated_data']
    label_info_list = param_config['label_info_list']

    train_data, train_label, _, _, _, _, _ \
        = data_preparation.load_data(seq_len, label_info_list, 'before_aug', '', -1)

    # Label used only for the log line and the output file name.
    _failure_type = 'request'

    print('Train Data Info.')
    print(f'Attack-free: {(train_label == 0).sum()}')
    print(f'Heartbeat: {(train_label == 1).sum()}')
    print(f'Ping: {(train_label == 2).sum()}')
    print(f'Request: {(train_label == 3).sum()}')

    # One-hot encode the integer class labels into 4 columns (vectorised
    # replacement for the per-row loop; row i of the identity matrix is the
    # one-hot vector of class i).
    label_indices = np.asarray(train_label).astype(int).ravel()
    train_label = np.eye(4)[label_indices]

    train_dataset = MSGIDSequence(train_data, train_label, DEVICE)

    # Hyper-parameters handed to the StepGAN constructor.
    batch_size = 512
    gen_hidden_size = 64
    dis_hidden_size = 64
    gen_lr = 0.0001
    dis_lr = 0.005
    enc_seq_len = X_LEN
    dec_seq_len = Y_LEN
    n_vocabs = len(util_data.INT_TO_MSG_ID_CONVERTER)
    save_path = ''

    # Model
    step_gan = StepGAN(train_dataset, len(label_info_list), n_vocabs, batch_size, gen_hidden_size, dis_hidden_size,
                       gen_lr, dis_lr, enc_seq_len, dec_seq_len, save_path, DEVICE)

    # Model load
    gan_type = f'Unrolled-0_wasserstein-False{param_config["exp_id"]}'
    stepgan_save_path = path_config['stepgan_model_save_dir_path_template'].format(f'{seq_len}', gan_type)
    stepgan_generator_save_file_name = path_config['stepgan_generator_save_file_name'].format('all', epoch_idx)
    generator_save_path = os.path.join(stepgan_save_path, stepgan_generator_save_file_name)
    print(generator_save_path)
    # map_location lets a checkpoint saved on a GPU be restored on the CPU fallback.
    step_gan.generator.load_state_dict(torch.load(generator_save_path, map_location=DEVICE))
    print(f'{stepgan_generator_save_file_name} has been loaded.')

    # Sample
    generated_data = []
    counter = 0
    with torch.no_grad():  # inference only; no autograd graph is needed
        for x_tensor, y_tensor, label_tensor in step_gan.train_dataloader:
            generated_tensor, _ \
                = step_gan.generator.sample_data(x_tensor, y_tensor, label_tensor)
            # Prepend the conditioning part so each row holds x followed by the generated part.
            generated_tensor = torch.hstack([x_tensor, generated_tensor])

            generated_npy = generated_tensor.to('cpu').numpy()
            generated_data.append(generated_npy)

            counter += generated_npy.shape[0]
            if counter >= num_to_generate:
                break
        else:
            # The dataloader ran out before the requested amount was reached.
            print(f'Warning: only {counter} of {num_to_generate} requested samples were generated.')
    generated_data = np.vstack(generated_data)
    generated_data = data_preparation._filter_unique(generated_data)
    print(f'# of generated data ({_failure_type}): {len(generated_data)}')

    # Save the generated data
    # Use the StepGAN config entries: the 'leakgan_*' keys are the ones the
    # LeakGAN cell writes to, so using them here would put StepGAN samples in
    # LeakGAN's output location.
    # NOTE(review): assumes path_config defines
    # 'stepgan_generated_data_file_path_template' like the other GANs — confirm.
    additional_info = f'({epoch_idx})'
    data_type = f"{_failure_type}_{num_to_generate}{additional_info}.npy"
    generated_data_path = path_config['stepgan_generated_data_file_path_template'].format(seq_len, gan_type, data_type)
    util_config.make_dirs(generated_data_path)
    np.save(generated_data_path, generated_data)


# Run StepGAN sampling with the generator checkpoint saved for epoch index 19.
generate_data(19)

Train Data Info.
Attack-free: 0
Heartbeat: 0
Ping: 0
Request: 149797
D:/tasks/Projects/2022/무인이동체/논문작업/hitl\data2/model_save/stepgan/1.0_128/Unrolled-0_wasserstein-False(step1)\adv_trained_gen_all_19
adv_trained_gen_all_19 has been loaded.
# of generated data (request): 50175


# LeakGAN

In [4]:
from leakgan.model import LeakGAN
from leakgan.dataset import MSGIDSequence

import util_config
import util_data
import data_preparation

import numpy as np
import torch


def generate_data(epoch_idx: int):
    """Sample sequences per label from a saved LeakGAN and store them as ``.npy``.

    The function reloads the parameter/path configuration, loads the training
    data, rebuilds the LeakGAN model and restores both the generator and the
    discriminator weights saved for ``epoch_idx``. For every entry of
    ``param_config['leakgan_model']['labels_to_generate']`` except the first
    two, it draws batches conditioned on that label's one-hot vector until the
    running count reaches ``num_of_generated_data``. Rows that contain any value
    ``>= len(util_data.INT_TO_MSG_ID_CONVERTER)`` are dropped, the remainder is
    passed through ``data_preparation._filter_unique`` and written with
    ``np.save`` (one file per label).

    Args:
        epoch_idx: Epoch index used to build the checkpoint file names.
    """
    util_config.CONFIG.reload_param_config()
    util_config.CONFIG.reload_path_config()
    param_config = util_config.CONFIG.get_param_config()
    path_config = util_config.CONFIG.get_path_config()

    leakgan_config = param_config['leakgan_model']
    labels_to_generate = leakgan_config['labels_to_generate']
    n_labels = len(labels_to_generate)
    seq_len = 128

    label_info_list = param_config['label_info_list']
    train_data, train_label, _, _, _, _, _ \
        = data_preparation.load_data(128, label_info_list, 'before_aug_none', '')

    print('Train Data Info.')
    print(f'Attack-free: {(train_label == 0).sum()}')
    print(f'Heartbeat: {(train_label == 1).sum()}')
    print(f'Ping: {(train_label == 2).sum()}')
    print(f'Request: {(train_label == 3).sum()}')

    # One-hot encode the integer class labels, one column per entry of
    # labels_to_generate (vectorised replacement for the per-row loop).
    label_indices = np.asarray(train_label).astype(int).ravel()
    train_label = np.eye(n_labels)[label_indices]

    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = MSGIDSequence(train_data, train_label, DEVICE)

    # Hyper-parameters handed to the LeakGAN constructor.
    n_vocabs = len(util_data.INT_TO_MSG_ID_CONVERTER)
    batch_size = 1024
    gen_hidden_size = 64
    dis_hidden_size = 64
    gen_lr = 0.0001
    dis_lr = 0.001
    save_path = ''
    n_rollouts = 1

    # Empirical frequency of every distinct value found in the training data.
    msg_set, msg_id_counts = np.unique(train_data, return_counts=True)
    msg_id_prob_dist = msg_id_counts / msg_id_counts.sum()

    print(msg_set)
    print(msg_id_prob_dist)

    leak_gan = LeakGAN(train_dataset, n_vocabs, len(label_info_list), batch_size,
                       gen_hidden_size, dis_hidden_size, n_rollouts, seq_len,
                       gen_lr, dis_lr, save_path,
                       msg_set, msg_id_prob_dist, DEVICE)

    # Model load
    gan_type = f'Unrolled-0_wasserstein-False{param_config["exp_id"]}'
    leakgan_save_path = path_config['leakgan_model_save_dir_path_template'].format(f'{seq_len}', gan_type)
    leakgan_generator_save_file_name = path_config['leakgan_generator_save_file_name'].format('all', epoch_idx)
    generator_save_path = os.path.join(leakgan_save_path, leakgan_generator_save_file_name)
    # map_location lets a checkpoint saved on a GPU be restored on the CPU fallback.
    leak_gan.generator.load_state_dict(torch.load(generator_save_path, map_location=DEVICE))
    print(f'{leakgan_generator_save_file_name} has been loaded.')

    leakgan_discriminator_save_file_name = path_config['leakgan_discriminator_save_file_name'].format('all', epoch_idx)
    discriminator_save_path = os.path.join(leakgan_save_path, leakgan_discriminator_save_file_name)
    leak_gan.discriminator.load_state_dict(torch.load(discriminator_save_path, map_location=DEVICE))
    print(f'{leakgan_discriminator_save_file_name} has been loaded.')

    # Sample
    # Values at or above this bound have no entry in INT_TO_MSG_ID_CONVERTER.
    valid_n_vocabs = len(util_data.INT_TO_MSG_ID_CONVERTER)
    num_to_generate = leakgan_config['num_of_generated_data']
    # Number of identical label rows handed to generate_samples per call
    # (presumably one generated row per label row — verify against LeakGAN).
    samples_per_call = 1000

    for idx, _failure_type in enumerate(labels_to_generate):
        # The first two labels are not generated.
        if idx in (0, 1):
            continue
        generated_data = []
        counter = 0

        one_hot_label = np.zeros(n_labels)
        one_hot_label[int(idx)] = 1.0
        one_hot_labels = np.repeat([one_hot_label], samples_per_call, 0)
        label_tensor = torch.Tensor(one_hot_labels).to(DEVICE)

        with torch.no_grad():  # inference only; no autograd graph is needed
            while True:
                generated_tensor = leak_gan.generate_samples(label_tensor)
                generated_npy = generated_tensor.to('cpu').numpy()
                # Keep only rows in which every value is below the valid vocabulary bound.
                generated_npy = generated_npy[~(generated_npy >= valid_n_vocabs).any(axis=1)]
                generated_data.append(generated_npy)

                counter += generated_npy.shape[0]
                if counter >= num_to_generate:
                    break
        generated_data = np.vstack(generated_data)
        generated_data = data_preparation._filter_unique(generated_data)
        print(f'# of generated data ({_failure_type}({idx})): {len(generated_data)}')

        # Save the generated data
        additional_info = f'({epoch_idx})'
        data_type = f"{_failure_type}_{num_to_generate}{additional_info}.npy"
        generated_data_path = path_config['leakgan_generated_data_file_path_template'].format(seq_len, gan_type, data_type)
        util_config.make_dirs(generated_data_path)
        np.save(generated_data_path, generated_data)


# Run LeakGAN sampling with the generator/discriminator checkpoints saved for epoch index 0.
generate_data(0)

Train Data Info.
Attack-free: 158730
Heartbeat: 159795
Ping: 159723
Request: 149797
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 14 18 19 20 21 22 23 24 25 26 27]
[0.00213425 0.00213303 0.00213426 0.21975201 0.19948206 0.02716992
 0.0357983  0.07856859 0.078656   0.0448746  0.0160769  0.0295594
 0.02042431 0.03269328 0.02114356 0.07877068 0.02115052 0.04204499
 0.0210471  0.00106922 0.01000146 0.00999583 0.00107055 0.0042492 ]
adv_trained_gen_all_0 has been loaded.
adv_trained_dis_all_0 has been loaded.
# of generated data (ping(2)): 50000
# of generated data (request(3)): 50000
