In [1]:
import warnings
import os
# Set TensorFlow's C++ log-level variable here, in the first cell, so it is
# already in the environment when a later cell imports tensorflow.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '1'})


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``.

    Accepts any positional or keyword arguments, ignores them and returns
    ``None``.
    """
    return None


# NOTE(review): this replaces warnings.warn for the whole process, so every
# warning raised through it (including deprecation notices) is discarded.
warnings.warn = warn


In [2]:
import pickle
import typing
import numpy as np
from const import *
import pandas as pd
import tensorflow as tf
from sklearn.metrics import log_loss


In [3]:

def Critic(in_feature: int) -> tf.keras.models.Sequential:
    """Build the critic network.

    Two hidden stages of Dense(128) -> BatchNormalization -> LeakyReLU(0.2)
    followed by a single-unit Dense output with no activation.

    Args:
        in_feature: Number of input features per sample.

    Returns:
        An uncompiled ``Sequential`` model named ``'Critic'``.
    """
    keras_layers = tf.keras.layers

    # Layers are instantiated in network order so Keras' automatic layer
    # names come out the same as a literal inline list would give.
    stack = [
        keras_layers.Dense(units=128, input_shape=(in_feature,)),
        keras_layers.BatchNormalization(),
        keras_layers.LeakyReLU(alpha=0.2),

        keras_layers.Dense(units=128),
        keras_layers.BatchNormalization(),
        keras_layers.LeakyReLU(alpha=0.2),

        keras_layers.Dense(units=1),
    ]

    return tf.keras.models.Sequential(layers=stack, name='Critic')


def Generator(latent_dim: int, out_feature: int) -> tf.keras.models.Sequential:
    """Build the generator network.

    Two hidden stages of Dense(128) -> BatchNormalization -> LeakyReLU(0.2)
    followed by a linear Dense layer with ``out_feature`` units.

    Args:
        latent_dim: Size of the noise vector fed to the first layer.
        out_feature: Number of features produced per generated sample.

    Returns:
        An uncompiled ``Sequential`` model named ``'Generator'``.
    """
    keras_layers = tf.keras.layers

    # Layers are instantiated in network order so Keras' automatic layer
    # names come out the same as a literal inline list would give.
    stack = [
        keras_layers.Dense(units=128, input_shape=(latent_dim,)),
        keras_layers.BatchNormalization(),
        keras_layers.LeakyReLU(alpha=0.2),

        keras_layers.Dense(units=128),
        keras_layers.BatchNormalization(),
        keras_layers.LeakyReLU(alpha=0.2),

        keras_layers.Dense(units=out_feature),
    ]

    return tf.keras.models.Sequential(layers=stack, name='Generator')


def Blackbox(path: str) -> typing.Any:
    """Load a pickled black-box model from disk.

    Args:
        path: Location of the pickle file.

    Returns:
        Whatever object was pickled into ``path``. The rest of the notebook
        calls ``predict_proba`` on it.

    Raises:
        OSError: If ``path`` cannot be opened for reading.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only point this at model files you created or otherwise trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


In [4]:
def load_dataset(path: str) -> pd.DataFrame:
    """Read a feather file and keep only the feature columns of label-1 rows.

    Args:
        path: Feather file containing a ``label`` column.

    Returns:
        The rows whose ``label`` equals 1 (the probe-attack rows), without
        the ``label`` column, re-indexed from 0 and cast to ``float32``.
    """
    frame = pd.read_feather(path)

    # Build the row mask first, then discard the label column.
    is_probe = frame['label'] == 1
    features = frame.loc[is_probe].drop(columns=['label'])

    return features.reset_index(drop=True).astype('float32')


def mapping(fake: np.ndarray) -> pd.DataFrame:
    """Embed generated content features into randomly sampled real rows.

    Reads the module-level ``dataset`` and ``content_feature`` names.

    Args:
        fake: 2-D array-like with one column per entry of
            ``content_feature`` (anything ``np.asarray`` accepts).

    Returns:
        A ``float32`` DataFrame with the same columns as ``dataset`` and
        ``len(fake)`` rows: rows sampled from ``dataset`` whose
        ``content_feature`` columns have been overwritten by ``fake``.

    Raises:
        ValueError: If ``fake`` has more rows than ``dataset`` (sampling is
            without replacement) or its column count does not match
            ``content_feature``.
    """
    # np.asarray turns tensor-like input into a plain array before pandas
    # sees it; for an ndarray it is a no-op apart from the float32 cast.
    fake = pd.DataFrame(np.asarray(fake, dtype='float32'),
                        columns=content_feature)

    # Both frames carry a fresh 0..n-1 index, so the assignment below lines
    # up row i of `fake` with row i of `real`.
    real = dataset.sample(n=fake.shape[0]).reset_index(
        drop=True).astype('float32')
    real.loc[:, content_feature] = fake.loc[:, content_feature]

    return real


In [5]:
tf.keras.utils.disable_interactive_logging()
dataset = load_dataset('dataset/train.feather')

# Both networks operate on the content-feature slice only, so their width is
# taken from `content_feature` rather than repeating a literal in two places.
n_features = len(content_feature)
latent_dim = 13
critic = Critic(n_features)
gen = Generator(latent_dim, n_features)

# Alternative black box (Keras DNN) kept for reference:
# blackbox = tf.keras.models.load_model('models/DNN.h5')
blackbox = Blackbox('models/RandomForest.pickle')

opt_critic = tf.keras.optimizers.RMSprop(learning_rate=1e-4)
opt_gen = tf.keras.optimizers.RMSprop(learning_rate=1e-4)

# Training hyper-parameters.
n_epochs = 50
n_batch = 64
n_critics = 5      # critic updates per generator update
lambada = 0.5      # weight of the black-box term in the generator loss
clip_min = -0.01   # lower bound for critic weight clipping
clip_max = 0.01    # upper bound for critic weight clipping


In [6]:
@tf.function
def train_step(real_sample):
    """Run one training step: ``n_critics`` critic updates, then one generator update.

    Reads the module-level ``gen``, ``critic``, ``opt_gen``, ``opt_critic``,
    ``blackbox``, ``mapping``, ``ids_loss`` and the hyper-parameters
    ``n_critics``, ``n_batch``, ``latent_dim``, ``lambada``, ``clip_min``,
    ``clip_max``.

    Args:
        real_sample: Batch of real samples passed straight to ``critic``.

    Returns:
        Tuple ``(loss_critic, loss_gen, loss_blackbox)``; ``loss_critic`` is
        the value from the last critic iteration.
    """
    # ---- critic updates ----
    for _ in range(n_critics):
        noise = tf.random.normal((n_batch, latent_dim))
        # Generated outside the tape: only critic weights are updated here.
        fake = gen(noise, training=True)
        with tf.GradientTape() as tape:
            critic_real = critic(real_sample, training=True)
            critic_fake = critic(fake, training=True)
            # Critic loss: mean score on fakes minus mean score on reals.
            loss_critic = tf.reduce_mean(
                critic_fake) - tf.reduce_mean(critic_real)
        grad_critic = tape.gradient(loss_critic, critic.trainable_weights)
        opt_critic.apply_gradients(zip(grad_critic, critic.trainable_weights))
        # Clip every trainable critic variable into [clip_min, clip_max].
        for var in critic.trainable_variables:
            var.assign(tf.clip_by_value(var, clip_min, clip_max))

    # ---- generator update ----
    noise = tf.random.normal((n_batch, latent_dim))
    # NOTE(review): the black-box score below is computed on `fake` (the
    # batch from the last critic iteration), not on `out_gen` produced inside
    # the tape further down, and it goes through tf.numpy_function outside
    # any GradientTape. As written, `loss_blackbox` therefore looks like a
    # constant offset in `loss_gen` that contributes no gradient to the
    # generator - confirm whether that is intended.
    out_map = tf.numpy_function(mapping, [fake], Tout=tf.float32)
    out_blackbox = tf.numpy_function(blackbox.predict_proba, [
                                     out_map], Tout=tf.double)
    out_blackbox = tf.cast(out_blackbox, tf.float32)
    # NOTE(review): the static shape is set only after `out_map` has already
    # been consumed above; 122 is presumably the full feature count of a
    # mapped sample - TODO confirm.
    out_map.set_shape((n_batch, 122))
    # Alternative for a Keras black box:
    # out_blackbox = blackbox(out_map, training=False)
    target = tf.zeros(n_batch, dtype='int64')  # zero is normal
    loss_blackbox = ids_loss(target, out_blackbox)
    with tf.GradientTape() as tape:
        out_gen = gen(noise, training=True)
        out_critic = critic(out_gen, training=False)
        # Generator loss: negated critic score plus the weighted black-box loss.
        loss_gen = tf.reduce_mean(-out_critic + lambada * loss_blackbox)

    grad_gen = tape.gradient(loss_gen, gen.trainable_weights)
    opt_gen.apply_gradients(zip(grad_gen, gen.trainable_weights))
    return loss_critic, loss_gen, loss_blackbox


In [7]:
# Loss that scores the black-box output against the target class in train_step.
ids_loss = tf.keras.losses.SparseCategoricalCrossentropy()
# ids_loss = log_loss

# Only the content-feature columns are fed to the critic as "real" samples.
content_values = dataset.loc[:, content_feature].values
train_data = (
    tf.data.Dataset
    .from_tensor_slices(tf.convert_to_tensor(content_values))
    .shuffle(buffer_size=1024)
    .batch(n_batch)
)

log_every = 100
for epoch in range(n_epochs):
    for step, X_real in enumerate(train_data):
        loss_critic, loss_gen, loss_blackbox = train_step(X_real)

        # Report only on every `log_every`-th step of the epoch.
        if step % log_every != 0:
            continue
        print('Epoch: {}, Loss Critic: {}, Loss Gen: {}, Loss Blackbox: {}'.format(
            epoch, loss_critic, loss_gen, loss_blackbox))


Epoch: 0, Loss Critic: -1.2382515706121922e-06, Loss Gen: 1.691176414489746, Loss Blackbox: 3.3818564414978027
Epoch: 0, Loss Critic: -2.871228934964165e-05, Loss Gen: 1.5456184148788452, Loss Blackbox: 3.0909979343414307
Epoch: 0, Loss Critic: -4.205667937640101e-05, Loss Gen: 1.4383256435394287, Loss Blackbox: 2.8763558864593506
Epoch: 0, Loss Critic: -4.717709089163691e-05, Loss Gen: 1.648705005645752, Loss Blackbox: 3.2970826625823975
Epoch: 0, Loss Critic: -5.415566556621343e-05, Loss Gen: 1.695448398590088, Loss Blackbox: 3.3905577659606934
Epoch: 0, Loss Critic: -6.978135206736624e-05, Loss Gen: 1.6379575729370117, Loss Blackbox: 3.2755346298217773
Epoch: 0, Loss Critic: -7.844326319172978e-05, Loss Gen: 1.798757553100586, Loss Blackbox: 3.597114086151123
Epoch: 0, Loss Critic: -8.467642328469083e-05, Loss Gen: 1.693077564239502, Loss Blackbox: 3.3857433795928955
Epoch: 1, Loss Critic: -9.073060209630057e-05, Loss Gen: 1.5572346448898315, Loss Blackbox: 3.1140644550323486
Epoch:

KeyboardInterrupt: 

In [8]:
def detection_accuracy(num_sample, num_sample_detected):
    """Return the fraction of samples that were detected.

    Args:
        num_sample: Total number of samples evaluated.
        num_sample_detected: How many of them were flagged.

    Returns:
        ``num_sample_detected / num_sample``.

    Raises:
        ZeroDivisionError: If ``num_sample`` is 0.
    """
    detected_fraction = num_sample_detected / num_sample
    return detected_fraction


def attack_success_rate(detection_rate_org, detection_rate_adv):
    """Return the absolute drop in detection rate caused by the attack.

    Args:
        detection_rate_org: Detection rate on the original samples.
        detection_rate_adv: Detection rate on the adversarial samples.

    Returns:
        ``detection_rate_org - detection_rate_adv``.
    """
    rate_drop = detection_rate_org - detection_rate_adv
    return rate_drop


def evade_increase_rate(detection_rate_org, detection_rate_adv):
    """Return the relative drop in detection rate caused by the attack.

    Args:
        detection_rate_org: Detection rate on the original samples.
        detection_rate_adv: Detection rate on the adversarial samples.

    Returns:
        ``1 - detection_rate_adv / detection_rate_org``.

    Raises:
        ZeroDivisionError: If ``detection_rate_org`` is 0.
    """
    still_detected = detection_rate_adv / detection_rate_org
    return 1 - still_detected


In [9]:
num_sample = len(dataset)

# Baseline: how many of the original rows the black box assigns to class 1.
# argmax is taken directly on the probabilities; rounding first is redundant
# for two classes and can zero out every column when there are more.
org_pred = blackbox.predict_proba(dataset).argmax(axis=1)
num_sample_detected_org = int((org_pred == 1).sum())

# Adversarial: one generated sample per original row, embedded into real rows
# by `mapping` and scored by the same black box.
noise = tf.random.normal([num_sample, latent_dim])
adv_content = gen(noise, training=False).numpy()
# Re-wrapping with dataset.columns makes sure the black box sees the same
# column layout as the baseline call above.
adv_sample = pd.DataFrame(mapping(adv_content), columns=dataset.columns)
adv_pred = blackbox.predict_proba(adv_sample).argmax(axis=1)
num_sample_detected_adv = int((adv_pred == 1).sum())


detection_rate_org = detection_accuracy(num_sample, num_sample_detected_org)
detection_rate_adv = detection_accuracy(num_sample, num_sample_detected_adv)


In [10]:
# Absolute (asr) and relative (eir) drop in detection rate, original vs adversarial.
asr = attack_success_rate(
    detection_rate_org=detection_rate_org,
    detection_rate_adv=detection_rate_adv,
)
eir = evade_increase_rate(
    detection_rate_org=detection_rate_org,
    detection_rate_adv=detection_rate_adv,
)


In [11]:
# Print both evasion metrics on a single line.
summary_template = "Attack success rate: {}, Evade increase rate: {}"
print(summary_template.format(asr, eir))


Attack success rate: 0.0007403052670541532, Evade increase rate: 0.0007403536277327261
