In [None]:
import collections
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tqdm import tqdm

from attacks import GM
from classifiers import CIFAR_CNN
from deepexplain.tensorflow import DeepExplain
from trainers import Trainer
from utils import save, unpickle, preprocess, scale, params_maker

# Expose only GPU index 2 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

datadir = './CIFAR10_data/'
# os.listdir returns entries in arbitrary order, but the code below indexes the
# listing positionally (entries 0-4 -> training batches, entry 5 -> test batch),
# so the listing is sorted to make that mapping deterministic.
# NOTE(review): assumes the directory holds exactly the six batch files
# (e.g. data_batch_1..5 and test_batch) - confirm there are no extra files.
batches = [datadir + batch for batch in sorted(os.listdir(datadir))]


def _as_nhwc(flat_pixels):
    '''
    flat_pixels : array of shape [N, 3*32*32] holding channel-planar pixel rows
    returns     : float32 array of shape [N, 32, 32, 3]
    '''
    planar = np.reshape(np.asarray(flat_pixels, dtype=np.float32), [-1, 3, 32, 32])
    return np.transpose(planar, [0, 2, 3, 1])


# Collect the five training batches and concatenate once instead of re-copying
# the growing array on every iteration.
train_images, train_labels = [], []

for i in tqdm(range(5)):
    batch = unpickle(batches[i])
    train_images.append(_as_nhwc(batch[b'data']))
    train_labels.append(batch[b'labels'])

cifar = np.concatenate(train_images, axis=0)
labels = np.concatenate(train_labels, axis=0)

# Map raw pixel values from [0, 255] to [-1, 1].
scaled_cifar = cifar / 127.5 - 1.0

test_batch = unpickle(batches[5])
# Cast to float32 like the training images; otherwise the test images keep the
# pickled integer dtype and writing the float mean pixel into them truncates it.
cifar_test = _as_nhwc(test_batch[b'data'])
scaled_cifar_test = cifar_test / 127.5 - 1.0
labels_test = np.array(test_batch[b'labels'])

data_train = (scaled_cifar, labels)
data_test = (scaled_cifar_test, labels_test)

# Per-channel mean of the unscaled training images; used as the fill value
# when pixels are occluded.
cifar_mean = np.mean(cifar, axis=(0, 1, 2))


## Accuracy

In [None]:
def eval_accuracy(logdir):
    '''
    Evaluate the model stored in `logdir` on the train set, the test set and
    GM-attacked test images, then pickle the three scores.

    logdir : model directory passed to CIFAR_CNN; must end with '/'. The output
             path is derived by replacing 'tf_logs' with 'results/accuracy',
             dropping the last character and appending '.pickle'.

    The result is an OrderedDict with keys 'Train', 'Test' and 'Adv'.
    '''
    tf.reset_default_graph()

    sess = tf.InteractiveSession()

    # Close the session even if loading, evaluation or the attack fails.
    try:
        DNN = CIFAR_CNN(logdir, activation=tf.nn.relu)
        DNN.load(sess)

        gm = GM(DNN, eps=0.4, step_size=2, n_steps=40, norm='2', loss_type='xent')

        train_acc = DNN.evaluate(sess, data_train)
        test_acc = DNN.evaluate(sess, data_test)

        # Score the attacked images against the original test labels.
        adv_images = gm.attack(sess, data_test, batch_size=500)
        adv_acc = DNN.evaluate(sess, (adv_images, data_test[1]))
    finally:
        sess.close()

    res = collections.OrderedDict([('Train', train_acc), ('Test', test_acc), ('Adv', adv_acc)])

    # [:-1] strips the trailing '/' of logdir before the extension is added.
    savefile = logdir.replace('tf_logs', 'results/accuracy')[:-1] + '.pickle'
    savedir = os.path.dirname(savefile)

    # exist_ok avoids the check-then-create race; skip when there is no directory part.
    if savedir:
        os.makedirs(savedir, exist_ok=True)

    save(res, savefile)


## ROAR / KAR (RemOve And Retrain / Keep And Retrain)

In [None]:
def random_remove(images, percentile, keep=False, fill=None):
    '''
    Replace a random subset of pixels with a fill value (random-attribution baseline).

    images     : array of shape [N,H,W,C]; not modified, a copy is returned
    percentile : scalar between 0 and 100, inclusive
    keep       : if true each pixel is kept with probability percentile/100;
                 otherwise each pixel is removed with probability percentile/100
    fill       : value of shape [C] (or broadcastable) written into removed pixels;
                 defaults to the module-level `cifar_mean`
    '''
    if fill is None:
        fill = cifar_mean

    images = np.copy(images)

    # One Bernoulli draw per pixel position; mask == 1 with probability (100 - percentile) / 100.
    mask = np.random.binomial(1, (100 - percentile) / 100, size=images.shape[:-1])

    # keep=True removes the pixels drawn as 1, keep=False those drawn as 0.
    removed_value = 1 if keep else 0
    images[mask == removed_value] = fill

    return images


def remove(images, attributions, percentile, keep=False, random=False, fill=None):
    '''
    Replace pixels with a fill value according to their per-image attribution rank.

    images       : tensor of shape [N,H,W,C]; not modified, a copy is returned
    attributions : tensor of shape [N,H,W]
    percentile   : scalar between 0 and 100, inclusive
    keep         : if true keep q percent; otherwise remove q percent
    random       : unused; accepted only so existing call signatures keep working
    fill         : value of shape [C] (or broadcastable) written into removed pixels;
                   defaults to the module-level `cifar_mean`

    Comparisons are strict, so a pixel whose attribution equals its image's
    threshold is never replaced.
    '''
    if fill is None:
        fill = cifar_mean

    images = np.copy(images)

    # One threshold per image, shape [N,1,1], so it broadcasts against [N,H,W].
    thresholds = np.percentile(attributions, 100 - percentile, axis=(1, 2), keepdims=True)

    if keep:
        # Keep the top `percentile` percent: blank everything below the threshold.
        removed = attributions < thresholds
    else:
        # Remove the top `percentile` percent: blank everything above the threshold.
        removed = attributions > thresholds

    images[removed] = fill

    return images


def occlude_dataset(DNN, de, attribution, loss, percentiles, test=False, keep=False, random=False, batch_size=1000,
                    savedir='./'):
    '''
    Build and pickle one occluded copy of the train or test images per percentile.

    DNN         : model whose input placeholder `DNN.X` is explained
    de          : DeepExplain context used to compute attributions
    attribution : attribution method name; also part of the output filename
    loss        : tensor handed to `de.explain` as the explained quantity
    percentiles : iterable of percentile values passed to remove / random_remove
    test        : if true occlude `cifar_test`, otherwise `cifar`
    keep        : forwarded to remove / random_remove
    random      : if true skip attribution maps and occlude pixels at random
    batch_size  : number of images processed at a time
    savedir     : prefix the filename is appended to (must end with '/')

    Files are written as '<savedir><train|test>_<attribution>_<percentile>.pickle'.
    '''
    Xs = cifar_test if test else cifar
    split_name = 'test' if test else 'train'

    n_batches = math.ceil(len(Xs) / batch_size)
    windows = [slice(b * batch_size, (b + 1) * batch_size) for b in range(n_batches)]

    if not random:
        # Attribution maps are computed once and reused for every percentile.
        per_batch_maps = []

        for window in tqdm(windows):
            raw_images = Xs[window]
            scaled_images = scale(raw_images)

            if 'edge' in attribution:
                # NOTE(review): Canny is not defined or imported in the visible notebook - confirm.
                attrs = Canny(raw_images)
            else:
                attrs = preprocess(de.explain(attribution, loss, DNN.X, scaled_images), 0, 100, use_abs=True)

            # Add small random noise so np.percentile works correctly
            attrs += np.random.uniform(low=0.0, high=1e-8, size=attrs.shape)

            per_batch_maps.append(attrs)

        hmaps = np.concatenate(per_batch_maps, axis=0)

    for percentile in tqdm(percentiles):
        occluded_batches = []

        for window in windows:
            raw_images = Xs[window]

            if random:
                occluded = random_remove(raw_images, percentile, keep)
            else:
                occluded = remove(raw_images, hmaps[window], percentile, keep)

            # Occlusion works on the unscaled images; scale before storing.
            occluded_batches.append(scale(occluded))

        target = savedir + '{}_{}_{}.pickle'.format(split_name, attribution, percentile)
        save(np.concatenate(occluded_batches, axis=0), target)


def eval_roar_kar(logdir, keep, train_only=False):
    '''
    Run the ROAR (keep=False) or KAR (keep=True) benchmark for the model in `logdir`.

    logdir     : directory of the model to explain; must end with '/'. Occluded
                 datasets go to the same path with 'tf_logs' replaced by
                 'KAR' or 'ROAR'.
    keep       : true selects KAR, false selects ROAR
    train_only : if true skip generating the occluded datasets and reuse the
                 pickles already present in the occlusion directory

    For each attribution method and percentile a fresh CIFAR_CNN is trained on
    the occluded train set and evaluated on the occluded test set. This is
    repeated three times and the per-percentile accuracies are averaged. The
    averages are merged into 'results/<kar|roar>/...pickle', overwriting
    entries for the methods evaluated here and leaving other keys untouched.
    '''
    percentiles = [10, 30, 50, 70, 90]

    # Display name -> method identifier handed to occlude_dataset / DeepExplain.
    attribution_methods = collections.OrderedDict([
        ('Random', 'zero'),
        ('Saliency', 'saliency'),
        ('Grad * Input', 'grad*input'),
    ])

    # Resolve (and create) the occlusion directory once instead of on every use.
    occ_dir = logdir.replace('tf_logs', 'KAR' if keep else 'ROAR')
    os.makedirs(occ_dir, exist_ok=True)

    if not train_only:

        tf.reset_default_graph()

        sess = tf.InteractiveSession()

        # Close the session even if attribution or saving fails part-way.
        try:
            with DeepExplain(session=sess, graph=sess.graph) as de:

                DNN = CIFAR_CNN(logdir, activation=tf.nn.relu)
                DNN.load(sess)

                loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=tf.stop_gradient(tf.one_hot(DNN.yi, depth=DNN.n_classes)), logits=DNN.logits)

                batch_size = 2500

                for k, v in attribution_methods.items():
                    use_random = k == 'Random'
                    # Train split first, then test split.
                    for on_test in (False, True):
                        occlude_dataset(DNN, de, v, loss, percentiles, on_test, keep, use_random, batch_size, occ_dir)
        finally:
            sess.close()

    ress = collections.OrderedDict((k, []) for k in attribution_methods)

    n_repeats = 3

    for _ in range(n_repeats):

        for k, v in attribution_methods.items():

            res = []

            for p in percentiles:
                # Distinct local names so the module-level data_train / data_test are not shadowed.
                occluded_train = (unpickle(occ_dir + '{}_{}_{}.pickle'.format('train', v, p)), labels)
                occluded_test = (unpickle(occ_dir + '{}_{}_{}.pickle'.format('test', v, p)), labels_test)

                tf.reset_default_graph()

                # Every retraining run uses the same fixed log directory.
                DNN = CIFAR_CNN('tf_logs/exp3/ROAR_KAR/', activation=tf.nn.relu)

                sess = tf.InteractiveSession()

                try:
                    sess.run(tf.global_variables_initializer())

                    trainer = Trainer(sess, DNN, occluded_train)
                    # NOTE(review): presumably 20 epochs with progress every 30 - confirm in Trainer.train.
                    trainer.train(20, p_epochs=30)

                    acc = DNN.evaluate(sess, occluded_test)
                finally:
                    sess.close()

                print('{}{} | Accuracy : {:.5f}'.format(k, p, acc))

                res.append(acc)

            ress[k].append(res)

    # Average over repeats: one value per percentile for each method.
    res_mean = {k: np.mean(v, axis=0) for k, v in ress.items()}

    # [:-1] strips the trailing '/' of logdir before the extension is added.
    savefile = logdir.replace('tf_logs', 'results/{}'.format('kar' if keep else 'roar'))[:-1] + '.pickle'
    savedir = os.path.dirname(savefile)

    print('Saving results at ' + savefile)

    if savedir:
        os.makedirs(savedir, exist_ok=True)

    if os.path.isfile(savefile):
        # Merge into earlier results so methods evaluated in other runs are preserved.
        merged = unpickle(savefile)
        for k, v in res_mean.items():
            merged[k] = v
    else:
        merged = res_mean

    save(merged, savefile)


## Train

In [None]:
def logdir_maker(attack_params):
    '''
    Build the log directory path of a GM adversarially trained model.

    attack_params : iterable of (name, value) pairs; only the values, in
                    iteration order, appear in the path, joined by '_'
    '''
    suffix = '_'.join(str(value) for _, value in attack_params)
    return 'tf_logs/exp3/adv/GM/' + suffix + '/'


def run(attack_params=None, train_first=True, run_roar_kar=True, keep=False, rk_train_only=False):
    '''
    Optionally train a model, then optionally run the ROAR/KAR benchmark on it.

    attack_params : iterable of (name, value) pairs describing the GM attack used
                    for adversarial training; None/empty selects the standard model
    train_first   : if true train the model and evaluate its accuracy first
    run_roar_kar  : if true call eval_roar_kar on the model's log directory
    keep          : forwarded to eval_roar_kar (true = KAR, false = ROAR)
    rk_train_only : forwarded to eval_roar_kar as `train_only`
    '''
    if attack_params:
        logdir = logdir_maker(attack_params)
    else:
        logdir = 'tf_logs/exp3/standard/'

    print(logdir)

    if train_first:
        tf.reset_default_graph()

        if attack_params:
            DNN = CIFAR_CNN(logdir, activation=tf.nn.relu, attack=GM, attack_params=dict(attack_params))
        else:
            # dict(None) raises TypeError, so the default call run() used to crash here.
            # Build the standard model without attack arguments instead.
            DNN = CIFAR_CNN(logdir, activation=tf.nn.relu)

        sess = tf.InteractiveSession()

        # Close the session even if training fails.
        try:
            sess.run(tf.global_variables_initializer())

            trainer = Trainer(sess, DNN, data_train)
            trainer.train(n_epochs=20, p_epochs=5)
        finally:
            sess.close()

        eval_accuracy(logdir)

    if run_roar_kar:
        eval_roar_kar(logdir, keep, rk_train_only)


In [None]:
# Standard model (no attack_params), no retraining of the base model:
# first ROAR (keep=False), then KAR (keep=True).
for keep_pixels in (False, True):
    run(train_first=False, run_roar_kar=True, keep=keep_pixels)

In [None]:
# ROAR (keep=False) for GM models with norm '2' and loss 'xent':
# eps = 0.08, 0.16, ..., 1.60 (i = 1..20), step_size 2, n_steps 40.
# train_first=False, so run() trains no base model; each model must already exist in its logdir.
# NOTE(review): presumably params_maker yields one list of (name, value) pairs per combination - confirm in utils.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.08 * i, 2) for i in range(1, 21)], [2], [40], ['2'], ['xent']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=False)


In [None]:
# ROAR (keep=False) for GM models with norm '2' and loss 'cw':
# eps = 0.48, 0.56, ..., 1.60 (i = 6..20), step_size 2, n_steps 40.
# NOTE(review): starts at i=6 while the matching KAR cell starts at i=1 -
# possibly left over from resuming an interrupted sweep; confirm.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.08 * i, 2) for i in range(6, 21)], [2], [40], ['2'], ['cw']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=False)


In [None]:
# KAR (keep=True) for GM models with norm '2' and loss 'xent':
# eps = 0.08, 0.16, ..., 1.60 (i = 1..20), step_size 2, n_steps 40.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.08 * i, 2) for i in range(1, 21)], [2], [40], ['2'], ['xent']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=True)


In [None]:
# KAR (keep=True) for GM models with norm '2' and loss 'cw':
# eps = 0.08, 0.16, ..., 1.60 (i = 1..20), step_size 2, n_steps 40.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.08 * i, 2) for i in range(1, 21)], [2], [40], ['2'], ['cw']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=True)


In [None]:
# ROAR (keep=False) for GM models with norm 'inf' and loss 'xent':
# eps = 0.002, 0.004, ..., 0.040 (i = 1..20), step_size 0.02, n_steps 40.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.002 * i, 3) for i in range(1, 21)], [0.02], [40], ['inf'], ['xent']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=False)


In [None]:
# ROAR (keep=False) for GM models with norm 'inf' and loss 'cw':
# eps = 0.016, 0.018, ..., 0.040 (i = 8..20), step_size 0.02, n_steps 40.
# NOTE(review): starts at i=8 while the matching KAR cell starts at i=1 -
# possibly left over from resuming an interrupted sweep; confirm.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.002 * i, 3) for i in range(8, 21)], [0.02], [40], ['inf'], ['cw']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=False)


In [None]:
# KAR (keep=True) for GM models with norm 'inf' and loss 'xent':
# eps = 0.002, 0.004, ..., 0.040 (i = 1..20), step_size 0.02, n_steps 40.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.002 * i, 3) for i in range(1, 21)], [0.02], [40], ['inf'], ['xent']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=True)


In [None]:
# KAR (keep=True) for GM models with norm 'inf' and loss 'cw':
# eps = 0.002, 0.004, ..., 0.040 (i = 1..20), step_size 0.02, n_steps 40.
attack_paramss = params_maker(['eps', 'step_size', 'n_steps', 'norm', 'loss_type'],
                              [[round(0.002 * i, 3) for i in range(1, 21)], [0.02], [40], ['inf'], ['cw']])

for attack_params in attack_paramss:
    run(attack_params, train_first=False, run_roar_kar=True, keep=True)
