This notebook tries to extend the results from the adversarial spheres paper: https://arxiv.org/pdf/1801.02774.pdf

In particular, instead of generating from spheres, we generate from Gaussians
(with the hope that the high dimensionality makes them effectively spheres).

Additionally, study the influence of the scale of the samples.
We study when the samples have norm $O(1)$ (paper) and $O(\sqrt{D})$ (real life).

### Import dependencies

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from art.estimators.classification import TensorFlowV2Classifier
from art.attacks.evasion import ProjectedGradientDescent

tf.config.list_physical_devices(device_type=None)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU')]

### Data generation utils

In [2]:
def generate_normal_data(dim, s1, s2, num_samples):
    assert num_samples % 2 == 0
    
    xs = np.concatenate([
        np.random.normal(scale=s1, size=(num_samples // 2, dim)),
        np.random.normal(scale=s2, size=(num_samples // 2, dim))
    ])
    
    ys = np.concatenate([
        np.zeros(shape=num_samples // 2, dtype=np.int64),
        np.ones(shape=num_samples // 2, dtype=np.int64)
    ])
    
    perm = np.random.permutation(num_samples)
    xs = xs[perm]
    ys = ys[perm]
    
    return xs, ys

generate_normal_data(dim=2, s1=0.3, s2=0.7, num_samples=4)

(array([[-0.35991261, -0.92676281],
        [ 0.90548135, -0.64958558],
        [ 0.06259854, -0.20744439],
        [-0.44962108,  0.43866714]]),
 array([1, 1, 0, 0]))

### Experiment setup

In [3]:
def run_experiment(
    dim,
    s1,
    s2,
    model_layers,
    train_batch_size,
    train_epochs,
    attack_norm,
    attack_eps,
    attack_eps_step,
    attack_iters,
    attack_batch_size=1024,
    num_train=5 * 10 ** 4,
    num_test=10 ** 4,
    seed=42
):
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
    x_train, y_train = generate_normal_data(
        dim=dim, s1=s1, s2=s2,
        num_samples=num_train,
    )

    x_test, y_test = generate_normal_data(
        dim=dim, s1=s1, s2=s2,
        num_samples=num_test,
    )
    
    model = keras.Sequential(
        [keras.Input(shape=x_train[0].shape)]
        + model_layers
    )
    model.compile(
        loss="SparseCategoricalCrossentropy",
        optimizer="adam",
        metrics=["accuracy"]
    )
    model.fit(
        x_train,
        y_train,
        batch_size=train_batch_size,
        epochs=train_epochs,
        validation_split=0.1,
        verbose=1,
    )
    
    art_model = TensorFlowV2Classifier(
        model=model,
        input_shape=x_test[0].shape,
        nb_classes=2,
        loss_object=keras.losses.SparseCategoricalCrossentropy(),
        clip_values=[-x_test.max(), x_test.max()],
    )
    art_attack = ProjectedGradientDescent(
        art_model,
        norm=attack_norm,
        eps=attack_eps,
        eps_step=attack_eps_step,
        max_iter=attack_iters,
        batch_size=attack_batch_size,
    )
    
    x_train_adv = art_attack.generate(x_train[:num_test], y=y_train[:num_test])    
    x_test_adv = art_attack.generate(x_test, y=y_test)
    
    _, nat_train_acc = model.evaluate(x_train, y_train, verbose=0)
    _, nat_test_acc = model.evaluate(x_test, y_test, verbose=0)
    _, adv_train_acc = model.evaluate(x_train_adv, y_train[:num_test], verbose=0)
    _, adv_test_acc = model.evaluate(x_test_adv, y_test, verbose=0)
    
    print("Nat train acc :", nat_train_acc)
    print("Nat test  acc :", nat_test_acc)
    print("Adv train acc :", adv_train_acc)
    print("Adv test  acc :", adv_test_acc)
    
    return dict(
        model=model,
        train_data=(x_train, y_train),
        test_data=(x_test, y_test),
        x_train_adv=x_train_adv,
        x_test_adv=x_test_adv,
    )

In [4]:
D = 500
res = run_experiment(
    dim=D,
    s1=1.0, s2=1.3,
    model_layers=[
        layers.Dense(1000, activation="relu"),
        layers.Dense(1000, activation="relu"),
        layers.Dense(2, activation="softmax"),
    ],
    train_batch_size=128,
    train_epochs=10,
    attack_norm=2,
    attack_eps=0.18 * np.sqrt(D),
    attack_eps_step=0.01 * np.sqrt(D),
    attack_iters=40,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


PGD - Batches: 0it [00:00, ?it/s]

PGD - Batches: 0it [00:00, ?it/s]

Nat train acc : 0.9826599955558777
Nat test  acc : 0.8935999870300293
Adv train acc : 0.17970000207424164
Adv test  acc : 0.05640000104904175


In [5]:
D = 500
res = run_experiment(
    dim=D,
    s1=1.0 / np.sqrt(D), s2=1.3 / np.sqrt(D),
    model_layers=[
        layers.Dense(1000, activation="relu"),
        layers.Dense(1000, activation="relu"),
        layers.Dense(2, activation="softmax"),
    ],
    train_batch_size=128,
    train_epochs=10,
    attack_norm=2,
    attack_eps=0.18,
    attack_eps_step=0.01,
    attack_iters=40,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


PGD - Batches: 0it [00:00, ?it/s]

PGD - Batches: 0it [00:00, ?it/s]

Nat train acc : 0.9978600144386292
Nat test  acc : 0.9851999878883362
Adv train acc : 0.37929999828338623
Adv test  acc : 0.30160000920295715


In [6]:
D = 500
res = run_experiment(
    dim=D,
    s1=1.0, s2=1.3,
    model_layers=[
        layers.Dense(1000, activation="relu"),
        layers.BatchNormalization(),
        layers.Dense(1000, activation="relu"),
        layers.Dense(2, activation="softmax"),
    ],
    train_batch_size=128,
    train_epochs=10,
    attack_norm=2,
    attack_eps=0.18 * np.sqrt(D),
    attack_eps_step=0.01 * np.sqrt(D),
    attack_iters=40,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


PGD - Batches: 0it [00:00, ?it/s]

PGD - Batches: 0it [00:00, ?it/s]

Nat train acc : 0.9947599768638611
Nat test  acc : 0.9749000072479248
Adv train acc : 0.4108999967575073
Adv test  acc : 0.3253999948501587


### Conclusions

A vanilla feed-forward NN does worse on norm-$O(\sqrt{D})$ data than norm-$O(1)$ data
(90% test acc vs 99% test acc).
However, this difference can be mitigated with a batchnorm layer.

A feed-forward NN w/ batchnorm on Gaussian data behaves very similarly to the spherical case.