# Adversarial Perturbations straight on JPEG Coefficients: Tutorial

## Initialization

In [None]:
from adversarial_attacks.config.config import Config
# Global settings on the project's Config class, set before the package is initialized in the next cell.
Config.VERBOSE = 0
# Set to True if the original datasets should be recreated instead of being loaded from file.
Config.RECONSTRUCT_ORIGINAL_DATASETS = False
# Set to True if the adversarial datasets should be recreated instead of being loaded from file.
Config.RECONSTRUCT_ADVERSARIAL_DATASETS = False
#Config.LOG_FILE = None  # Uncomment to show logs in the notebook

In [None]:
import adversarial_attacks.main

# gpu_nrs selects which GPUs to use; tf_strategy is used for (adversarial) training,
# options are 'mirrored' or 'default'; set use_cpu=True to run on the CPU instead.
adversarial_attacks.main.init(
    use_cpu=False,
    run_eagerly=False,
    gpu_nrs=[0],
    tf_strategy='mirrored',
)

In [None]:
import adversarial_attacks.utils.visualization as vis_utils
import adversarial_attacks.utils.general as general_utils
import adversarial_attacks.utils.transformation as transformation_utils
import adversarial_attacks.utils.jpeg
from adversarial_attacks.datasets.original import RGBDataset, JpegDataset, YCbCrDataset

from tqdm.auto import tqdm
from copy import deepcopy
import tensorflow as tf

## Datasets

The data can be loaded as RGB data, JPEG data, or YCbCr pixel data.
The attacks themselves take RGB data as input, which is then converted (e.g. to JPEG data) inside the attack's call function.

In [None]:
# Name of the dataset used by the following cells: 'imagenet' or 'cifar10'.
ds = 'cifar10'

In [None]:
# ImageNet is evaluated on its 'validation' split, every other dataset on 'test'.
# number_of_images=None -> full dataset
rgb_dataset = RGBDataset(
    ds,
    'validation' if ds == 'imagenet' else 'test',
    shuffle=False,
    repeat=False,
    number_of_images=None,
)

We visualize two batches of size 8.

In [None]:
# rgb_dataset.ds is a tf.data.Dataset: take two batches of 8 images and display each batch.
for images, labels in rgb_dataset.ds.batch(8).take(2):
    vis_utils.show_multiple_images(images)

For a JpegDataset, the entries look as follows:

In [None]:
# number_of_images=None -> full dataset
jpeg_dataset = JpegDataset(
    ds,
    'validation' if ds == 'imagenet' else 'test',
    shuffle=False,
    repeat=False,
    number_of_images=None,
    jpeg_quality=100,
    chroma_subsampling=False,
)

# Every entry is a ((Y, Cb, Cr), label) pair; print the shapes of the three channels for one batch of 8.
for (Y, Cb, Cr), labels in jpeg_dataset.ds.batch(8).take(1):
    print(
        tf.shape(Y),
        tf.shape(Cb),
        tf.shape(Cr),
    )

In [None]:
# JPEG settings shared by the dataset and by the conversion back to RGB.
jq = 100
chroma_subsampling = False

# number_of_images=None -> full dataset
jpeg_dataset = JpegDataset(
    ds,
    'validation' if ds == 'imagenet' else 'test',
    shuffle=False,
    repeat=False,
    number_of_images=None,
    jpeg_quality=jq,
    chroma_subsampling=chroma_subsampling,
)

# Convert one batch of 8 JPEG entries back to RGB and display it.
for (Y, Cb, Cr), labels in jpeg_dataset.ds.batch(8).take(1):
    rgb = transformation_utils.jpeg_to_rgb_batch(
        (Y, Cb, Cr),
        ds,
        jpeg_quality=jq,
        chroma_subsampling=chroma_subsampling,
    )
    vis_utils.show_multiple_images(rgb)

## Models and (Adversarial) Training

You can either load one of our models or define a new one. To load an existing tf model, use the adversarial_attacks.models.models.Model class.
The model must then be saved under {Config.MODEL_PATH}/{ds}/{load_model_name}.

In [None]:
from adversarial_attacks.models.models import Model 

In [None]:
# Name of the stored model to load, and the name under which the model is saved.
load_model_name = 'Resnet'
save_model_name = 'Resnet_Test'

m = Model(
    ds,
    load_model_name=load_model_name,
    save_model_name=save_model_name,
)

This model can then be trained with the original dataset using the train_original_ds function. All our models expect RGB input. Thus, the RGB dataset is used.
The model will be saved automatically under save_model_name if the val_loss is improved.

In [None]:
# Shuffled, repeating RGB datasets for training and for validation during training.
ds_train = RGBDataset(ds, 'train', shuffle=True, repeat=True)
ds_test = RGBDataset(
    ds,
    'validation' if ds == 'imagenet' else 'test',
    shuffle=True,
    repeat=True,
)
# Uncomment the following line to actually train the model on the original dataset:
#m.train_original_ds(ds_train, ds_test, batch_size=8, epochs=1, optimizer_lambda=lambda: tf.keras.optimizers.legacy.SGD(0.1, momentum=0.9, decay=0.0001))

A custom model is best defined by inheriting from the adversarial_attacks.models.models.Model class.
Make sure that the model expects input in the range 0...255 and returns logits. In our models, we included TensorFlow's preprocess_input function in the model.
In this case, we use a model that is predefined and pretrained in TensorFlow.

In [None]:
class TutorialModel(Model):
    """Keras EfficientNetB0 with ImageNet weights, wrapped as a project ``Model``.

    The network is built for 224x224x3 input, applies EfficientNet's
    ``preprocess_input`` inside the model and has no classifier activation.
    """

    def __init__(self, save_model_name='tutorial_model', load_model_name=None, save_model=False, jpeg_compression_quality=None):
        """
        If load_model_name is not None, the saved model is used instead of building a new model.
        If jpeg_compression_quality is not None, JPEG compression is added to the start of the model.
        Note that this compression is not differentiable.
        """
        # The dataset name is fixed to 'imagenet'; everything else is forwarded unchanged.
        super().__init__(
            dataset_name='imagenet',
            save_model_name=save_model_name,
            load_model_name=load_model_name,
            save_model=save_model,
            jpeg_compression_quality=jpeg_compression_quality,
        )

    def build_model(self):
        """Build and return the EfficientNetB0 network, optionally preceded by JPEG compression."""
        net_input = tf.keras.Input(shape=(224, 224, 3))
        x = net_input

        if self.jpeg_compression_quality is not None:
            # NOTE(review): jpeg_compression_for_rgb_model is not imported in any visible
            # cell of this notebook - confirm where it lives and import it before using
            # jpeg_compression_quality, otherwise this line raises a NameError.
            x = jpeg_compression_for_rgb_model(
                self.ds_name,
                x,
                self.jpeg_compression_quality,
            )

        x = tf.keras.applications.efficientnet.preprocess_input(x)
        efficientnet = tf.keras.applications.efficientnet.EfficientNetB0(
            include_top=True,
            weights='imagenet',
            input_shape=(224, 224, 3),
            classifier_activation=None,
            input_tensor=x,
        )
        return efficientnet
        

In [None]:
# NOTE(review): TutorialModel passes dataset_name='imagenet' and builds a 224x224x3 input,
# while `ds` was set to 'cifar10' in an earlier cell - confirm that `ds` is switched to
# 'imagenet' before this model is combined with the `ds`-based attacks and datasets below.
m = TutorialModel(save_model_name='tutorial_model')

The model can also be adversarially trained. Any combination of our attacks can be used. Here, we use two JPEG attacks and one RGB attack, all weighted equally.

In [None]:
from adversarial_attacks.attacks.rgb import RGBBIM
from adversarial_attacks.attacks.jpeg import JpegBIM
from adversarial_attacks.utils.frequency_masks import lambdas

# First entry stored under the 'unmasked' and the 'medium' key of the frequency_masks lambdas, respectively.
lambda_unmasked = lambdas['unmasked'][0]
lambda_medium = lambdas['medium'][0]

In [None]:
# JPEG settings shared by both JPEG attacks.
chroma_subsampling = False
fix_zero = False
jq = 100

# Maps every attack to its weight; all three attacks are weighted equally.
dynamic_attacks = {
    JpegBIM(
        ds, None, model=m,
        eps_Y=0.9, eps_Cb=0.9, eps_Cr=0.9,
        alpha_Y=0.9/4., alpha_Cb=0.9/4, alpha_Cr=0.9/4.,
        T=7,
        lambda_Y=lambda_medium, lambda_Cb=lambda_medium, lambda_Cr=lambda_medium,
        chroma_subsampling=chroma_subsampling,
        fix_zero_coefficients=fix_zero,
        jpeg_quality=jq,
        random_start=True,
    ): 1.,
    JpegBIM(
        ds, None, model=m,
        eps_Y=0.4, eps_Cb=0.4, eps_Cr=0.4,
        alpha_Y=0.4/4., alpha_Cb=0.4/4, alpha_Cr=0.4/4.,
        T=7,
        lambda_Y=lambda_unmasked, lambda_Cb=lambda_unmasked, lambda_Cr=lambda_unmasked,
        chroma_subsampling=chroma_subsampling,
        fix_zero_coefficients=fix_zero,
        jpeg_quality=jq,
        random_start=True,
    ): 1.,
    RGBBIM(ds, None, model=m, epsilon=8., alpha=2., T=7): 1.,
}

In [None]:
from adversarial_attacks.datasets.adversarial_training_datasets import AdversarialTrainingDataset

batch_size = 10

# Uncached, unshuffled RGB datasets that serve as the source for the adversarial datasets.
# RGBDataset is the class imported from adversarial_attacks.datasets.original above.
ds_train_rgb = RGBDataset(
    ds,
    'train',
    augmentation=0,
    shuffle=False,
    cache=False,
)
ds_test_rgb = RGBDataset(
    ds,
    'validation' if ds == 'imagenet' else 'test',
    shuffle=False,
    cache=False,
)

# Wrap both datasets with the weighted attacks; only the training data is shuffled.
adversarial_ds_train = AdversarialTrainingDataset(
    ds_train_rgb,
    dynamic_attacks,
    batch_size,
    shuffle=True,
)
adversarial_ds_test = AdversarialTrainingDataset(
    ds_test_rgb,
    dynamic_attacks,
    batch_size,
    shuffle=False,
)

In [None]:
# Adversarially train the model for two epochs; the optimizer is passed as a factory (lambda).
m.train_adversarial_ds(
    adversarial_ds_train,
    adversarial_ds_test,
    epochs=2,
    optimizer_lambda=lambda: tf.keras.optimizers.RMSprop(1e-3),
)

## Attacks and Experiments

Now, we will show some examples of attacked images and then how success rates and perceptual distances can be measured for experiments.
When executing the attacks on only a few images, enabling eager execution in the initialization (run_eagerly=True) should be faster.

In [None]:
from adversarial_attacks.attacks.rgb import RGBBIM
from adversarial_attacks.attacks.jpeg import JpegBIM
from adversarial_attacks.utils.frequency_masks import lambdas


# Dataset for the attack examples and experiments: 'imagenet' or 'cifar10'.
# This overrides the value of `ds` chosen at the beginning of the notebook.
ds = 'imagenet'
# Number of images used in the experiments below; 10000 is used in the paper's experiments.
number_of_images = 200

# Name of the model on which the adversarial examples are crafted.
source_model = 'Resnet'

# The medium vector that is also used in the paper.
lambda_medium = lambdas['medium'][0]

### Sample Images

First, we will show some sample images for our JPEG luma medium and the RGB attack (both BIM). Below the images, we also compute the LPIPS distance.

In [None]:
rgb_ds = RGBDataset(
    ds,
    train_or_test='test' if ds == 'cifar10' else 'validation',
    shuffle=False,
    repeat=False,
    number_of_images=None,
)
# Fetch only the first batch of 8 images together with their labels.
images, labels = next(iter(rgb_ds.ds.batch(8)))

In [None]:
from adversarial_attacks.models.lpips import LossNetwork

# LPIPS loss network; it is already trained and uses a vgg16 net.
ln = LossNetwork(ds, lpips=True)

In [None]:
# Display the unperturbed sample batch.
vis_utils.show_multiple_images(images)

We start with the RGB attack.

For both the RGB and the JPEG attack, we use a very high epsilon value so that the difference in the structure of the perturbations becomes clear.
The amount of perturbation is not comparable between the two attacks.
Therefore, these examples say nothing about the attacks' efficiency.

In [None]:
# RGB BIM attack on the source model with a deliberately large epsilon.
example_attack = RGBBIM(
    dataset=ds,
    model_name=source_model,
    epsilon=64.,
    T=10,
)

In [None]:
# Execute the attack on the sample batch and display the resulting adversarial images.
adv_images = example_attack(images, labels)
vis_utils.show_multiple_images(adv_images)

In [None]:
# Apply the LPIPS loss network to the original and the adversarial images;
# as the last expression of the cell, the result is displayed by the notebook.
ln(images, adv_images)

Now, the JPEG attack follows.

In [None]:
# JPEG BIM attack restricted to the luma channel (eps_Cb and eps_Cr are 0), using the medium lambda.
example_attack = JpegBIM(
    dataset=ds,
    model_name=source_model,
    eps_Y=10.,
    eps_Cb=0.,
    eps_Cr=0.,
    lambda_Y=lambda_medium,
    T=10,
    chroma_subsampling=False,
)

In [None]:
# Execute the attack on the sample batch and display the resulting adversarial images.
adv_images = example_attack(images, labels)
vis_utils.show_multiple_images(adv_images)

In [None]:
# Apply the LPIPS loss network to the original and the adversarial images;
# as the last expression of the cell, the result is displayed by the notebook.
ln(images, adv_images)

### Experiments

This section covers our experiments. As explained in the paper, we incrementally increase the input parameter (the perturbation bound epsilon) and measure both the perceptual distance (LPIPS, but also CIEDE2000 $L_2$ and RGB $L_2$) and the attack's success rate (as well as the net's accuracy and cross-entropy loss).

First, we define the dataset.

In [None]:
# Same dataset as for the sample images, but limited to number_of_images entries.
rgb_ds = RGBDataset(
    ds,
    train_or_test='test' if ds == 'cifar10' else 'validation',
    shuffle=False,
    repeat=False,
    number_of_images=number_of_images,
)

Now, we define a set of evaluation metrics and distance metrics.

In [None]:
from adversarial_attacks.utils.evaluation_metrics import EvaluationMetricCollection
from adversarial_attacks.utils.distance_metrics import ExperimentDistanceWrapper

# (Target) models on which the adversarial examples are evaluated. In this example we use
# an undefended model and one defended with JPEG compression at inference time.
# Make sure that each tf model expects 0...255 input and is saved in {Config.MODEL_PATH}/{ds}/{model_name}.
model_names_for_evaluation = [
    'Densenet',
    'Densenet_Compression_75',
]

# One metric collection per target model; it measures the success rate etc. on that model.
evaluation_metrics = {
    model_name: EvaluationMetricCollection(ds, model_name)
    for model_name in model_names_for_evaluation
}

# Measures the perceptual distances between original and adversarial images.
distance_metrics = ExperimentDistanceWrapper(ds)

Now, we define a dictionary of attacks. Each entry consists of a list of attacks, with different parameters, e.g. epsilons.

In [None]:
# Attack name -> list of attack instances, one per epsilon value.
attack_dict = {
    'rgb_bim': [
        RGBBIM(dataset=ds, model_name=source_model, epsilon=eps, T=10)
        for eps in [2., 8., 16., 32.]
    ],
    'jpeg_luma_medium_bim': [
        JpegBIM(
            dataset=ds,
            model_name=source_model,
            eps_Y=eps,
            eps_Cb=0.,
            eps_Cr=0.,
            lambda_Y=lambda_medium,
            T=10,
            fix_zero_coefficients=True,
        )
        for eps in [1., 3., 5.]
    ],
}

In [None]:
import math

batch_size = 10
# Success rates and distances are collected here: one list per attack name, one entry per attack.
distance_metrics_results, attack_results = {}, {}

# Number of batches; only used as the total of the inner progress bar.
n_batches = math.ceil(number_of_images / batch_size)

for attack_name, attacks in tqdm(attack_dict.items(), desc='Iterating attack names...'):
    distance_metrics_results[attack_name] = []
    attack_results[attack_name] = []

    for attack in tqdm(attacks, desc=f'Iterating {attack_name} attacks...'):
        for images, labels in tqdm(
            rgb_ds.ds.batch(batch_size),
            total=n_batches,
            leave=False,
            desc='Attacking Batches...',
        ):
            # Execute the attack.
            adv_images = attack(images, labels)
            # Measure and update the perceptual distances.
            distance_metrics.update_state(images, adv_images)
            # For every target model: measure success rate, accuracy and crossentropy loss.
            for evaluation_metric in evaluation_metrics.values():
                evaluation_metric.update_state(labels, images, adv_images)

        # Write the attack evaluation to attack_results, then reset the evaluation metrics.
        attack_results[attack_name].append(
            {
                model_name: evaluation_metric.result()
                for model_name, evaluation_metric in evaluation_metrics.items()
            }
        )
        for evaluation_metric in evaluation_metrics.values():
            evaluation_metric.reset_state()

        # Write the distances to distance_metrics_results, then reset the distance metrics.
        distance_metrics_results[attack_name].append(distance_metrics.result())
        distance_metrics.reset_state()

In [None]:
def get_results_metric(data, attack_name, metric, model):
    """Return the values of ``metric`` on ``model`` for every entry of ``data[attack_name]``.

    Each entry of ``data[attack_name]`` is indexed as ``entry[model][metric]``;
    the values are returned as a list in the order of the entries.
    """
    return [entry[model][metric] for entry in data[attack_name]]

def get_distance_metric(data, attack_name, metric, norm='l2'):
    """Return the average distances of ``metric`` for every entry of ``data[attack_name]``.

    For ``metric == 'perceptual'`` each entry is indexed as ``entry[metric]['avg']``
    and ``norm`` is ignored; for any other metric it is indexed as
    ``entry[metric][norm]['avg']``. The values are returned as a list in the
    order of the entries.
    """
    entries = data[attack_name]
    if metric == 'perceptual':
        return [entry[metric]['avg'] for entry in entries]
    return [entry[metric][norm]['avg'] for entry in entries]

We now plot the success rate (or accuracy, or loss) as a function of the perceptual distance, measured by LPIPS (or CIEDE2000, or RGB).

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Distance metric on the x-axis: 'perceptual', 'ciede2000' or 'rgb'.
metric = 'perceptual'
# Norm of the distance metric; ignored if metric == 'perceptual'.
norm = 'l2'

# Quantity on the y-axis: 'success_rate', 'Acc' or 'Loss'.
success_metric = 'success_rate'
target_model = 'Densenet_Compression_75'

plt.figure()

# One curve per attack name, with one marker per attack configuration.
for attack_name in attack_dict:
    plt.plot(
        get_distance_metric(distance_metrics_results, attack_name, metric, norm),
        get_results_metric(attack_results, attack_name, success_metric, target_model),
        label=attack_name,
        marker='x',
    )

plt.legend()

plt.title(f'Attack Efficiency on the {target_model}')
plt.xlabel('LPIPS' if metric == 'perceptual' else f'{metric} - {norm}')
plt.ylabel(success_metric)

plt.show()

The plot shows that the JPEG luma medium attack is more efficient on the Densenet_Compression_75 than the RGB BIM attack, as stated in the paper.