# Task 1
"Generate adversarial examples in the context of the zero-knowledge threat model."

In [1]:
import os
import sys
from pathlib import Path

In [2]:
# Resolve the directories this notebook works with.
# The project root is taken to be the parent of the current working directory.
project_path = Path.cwd().parent
src_path = project_path / "src"

# Sanity-check the resolved root before anything is loaded from it.
# If this fails, either adjust how project_path is computed above or, when
# project_path is already right, change the expected name below.
expected_root_name = "project-athena"
assert project_path.name == expected_root_name, "Parent directory name assertion failed (check the path)"

In [3]:
# Make the modules under src_path importable; skip if already registered so
# that re-running this cell does not add duplicates
src_entry = str(src_path)
if src_entry not in sys.path:
    sys.path.append(src_entry)

In [4]:
# NOTE(review): this binds `time` to the `datetime.time` class, not the stdlib
# `time` module, so `time.monotonic()` is not available through this name.
from datetime import time

import numpy as np

In [5]:
from attacks.attack import generate
from models.athena import ENSEMBLE_STRATEGY, Ensemble
from utils.data import subsampling
from utils.file import load_from_json
from utils.metrics import error_rate, get_corrections
from utils.model import load_lenet, load_pool

In [6]:
def generate_ae(model, data, labels, attack_configs, save=False, output_dir=None, device=None):
    """
    Generate adversarial examples for a model and print the model's error rate on them.

    :param model: the targeted model. It is handed to `generate` and its
        `predict` method is called on every generated batch.
    :param data: the benign samples to attack.
    :param labels: numpy array with the true labels of `data`. If it has more
        than one dimension, each row is reduced to a class index with argmax
        before the error rate is computed.
    :param attack_configs: dictionary. Must contain "num_attacks" and one
        "configs<i>" entry for every i in [0, num_attacks).
    :param save: boolean. Save each generated batch of adversarial examples or not.
    :param output_dir: path or str. The location to store the `.npy` files;
        created if it does not exist. It cannot be None when save is True.
    :param device: forwarded unchanged to `generate`.
    :return: None. The error rates are printed.
    :raises ValueError: if save is True and output_dir is None.
    """
    # Import the stdlib module locally: at notebook level the name `time` is
    # bound to `datetime.time`, which has no `monotonic()`.
    import time

    # Validate the save arguments up front so a bad call fails before any
    # attack has been run.
    if save:
        if output_dir is None:
            raise ValueError("Cannot save images to a none path.")
        os.makedirs(output_dir, exist_ok=True)

    print("Generating adversarial examples")

    num_attacks = attack_configs.get("num_attacks")
    # The attack receives the labels exactly as passed in; only the local copy
    # used for scoring below is converted to class indices.
    data_loader = (data, labels)

    if len(labels.shape) > 1:
        labels = np.asarray([np.argmax(p) for p in labels])

    # Generate attacks
    for attack_id in range(num_attacks):
        key = f"configs{attack_id}"

        data_adv = generate(model=model,
                            data_loader=data_loader,
                            attack_args=attack_configs[key],
                            device=device)

        # Evaluate adversarial examples
        predictions = model.predict(data_adv)
        predictions = np.asarray([np.argmax(p) for p in predictions])

        err = error_rate(y_pred=predictions, y_true=labels)

        print(f">>> error rate: {err}")

        # Save adversarial examples
        if save:
            file_path = os.path.join(output_dir, f"{time.monotonic()}.npy")
            # Write to the path built above, not to any notebook-level variable
            np.save(file_path, data_adv)

            print(f"Saved the adversarial example to file [{file_path}]")

    print("Done generating adversarial examples")

In [21]:
def evaluate(trans_configs, model_configs, data_configs,
             save=False, output_dir=None, ae_index=4):
    """
    Compare the undefended model (UM), an AVEP ensemble of weak defenses and
    the PGD-ADT baseline on one file of adversarial examples.

    All file locations are resolved against the notebook-level `project_path`.

    :param trans_configs: dictionary. The collection of the parameterized transformations,
        in the form of
        { configsx: {
            param: value,
            }
        }
        The key of a configuration is 'configs'x, where 'x' is the id of corresponding weak defense.
        'configs0' is the entry passed to the undefended model.
    :param model_configs: dictionary. Defines model related information.
        Keys read here: 'pgd_trained', 'dir' and 'um_file'; it is also passed
        as a whole to `load_pool`.
    :param data_configs: dictionary. Defines data related information.
        Keys read here: 'dir', 'bs_file', 'label_file' and 'ae_files'.
    :param save: boolean. Currently unused; accepted so existing calls keep working.
    :param output_dir: path or str. Currently unused; accepted so existing calls keep working.
    :param ae_index: int. Position in data_configs['ae_files'] of the adversarial
        example file to evaluate. Defaults to 4, the index that used to be hard-coded.
    :return: dictionary mapping 'UM', 'Ensemble' and 'PGD-ADT' to the value
        returned by `error_rate` for that model.
    """

    # Load baseline defense (PGD-ADT model)
    baseline = load_lenet(file=model_configs['pgd_trained'],
                          trans_configs=None,
                          use_logits=False,
                          wrap=False)

    # Load undefended model (UM)
    file = project_path.joinpath(model_configs['dir'], model_configs['um_file'])
    undefended = load_lenet(file=file,
                            trans_configs=trans_configs.get('configs0'),
                            wrap=True)

    print(f">>> UM: {type(undefended)}")

    # Load weak defenses into a pool
    pool, _ = load_pool(trans_configs=trans_configs,
                        model_configs=model_configs,
                        active_list=True,
                        wrap=True)

    # Create an AVEP ensemble from the WD pool
    wds = list(pool.values())
    ensemble = Ensemble(classifiers=wds, strategy=ENSEMBLE_STRATEGY.AVEP.value)

    print(f">>> wds: {type(wds)} {type(wds[0])}")

    # Load benign samples
    bs_file = project_path.joinpath(data_configs['dir'], data_configs['bs_file'])
    x_bs = np.load(bs_file)

    # Load true labels
    label_file = project_path.joinpath(data_configs['dir'], data_configs['label_file'])
    labels = np.load(label_file)

    print(f">>> Evaluating UM on [{bs_file}]")

    # Get indices of benign samples that are correctly classified by the targeted model
    pred_bs = undefended.predict(x_bs)
    corrections = get_corrections(y_pred=pred_bs, y_true=labels)

    # Pick the adversarial example file to evaluate
    results = {}

    ae_list = data_configs.get('ae_files')
    ae_file = project_path.joinpath(data_configs['dir'], ae_list[ae_index])
    x_adv = np.load(ae_file)

    print(f">>> Evaluating UM on [{ae_file}]")

    # Evaluate undefended model on adversarial examples
    pred_adv_um = undefended.predict(x_adv)
    err_um = error_rate(y_pred=pred_adv_um, y_true=labels, correct_on_bs=corrections)
    results['UM'] = err_um

    print(f">>> Evaluating ensemble on [{ae_file}]")

    # Evaluate ensemble model on adversarial examples
    pred_adv_ens = ensemble.predict(x_adv)
    err_ens = error_rate(y_pred=pred_adv_ens, y_true=labels, correct_on_bs=corrections)
    results['Ensemble'] = err_ens

    # Evaluate baseline model on adversarial examples
    print(f">>> Evaluating baseline model on [{ae_file}], it may take a while...")
    pred_adv_bl = baseline.predict(x_adv)
    err_bl = error_rate(y_pred=pred_adv_bl, y_true=labels, correct_on_bs=corrections)
    results['PGD-ADT'] = err_bl

    # TODO: collect and dump the evaluation results to file(s) such that you can analyze them later.
    print(f">>> Evaluations on [{ae_file}]:\n{results}")

    # Hand the numbers back so the caller can store or analyze them
    return results

In [8]:
# All demo configuration files live in the same folder under src_path
demo_configs_dir = src_path / "configs" / "demo"

# Data configuration, plus the folder that results go to
file = demo_configs_dir / "data-mnist.json"
data_configs = load_from_json(file)
output_root = project_path / "results"

# Model and attack configurations
model_configs = load_from_json(demo_configs_dir / "model-mnist.json")
attack_configs = load_from_json(demo_configs_dir / "attack-zk-mnist.json")

# The JSON files carry relative locations; replace them with absolute ones
# rooted at project_path
data_configs['dir'] = str(project_path / "data")
data_configs['sub_dir'] = str(output_root)

model_configs['dir'] = str(project_path / "models" / "cnn")
pgd_file_name = Path(model_configs['pgd_trained']).name
model_configs['pgd_trained'] = str(project_path / "models" / "baseline" / pgd_file_name)

In [9]:
# Read the benign samples named by data_configs['bs_file']
file = Path(project_path, data_configs['dir'], data_configs['bs_file'])
X_bs = np.load(file)

In [10]:
# Read the true labels named by data_configs['label_file']
file = Path(project_path, data_configs['dir'], data_configs['label_file'])
labels = np.load(file)

In [11]:
# Load the model file named by model_configs['um_file']; it is the model the
# attacks are generated against
model_file = Path(project_path, model_configs['dir'], model_configs['um_file'])
target = load_lenet(file=model_file, wrap=True)

>>> Loading model [D:\GitHub\project-athena\models\cnn\model-mnist-cnn-clean.h5]...


In [12]:
# Read the benign samples that will be attacked
# TODO: this reads the same file as the earlier benign-sample cell (X_bs); consider reusing it
data_file = Path(project_path, data_configs['dir'], data_configs['bs_file'])
data_bs = np.load(data_file)

In [13]:
# Read the true labels for the samples that will be attacked
label_file = Path(project_path, data_configs['dir'], data_configs['label_file'])
labels = np.load(label_file)

In [14]:
# Attack only the first five samples (and their labels)
data_bs, labels = data_bs[:5], labels[:5]

generate_ae(
    model=target,
    data=data_bs,
    labels=labels,
    attack_configs=attack_configs,
    device="cuda",
)

Generating adversarial examples
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
>>> error rate: 0.0
>>> error rate: 0.0
>>> error rate: 0.0
>>> error rate: 0.0
>>> error rate: 0.2
>>> error rate: 0.2
>>> error rate: 0.6
>>> error rate: 0.8
>>> error rate: 0.8
>>> error rate: 1.0
>>> error rate: 1.0
>>> error rate: 1.0
>>> error rate: 1.0
>>> error rate: 1.0
>>> error rate: 1.0
Done generating adversarial examples


In [22]:
# Load the transformation configs of the weak defenses
file = src_path.joinpath("configs/demo/athena-mnist.json")
trans_configs = load_from_json(file)

# Make sure the results directory exists
output_dir = project_path.joinpath("results")
output_dir.mkdir(exist_ok=True)

# Evaluate model
# Pass the directory prepared in this cell rather than a variable defined in
# an earlier cell, so this cell does not depend on hidden notebook state.
evaluate(trans_configs=trans_configs,
         model_configs=model_configs,
         data_configs=data_configs,
         save=False,
         output_dir=output_dir)

>>> Loading model [D:\GitHub\project-athena\models\baseline\advTrained-mnist-adtC.h5]...
>>> Loading model [D:\GitHub\project-athena\models\cnn\model-mnist-cnn-clean.h5]...
>>> UM: <class 'models.keras.WeakDefense'>
>>> Loading model [D:\GitHub\project-athena\models\cnn\model-mnist-cnn-flip_horizontal.h5]...
>>> Loading model [D:\GitHub\project-athena\models\cnn\model-mnist-cnn-affine_both_stretch.h5]...
>>> Loading model [D:\GitHub\project-athena\models\cnn\model-mnist-cnn-morph_gradient.h5]...
>>> Loaded 3 models.
>>> wds: <class 'list'> <class 'models.keras.WeakDefense'>
>>> Evaluating UM on [D:\GitHub\project-athena\data\test_BS-mnist-clean.npy]
>>> Evaluating UM on [D:\GitHub\project-athena\data\test_AE-mnist-cnn-clean-fgsm_eps0.3.npy]
>>> Evaluating ensemble on [D:\GitHub\project-athena\data\test_AE-mnist-cnn-clean-fgsm_eps0.3.npy]
>>> Evaluating baseline model on [D:\GitHub\project-athena\data\test_AE-mnist-cnn-clean-fgsm_eps0.3.npy], it may take a while...
>>> Evaluations on [D