# Investigating CW Attack Variants Using Diversity Promoting Regularization

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import datetime
import glob
import os

import pickle

import pandas as pd
pd.set_option('display.max_rows', None)

%matplotlib inline

from models import *
from cw_div import *
from neuron_coverage import *
from inception_score import *
from fid_score import *

%load_ext autoreload
%autoreload 2

# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('CUDA is available!  Training on GPU ...' if device.type == "cuda"
      else 'CUDA is not available.  Training on CPU ...')

CUDA is available!  Training on GPU ...


In [3]:
# # load the results from file
# with open('assets/results.pickle', 'rb') as handle:
#     results = pickle.load(handle)

In [4]:
# Hyper-parameters for the data loaders and the SGD training run below.
n_epochs = 10
batch_size_train = 64
batch_size_test = 100  # the first test batch of this size is also used as the attack batch
learning_rate = 0.01
momentum = 0.5

# Seed PyTorch's global RNG so shuffling and weight initialisation repeat across runs.
random_seed = 1
torch.manual_seed(random_seed)

<torch._C.Generator at 0x24b9d2d9670>

In [5]:
# Normalization is currently disabled (kept here for reference), so both loaders
# yield images exactly as ToTensor produces them:
#  torchvision.transforms.Normalize(
#    (0.1307,), (0.3081,))

# MNIST training split, shuffled; used only when the model is (re)trained.
train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('/data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor()
                             ])),
    batch_size=batch_size_train, shuffle=True, pin_memory=True)

# MNIST test split, NOT shuffled, so the first batch is the same on every run.
test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('/data/', train=False, download=True,
                         transform=torchvision.transforms.Compose([
                           torchvision.transforms.ToTensor()
                         ])),
    batch_size=batch_size_test, shuffle=False, pin_memory=True)

# `targets` holds the true labels, which is only appropriate for an
# untargeted attack (`targeted=False`). For a targeted attack, `targets`
# should instead be some class other than the true label; passing the true
# labels there would just make the inputs easier to classify.

# The first test batch is the fixed set of images every attack below perturbs.
inputs, targets = next(iter(test_loader))

inputs = inputs.to(device)
targets = targets.to(device)

# Train or Load Pretrained Model if available

In [6]:
# Set `retrain` to True to ignore any saved checkpoint and train from scratch.
retrain = False
# Set `track_low_high` to True to hook the ReLU layers while training so that
# `model.lowhigh_dict` is populated as a side effect.
track_low_high = False

model = ConvNet().to(device)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

# check to see if we can just load a previous model
# %mkdir models
model_dir = 'pretrained_models/mnist'
latest_model = None
m_type = model.__class__.__name__
prev_models = glob.glob(model_dir + '/*' + m_type + '*.pth')
if prev_models:
    # most recently created checkpoint for this architecture
    latest_model = max(prev_models, key=os.path.getctime)

if (retrain is False
    and latest_model is not None
    and m_type in latest_model):
    print('loading model', latest_model)
    # map_location lets a checkpoint that was saved on a GPU load on a
    # CPU-only machine (and vice versa).
    model.load_state_dict(torch.load(latest_model, map_location=device))
else:
    if track_low_high:
        model.init_dict(model.lowhigh_dict, inputs, 'relu', {'low': 0, 'high': 0})
        try:
            for epoch in range(1, n_epochs + 1):
                # hooks are active only while training, not while testing
                model.hook_lowhigh_dict('relu')
                train(model, device, train_loader, optimizer, epoch)
                model.remove_hooks()
                # keep the accuracy: it is part of the checkpoint file name below
                acc = test(model, device, test_loader)
        finally:
            # never leave hooks attached if training is interrupted
            model.remove_hooks()
    else:
        for epoch in range(1, n_epochs + 1):
            train(model, device, train_loader, optimizer, epoch)
            acc = test(model, device, test_loader)
    # torch.save does not create missing directories
    os.makedirs(model_dir, exist_ok=True)
    # ':' is replaced because it is not allowed in Windows file names
    torch.save(model.state_dict(), model_dir + '/model_' + m_type + '_' + str(datetime.datetime.now()).replace(':','.') + '_' + str(acc) + '.pth')

loading model pretrained_models/mnist\model_ConvNet_2019-07-25 16.16.26.580052_98.82.pth


# Attack Time

| Version | Loss Function | Scaling Constant | Regularizer | Adversary Selection |
| - | - | - | - | - |
|  Baseline CW | CW |  True | L2 |  L2 |
|  Diversity v1 | CW |  True | L2, Batch Divergence | Instance Divergence |
|  Diversity v2 | CW |  False | L2, Batch Divergence | Instance Divergence |
|  Diversity v3 | Cross Entropy |  False | L2, Batch Divergence | Instance Divergence |
|  Diversity v4 | CW |  True | L2, Batch Divergence | L2 |

In [7]:
# Default attack hyper-parameters. The evaluation cell further down assigns
# its own values to most of these names before running the attacks.
targeted=False
confidence=0.0
c_range=(1e-3, 1e10)
search_steps=5
max_steps=1000
abort_early=True
optimizer_lr=5e-4

mean = (0.1307,) # the mean used in inputs normalization
std = (0.3081,) # the standard deviation used in inputs normalization
# Smallest and largest value a [0, 1] pixel can take after (x - mean) / std.
# NOTE(review): the data loaders above apply only ToTensor (Normalize is
# commented out), so this box may not match the actual input range —
# confirm how the attack functions use `box`.
box = (min((0 - m) / s for m, s in zip(mean, std)),
       max((1 - m) / s for m, s in zip(mean, std)))

# passed to the attacks as `log_frequency`; presumably the number of
# optimization steps between progress lines — confirm in cw_div
log_frequency = 100

## Baseline CW

| Loss Function | Scaling Constant | Regularizer | Adversary Selection |
| - | - | - | - |
|  CW |  True | L2 | L2 |

In [8]:
# def cw_attack(model, inputs, targets, device, targeted=False, norm_type='inf', epsilon=100., 
#                  confidence=0.0, c_range=(1e-3, 1e10), search_steps=5, 
#                  max_steps=1000, abort_early=True, box=(-1., 1.), 
#                  optimizer_lr=1e-2, init_rand=False, log_frequency=10):

#     batch_size = inputs.size(0)
#     num_classes = model(torch.tensor(inputs[0][None,:], requires_grad=False)).size(1)

#     ae_tol = torch.tensor(1e-4, device=device)

#     # `lower_bounds`, `upper_bounds` and `scale_consts` are used
#     # for binary search of each `scale_const` in the batch. The element-wise
#     # inequality holds: lower_bounds < scale_consts <= upper_bounds
#     lower_bounds = torch.tensor(np.zeros(batch_size), dtype=torch.float, device=device)
#     upper_bounds = torch.tensor(np.ones(batch_size) * c_range[1], dtype=torch.float, device=device)
#     scale_consts = torch.tensor(np.ones(batch_size) * c_range[0], dtype=torch.float, device=device)

#     # Optimal attack to be found.
#     # The three "placeholders" are defined as:
#     # - `o_best_norm`        : the smallest norms encountered so far
#     # - `o_best_norm_ppred`  : the perturbed predictions made by the adversarial perturbations with the smallest norms
#     # - `o_best_adversaries` : the underlying adversarial example of `o_best_norm_ppred`
#     o_best_norm = torch.tensor(np.ones(batch_size) * np.inf, dtype=torch.float, device=device)
#     o_best_norm_ppred = torch.tensor(-np.ones(batch_size), dtype=torch.float, device=device)
#     o_best_adversaries = inputs.clone()

#     # convert `inputs` to tanh-space
#     inputs_tanh = to_tanh_space(inputs)
#     targets_oh = F.one_hot(targets).float()

#     # the perturbation tensor (only one we need to track gradients on)
#     pert_tanh = torch.zeros(inputs.size(), device=device, requires_grad=True)

#     optimizer = optim.Adam([pert_tanh], lr=optimizer_lr)

#     for const_step in range(search_steps):

#         print('Step', const_step)

#         # the minimum norms of perturbations found during optimization
#         best_norm = torch.tensor(np.ones(batch_size) * np.inf, dtype=torch.float, device=device)

#         # the perturbed predictions made by the adversarial perturbations with the smallest norms
#         best_norm_ppred = torch.tensor(-np.ones(batch_size), dtype=torch.float, device=device)

#         # previous (summed) batch loss, to be used in early stopping policy
#         prev_batch_loss = torch.tensor(np.inf, device=device)

#         # optimization steps
#         for optim_step in range(max_steps):

#             adversaries = from_tanh_space(inputs_tanh + pert_tanh)
#             pert_outputs = model(adversaries)
            
#             if norm_type == 'inf':
#                 inf_norms = torch.norm(adversaries - inputs, p=float("inf"), dim=(1,2,3))
#                 norms = inf_norms
#             elif norm_type == 'l2':
#                 l2_norms = torch.pow(adversaries - inputs, exponent=2)
#                 l2_norms = torch.sum(l2_norms.view(l2_norms.size(0), -1), 1)
#                 norms = l2_norms
#             else:
#                 raise Exception('must provide a valid norm_type for epsilon distance constraint: inf, l2') 
                
#             target_activ = torch.sum(targets_oh * pert_outputs, 1)
#             maxother_activ = torch.max(((1 - targets_oh) * pert_outputs - targets_oh * 1e4), 1)[0]

#             if targeted:           
#                 # if targeted, optimize to make `target_activ` larger than `maxother_activ` by `confidence`
#                 f = torch.clamp(maxother_activ - target_activ + confidence, min=0.0)
#             else:
#                 # if not targeted, optimize to make `maxother_activ` larger than `target_activ` (the ground truth image labels) by `confidence`
#                 f = torch.clamp(target_activ - maxother_activ + confidence, min=0.0)

#             # the total loss of current batch, should be of dimension [1]
#             cw_loss = torch.sum(scale_consts * f)
#             norm_loss = torch.sum(norms)
#             batch_loss = cw_loss + norm_loss

#             # Do optimization for one step
#             optimizer.zero_grad()
#             batch_loss.backward()
#             optimizer.step()

#             # "returns" batch_loss, pert_norms, pert_outputs, adversaries

#             if optim_step % log_frequency == 0: 
#                 print('batch [{}] batch_loss: {} cw_loss: {} norm_loss: {}'.format(optim_step, batch_loss, cw_loss, norm_loss))
#                 print(o_best_norm)
                
#             if abort_early and not optim_step % (max_steps // 10):   
#                 if batch_loss > prev_batch_loss * (1 - ae_tol):
#                     break
#                 if batch_loss == 0:
#                     break
#                 prev_batch_loss = batch_loss

#             # update best attack found during optimization
#             pert_predictions = torch.argmax(pert_outputs, dim=1)
#             comp_pert_predictions = torch.argmax(compensate_confidence(pert_outputs, targets, targeted, confidence), dim=1)
#             for i in range(batch_size):
#                 norm = norms[i]
#                 cppred = comp_pert_predictions[i]
#                 ppred = pert_predictions[i]
#                 tlabel = targets[i]
#                 ax = adversaries[i] 
#                 if attack_successful(cppred, tlabel, targeted) and norm < epsilon:
#                     assert cppred == ppred
#                     if norm < best_norm[i]:
#                         best_norm[i] = norm
#                         best_norm_ppred[i] = ppred
#                     if norm < o_best_norm[i]:
#                         o_best_norm[i] = norm
#                         o_best_norm_ppred[i] = ppred
#                         o_best_adversaries[i] = ax

#         # binary search of `scale_const`
#         for i in range(batch_size):
#             tlabel = targets[i]
#             if best_norm_ppred[i] != -1:
#                 # successful: attempt to lower `scale_const` by halving it
#                 if scale_consts[i] < upper_bounds[i]:
#                     upper_bounds[i] = scale_consts[i]
#                 # `upper_bounds[i] == c_range[1]` implies no solution
#                 # found, i.e. upper_bounds[i] has never been updated by
#                 # scale_consts[i] until `scale_consts[i] > 0.1 * c_range[1]`
#                 if upper_bounds[i] < c_range[1] * 0.1:
#                     scale_consts[i] = (lower_bounds[i] + upper_bounds[i]) / 2
#             else:
#                 # failure: multiply `scale_const` by ten if no solution
#                 # found; otherwise do binary search
#                 if scale_consts[i] > lower_bounds[i]:
#                     lower_bounds[i] = scale_consts[i]
#                 if upper_bounds[i] < c_range[1] * 0.1:
#                     scale_consts[i] = (lower_bounds[i] + upper_bounds[i]) / 2
#                 else:
#                     scale_consts[i] *= 10
                    
#     return o_best_adversaries

In [9]:
# cw_advs = cw_attack(model=model, 
#                     inputs=inputs, 
#                     targets=targets, 
#                     device=device, 
#                     norm_type='inf', 
#                     epsilon=32./255,
#                     targeted=False, 
#                     confidence=0.0, 
#                     c_range=(1e-3, 1e10), 
#                     search_steps=3, 
#                     max_steps=1000, 
#                     abort_early=True, 
#                     box=box,  
#                     optimizer_lr=5e-4, 
#                     init_rand=False, 
#                     log_frequency=100)

# eval_performance(model, inputs, cw_advs, targets)
# sample_1D_images(model, inputs, cw_advs, targets)

## Diversity Attack v1

| Loss Function | Scaling Constant | Regularizer | Adversary Selection |
| - | - | - | - |
|  CW |  True | L2, Batch Divergence | Instance Divergence |

In [27]:
# cw_advs_div1 = cw_div1_attack(model=model, 
#                               module=layer_dict['relu1-1'], 
#                               regularizer_weight=1, 
#                               inputs=inputs, 
#                               targets=targets, 
#                               device=device, 
#                               norm_type='inf', 
#                               epsilon=32./255,
#                               targeted=False, 
#                               confidence=0.0, 
#                               c_range=(1e-3, 1e10), 
#                               search_steps=3, 
#                               max_steps=1000, 
#                               abort_early=True, 
#                               box=box,  
#                               optimizer_lr=5e-4, 
#                               init_rand=False, 
#                               log_frequency=100)

# eval_performance(model, inputs, cw_advs_div1, targets)
# sample_1D_images(model, inputs, cw_advs_div1, targets)

## Diversity Attack v2

| Loss Function | Scaling Constant | Regularizer | Adversary Selection |
| - | - | - | - |
|  CW |  False | L2, Batch Divergence | Instance Divergence |

In [20]:
# cw_advs_div2 = cw_div2_attack(model=model, 
#                               module=layer_dict['relu1-1'], 
#                               regularizer_weight=1, 
#                               inputs=inputs, 
#                               targets=targets, 
#                               device=device, 
#                               norm_type='inf', 
#                               epsilon=32./255,
#                               targeted=False, 
#                               confidence=0.0, 
#                               c_range=(1e-3, 1e10), 
#                               search_steps=3, 
#                               max_steps=1000, 
#                               abort_early=True, 
#                               box=box,  
#                               optimizer_lr=5e-4, 
#                               init_rand=False, 
#                               log_frequency=100)

# eval_performance(model, inputs, cw_advs_div2, targets)
# sample_1D_images(model, inputs, cw_advs_div2, targets)

## Diversity Attack v3

| Loss Function | Scaling Constant | Regularizer | Adversary Selection |
| - | - | - | - |
| Cross Entropy |  False | L2, Batch Divergence | Instance Divergence |

In [23]:
# cw_advs_div3 = cw_div3_attack(model=model, 
#                               module=layer_dict['relu1-1'], 
#                               regularizer_weight=1, 
#                               inputs=inputs, 
#                               targets=targets, 
#                               device=device, 
#                               norm_type='inf', 
#                               epsilon=32./255,
#                               targeted=False, 
#                               confidence=0.0, 
#                               c_range=(1e-3, 1e10), 
#                               search_steps=3, 
#                               max_steps=1000, 
#                               abort_early=True, 
#                               box=box,  
#                               optimizer_lr=5e-4, 
#                               init_rand=False, 
#                               log_frequency=100)

# eval_performance(model, inputs, cw_advs_div3, targets)
# sample_1D_images(model, inputs, cw_advs_div3, targets)

## Diversity Attack v4

| Loss Function | Scaling Constant | Regularizer | Adversary Selection |
| - | - | - | - |
| CW |  True | L2, Batch Divergence | L2 |

In [25]:
# cw_advs_div4 = cw_div4_attack(model=model, 
#                               module=layer_dict['relu1-1'], 
#                               regularizer_weight=1, 
#                               inputs=inputs, 
#                               targets=targets, 
#                               device=device, 
#                               norm_type='inf', 
#                               epsilon=32./255,
#                               targeted=False, 
#                               confidence=0.0, 
#                               c_range=(1e-3, 1e10), 
#                               search_steps=3, 
#                               max_steps=1000, 
#                               abort_early=True, 
#                               box=box,  
#                               optimizer_lr=5e-4, 
#                               init_rand=False, 
#                               log_frequency=100)

# eval_performance(model, inputs, cw_advs_div4, targets)
# sample_1D_images(model, inputs, cw_advs_div4, targets)

# Evaluation

In [None]:
# One dict per (attack, layer, regularization weight, confidence) run is
# appended to `results`; the list is re-pickled to `save_file_path` after
# every run.
results = []
save_file_path = "assets/results_mnist2019.08.17.pickle"

# attack params
search_steps=5
targeted=False
norm_type='inf' 
epsilon=32./255
c_range=(1e-3, 1e10)
max_steps=1000
abort_early=True
optimizer_lr=5e-4
init_rand=False
log_frequency = 100

mean = (0.1307,) # the mean used in inputs normalization
std = (0.3081,) # the standard deviation used in inputs normalization
# Smallest and largest value a [0, 1] pixel can take after (x - mean) / std.
box = (min((0 - m) / s for m, s in zip(mean, std)),
       max((1 - m) / s for m, s in zip(mean, std)))

# get_model_modules comes from a project module; the result is indexed by
# layer name below (e.g. 'relu1-1' in the captured log output).
layer_dict = get_model_modules(model)

# Experiment grid: every n-th layer starting from the second one, crossed with
# each regularization weight and each confidence value.
n=2
attack_versions = [cw_div4_attack] # [cw_div1_attack, cw_div2_attack, cw_div3_attack, cw_div4_attack]
target_layers = list(layer_dict)[1::n]
reg_weights = [0, 1, 10, 100, 1000, 10000]
confidences = [0]

# neuron coverage params
# NOTE(review): nc_threshold is not referenced by the evaluation loop in this
# cell, which passes literal thresholds to eval_nc instead.
nc_threshold = 0. # all activations are scaled to (0,1) after relu

# inception score (is) params
is_cuda = True
is_batch_size = 10
is_resize = True
is_splits = 10

# fréchet inception distance score (fid) params
# NOTE(review): hard-coded absolute Windows paths; adjust per machine.
real_path = "C:/temp_imgs/mnist/real/"
fake_path = "C:/temp_imgs/mnist/fake/"
fid_batch_size = 64
fid_cuda = True

# Make sure the directory that will hold the results pickle exists.
# `save_file_path` names a FILE, so only its parent directory is created;
# creating a directory at the file path itself would make the later
# open(save_file_path, "wb") fail.
save_dir = os.path.dirname(save_file_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)

# Grid search: run every attack version against every target layer with every
# regularization weight and confidence, then score the resulting adversaries
# (accuracy, neuron coverage, inception score, FID) and checkpoint `results`.

# the two image folders compared by the FID computation (loop-invariant)
paths = [real_path, fake_path]

for attack in attack_versions:
    for layer_idx in target_layers:
        module = layer_dict[layer_idx]
        for rw in reg_weights:
            for c in confidences:

                # ':' is replaced because it is not allowed in Windows file names
                timestamp = str(datetime.datetime.now()).replace(':','.')

                print('timestamp', timestamp,
                      'attack', attack.__name__,
                      'layer: ', layer_idx,
                      'regularization_weight: ', rw,
                      'confidence: ', c)

                # adversarial attack
                adversaries = attack(model=model,
                                     module=module,
                                     regularizer_weight=rw,
                                     inputs=inputs,
                                     targets=targets,
                                     device=device,
                                     norm_type=norm_type,
                                     epsilon=epsilon,
                                     targeted=targeted,
                                     confidence=c,
                                     c_range=c_range,
                                     search_steps=search_steps,
                                     max_steps=max_steps,
                                     abort_early=abort_early,
                                     box=box,
                                     optimizer_lr=optimizer_lr,
                                     init_rand=init_rand,
                                     log_frequency=log_frequency)

                # evaluate adversary effectiveness
                pert_acc, orig_acc = eval_performance(model, inputs, adversaries, targets)
                sample_1D_images(model, inputs, adversaries, targets)

                # stored as fractions in [0, 1]; presumably eval_performance
                # returns percentages as tensors — confirm in cw_div
                pert_acc = pert_acc.item() / 100.
                orig_acc = orig_acc.item() / 100.

                # neuron coverage at several activation thresholds
                # NOTE(review): this unpacking needs an eval_nc that returns
                # (covered, total, ratio). The eval_nc defined further down in
                # this notebook returns only the ratio and would shadow an
                # imported one once that cell has been run — confirm which
                # version is active before running this cell.
                covered_neurons, total_neurons, neuron_coverage_000 = eval_nc(model, adversaries, 0.00)
                print('neuron_coverage_000:', neuron_coverage_000)
                covered_neurons, total_neurons, neuron_coverage_020 = eval_nc(model, adversaries, 0.20)
                print('neuron_coverage_020:', neuron_coverage_020)
                covered_neurons, total_neurons, neuron_coverage_050 = eval_nc(model, adversaries, 0.50)
                print('neuron_coverage_050:', neuron_coverage_050)
                covered_neurons, total_neurons, neuron_coverage_075 = eval_nc(model, adversaries, 0.75)
                print('neuron_coverage_075:', neuron_coverage_075)

                # inception score
                preprocessed_advs = preprocess_1D_imgs(adversaries)
                mean_is, std_is = inception_score(preprocessed_advs, is_cuda, is_batch_size, is_resize, is_splits)
                print('inception_score:', mean_is)

                # fid score
                # dimensionality = 64
                target_num = 64
                generate_imgs(inputs, real_path, target_num)
                generate_imgs(adversaries, fake_path, target_num)
                fid_score_64 = calculate_fid_given_paths(paths, fid_batch_size, fid_cuda, dims=64)
                print('fid_score_64:', fid_score_64)

                # dimensionality = 2048
                target_num = 2048
                generate_imgs(inputs, real_path, target_num)
                generate_imgs(adversaries, fake_path, target_num)
                fid_score_2048 = calculate_fid_given_paths(paths, fid_batch_size, fid_cuda, dims=2048)
                print('fid_score_2048:', fid_score_2048)

                out = {'timestamp': timestamp,
                       'attack': attack.__name__,
                       'layer': layer_idx,
                       'regularization_weight': rw,
                       'confidence': c,
                       'adversaries': adversaries,
                       'pert_acc':pert_acc,
                       'orig_acc': orig_acc,
                       'neuron_coverage_000': neuron_coverage_000,
                       'neuron_coverage_020': neuron_coverage_020,
                       'neuron_coverage_050': neuron_coverage_050,
                       'neuron_coverage_075': neuron_coverage_075,
                       'inception_score': mean_is,
                       'fid_score_64': fid_score_64,
                       'fid_score_2048': fid_score_2048}

                results.append(out)

                # save incremental outputs; the context manager closes (and
                # flushes) the file even if pickling fails part-way
                with open(save_file_path, "wb") as results_file:
                    pickle.dump(results, results_file)

timestamp 2019-08-20 18.59.59.645161 attack cw_div4_attack layer:  relu1-1 regularization_weight:  0 confidence:  0
Step 0
batch [0] batch_loss: 1.8629951477050781 cw_loss: 0.8633882403373718 norm_loss: 0.9996068477630615 div_reg: 0.0
batch [100] batch_loss: 5.812216281890869 cw_loss: 0.584656834602356 norm_loss: 5.227559566497803 div_reg: 0.0
Step 1
batch [0] batch_loss: 11.100231170654297 cw_loss: 5.823084831237793 norm_loss: 5.277146339416504 div_reg: 0.0
batch [100] batch_loss: 15.266921043395996 cw_loss: 2.724855422973633 norm_loss: 12.542065620422363 div_reg: 0.0
Step 2
batch [0] batch_loss: 39.68328094482422 cw_loss: 27.09294319152832 norm_loss: 12.590335845947266 div_reg: 0.0
batch [100] batch_loss: 27.206432342529297 cw_loss: 9.04799747467041 norm_loss: 18.158435821533203 div_reg: 0.0
batch [200] batch_loss: 23.121402740478516 cw_loss: 4.3559794425964355 norm_loss: 18.765422821044922 div_reg: 0.0
batch [300] batch_loss: 20.851367950439453 cw_loss: 2.4635329246520996 norm_loss:

In [15]:
# Tabulate the evaluation results, one row per attack run.
df = pd.DataFrame.from_dict(results)
target_features = ['attack', 'layer', 'regularization_weight', 'confidence', 'orig_acc', 'pert_acc', 'neuron_coverage', 'inception_score', 'fid_score_64', 'fid_score_2048']
# Show only the requested columns that actually exist: 'neuron_coverage' is
# added by a later cell, so selecting it unconditionally raises KeyError when
# the notebook is run top to bottom.
df[[col for col in target_features if col in df.columns]]

KeyboardInterrupt: 

In [16]:
# save the results of the evaluation
# (the context manager guarantees the file is flushed and closed)
with open("assets/results.pickle", "wb") as handle:
    pickle.dump(results, handle)

In [17]:
# load the results from file
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook. Also note that this reads 'assets/results.pickle', not the
# `save_file_path` used by the evaluation loop above.
with open('assets/results.pickle', 'rb') as handle:
    results = pickle.load(handle)

# Coverage Criteria

## Neuron Coverage (DeepXplore)

In [30]:
def get_layer_output_sizes(model, data):
    """Return the per-sample output size of each hooked direct child of `model`.

    A forward hook is attached to every direct child that is not an
    ``nn.Sequential`` or ``nn.ModuleList``, a single sample (``data[:1]``) is
    pushed through the model, and the output size without its leading (batch)
    dimension is recorded under the child's own name.

    Each hook is bound to the name of the module it is attached to, so the
    mapping stays correct when some children are skipped or when the forward
    pass calls children in a different order than they were registered.

    Args:
        model: the ``nn.Module`` to inspect.
        data: a batch of inputs; only the first sample is used.

    Returns:
        dict mapping child name -> list of ints (output size minus dim 0).
        Children that the forward pass never calls do not appear.
    """
    output_sizes = {}
    hooks = []

    def make_hook(layer_name):
        # Closure so each hook knows which layer it belongs to.
        def hook(module, input, output):
            output_sizes[layer_name] = list(output.size()[1:])
        return hook

    for name, module in model.named_children():
        if (not isinstance(module, nn.Sequential)
            and not isinstance(module, nn.ModuleList)
            and not (module == model)):
            hooks.append(module.register_forward_hook(make_hook(name)))
    try:
        model(data[:1])
    finally:
        # always detach the hooks, even if the forward pass fails
        for h in hooks:
            h.remove()
    return output_sizes

def get_init_dict(model, data, init_value=False):
    """Build the per-neuron bookkeeping dict used for neuron coverage.

    Only layers whose name contains 'relu' (case-insensitive) are included,
    since only post-activation outputs are of interest. Every element of such
    a layer's per-sample output becomes one entry.

    Args:
        model: the network to inspect.
        data: a batch of inputs, forwarded to ``get_layer_output_sizes``.
        init_value: the value stored for every neuron (the same object is
            shared by all entries).

    Returns:
        dict mapping (layer name, flat neuron index) -> ``init_value``.
    """
    output_sizes = get_layer_output_sizes(model, data)
    model_layer_dict = {}
    for layer, output_size in output_sizes.items():
        # since we only care about post-activation outputs
        if 'relu' not in layer.lower():
            continue
        # int(): np.prod of an empty size is the float 1.0, which range() rejects
        for index in range(int(np.prod(output_size))):
            model_layer_dict[(layer, index)] = init_value
    return model_layer_dict

def neurons_covered(model_layer_dict):
    """Summarise a coverage dict.

    Args:
        model_layer_dict: dict mapping a neuron key to a truthy value when the
            neuron is covered and a falsy value otherwise.

    Returns:
        (covered_neurons, total_neurons, coverage ratio). For an empty dict the
        ratio is 0.0 instead of raising ZeroDivisionError.
    """
    total_neurons = len(model_layer_dict)
    covered_neurons = sum(1 for v in model_layer_dict.values() if v)
    if total_neurons == 0:
        return 0, 0, 0.0
    return covered_neurons, total_neurons, covered_neurons / float(total_neurons)

def scale(out, rmax=1, rmin=0):
    """Min-max scale `out` to the range [rmin, rmax].

    The minimum and maximum are taken over the whole of `out`, not per row or
    per channel.

    Args:
        out: array/tensor exposing ``.min()`` and ``.max()``.
        rmax: value the global maximum is mapped to.
        rmin: value the global minimum is mapped to.

    Returns:
        An array/tensor of the same shape as `out`. When every element of
        `out` is equal (so the range is zero) every element of the result is
        `rmin`, instead of the NaN produced by dividing zero by zero.
    """
    lo = out.min()
    span = out.max() - lo
    if span == 0:
        # (out - lo) is all zeros; multiplying by 0.0 keeps the shape and
        # yields a floating-point result like the regular path does
        return (out - lo) * 0.0 + rmin
    return (out - lo) / span * (rmax - rmin) + rmin

def update_coverage(model, data, model_layer_dict, threshold=0.):
    """Mark neurons in `model_layer_dict` whose scaled activation exceeds `threshold`.

    For every layer named in the dict's keys, the layer output for `data` is
    fetched with ``model.extract_outputs``, squeezed, min-max scaled with
    ``scale`` and flattened; each element above `threshold` sets the entry
    ``(layer, flat_index)`` to True. The dict is modified in place and nothing
    is returned.

    NOTE(review): the flat index runs over the whole squeezed output. If
    ``extract_outputs`` returns one activation map per input sample, indices
    would go past the per-sample neuron count and add new keys — confirm what
    ``extract_outputs`` returns.
    """
    layer_names = {key[0] for key in model_layer_dict}
    for layer_name in layer_names:
        raw = model.extract_outputs(data, layer_name)
        activations = scale(torch.squeeze(raw))
        for position, value in enumerate(activations.view(-1)):
            if value > threshold:
                model_layer_dict[(layer_name, position)] = True
                
def eval_nc(model, data, threshold=0.):
    """Compute the neuron coverage ratio of `model` on `data`.

    Builds a fresh all-False coverage dict, marks the neurons whose scaled
    activation exceeds `threshold`, and returns only the coverage ratio
    (covered / total) as a float — not the covered and total counts.
    """
    coverage_by_neuron = get_init_dict(model, data, False)
    update_coverage(model, data, coverage_by_neuron, threshold=threshold)
    return neurons_covered(coverage_by_neuron)[2]

In [32]:
# Baseline: neuron coverage of the unperturbed inputs at ten evenly spaced thresholds.
baseline_thresholds = np.linspace(0, 1, 10)
for t in baseline_thresholds:
    baseline_nc = eval_nc(model, inputs, threshold=t)
    print('threshold:', t, baseline_nc)

threshold: 0.0 0.6015768725361367
threshold: 0.1111111111111111 0.1795663600525624
threshold: 0.2222222222222222 0.1038107752956636
threshold: 0.3333333333333333 0.06399474375821287
threshold: 0.4444444444444444 0.042378449408672796
threshold: 0.5555555555555556 0.026675427069645204
threshold: 0.6666666666666666 0.01392904073587385
threshold: 0.7777777777777777 0.006176084099868594
threshold: 0.8888888888888888 0.0021681997371879105
threshold: 1.0 0.0


In [25]:
# Attach the neuron coverage (threshold 0) of each stored adversarial batch
# to its result entry and print one summary line per entry.
for i, entry in enumerate(results):
    nc = eval_nc(model, entry['adversaries'], threshold=0.)
    entry['neuron_coverage'] = nc
    print(i, entry['attack'], entry['layer'], entry['regularization_weight'], nc)

0 cw_div1_attack relu1 0 0.5850197109067017
1 cw_div1_attack relu1 0.001 0.5576215505913272
2 cw_div1_attack relu1 0.01 0.4885676741130092
3 cw_div1_attack relu1 1 0.43495400788436267
4 cw_div1_attack relu1 10 0.585611038107753
5 cw_div1_attack relu1 100 0.585611038107753
6 cw_div1_attack relu2 0 0.5851511169513798
7 cw_div1_attack relu2 0.001 0.5846911957950066
8 cw_div1_attack relu2 0.01 0.5894875164257556
9 cw_div1_attack relu2 1 0.5682654402102497
10 cw_div1_attack relu2 10 0.5706964520367936
11 cw_div1_attack relu2 100 0.5704993429697766
12 cw_div1_attack relu3 0 0.5850197109067017
13 cw_div1_attack relu3 0.001 0.5851511169513798
14 cw_div1_attack relu3 0.01 0.5850197109067017
15 cw_div1_attack relu3 1 0.5851511169513798
16 cw_div1_attack relu3 10 0.5854139290407359
17 cw_div1_attack relu3 100 0.5849540078843627
18 cw_div2_attack relu1 0 0.5852825229960578
19 cw_div2_attack relu1 0.001 0.5838370565045992
20 cw_div2_attack relu1 0.01 0.5883048620236531
21 cw_div2_attack relu1 1 0.6

In [19]:
# Results table restricted to the accuracy and neuron-coverage columns.
coverage_columns = ['attack', 'layer', 'regularization_weight', 'orig_acc', 'pert_acc', 'neuron_coverage']
df = pd.DataFrame.from_dict(results)
df.loc[:, coverage_columns]

Unnamed: 0,attack,layer,regularization_weight,orig_acc,pert_acc,neuron_coverage
0,cw_div1_attack,relu1,0.0,0.98,0.0,0.58502
1,cw_div1_attack,relu1,0.001,0.98,0.0,0.557622
2,cw_div1_attack,relu1,0.01,0.98,0.0,0.488568
3,cw_div1_attack,relu1,1.0,0.98,0.0,0.434954
4,cw_div1_attack,relu1,10.0,0.98,0.01,0.585611
5,cw_div1_attack,relu1,100.0,0.98,0.01,0.585611
6,cw_div1_attack,relu2,0.0,0.98,0.0,0.585151
7,cw_div1_attack,relu2,0.001,0.98,0.0,0.584691
8,cw_div1_attack,relu2,0.01,0.98,0.0,0.589488
9,cw_div1_attack,relu2,1.0,0.98,0.0,0.568265


In [76]:
# save the results of the evaluation
# (the context manager guarantees the file is flushed and closed)
with open("assets/results.pickle", "wb") as handle:
    pickle.dump(results, handle)

## k-multisection Neuron Coverage (DeepGauge)

In [13]:
# model.init_dict(model.covered_dict, inputs, 'relu', False)
# model.init_dict(model.lowhigh_dict, inputs, 'relu', {'low': 0, 'high': 0})
# print(len(model.covered_dict), len(model.lowhigh_dict))

In [9]:
# # move this to cpu because the GPU RAM probably isn't large enough to handle it
# device = torch.device("cpu")
# model.to(device)

# Run the whole training set through the model with the low/high hooks
# attached to the 'relu' layers, then persist model.lowhigh_dict.
# (Presumably the hooks record per-neuron activation bounds — see the
# ConvNet implementation.)
model.init_dict(model.lowhigh_dict, inputs, 'relu', {'low': 0, 'high': 0})
model.hook_lowhigh_dict('relu')

# ten large batches cover the training set
lohi_batch_size = len(train_loader.dataset) // 10

lowhigh_loader = torch.utils.data.DataLoader(
    torchvision.datasets.MNIST('/data/', train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor()
                             ])),
    batch_size=lohi_batch_size, shuffle=True, pin_memory=True)

try:
    # Only forward activations are needed; no_grad avoids building autograd
    # graphs for these very large batches and so lowers peak memory.
    with torch.no_grad():
        for i, (lohi_inputs, _) in enumerate(lowhigh_loader):
            print('batch:', i)
            lohi_inputs = lohi_inputs.to(device)
            model(lohi_inputs)
finally:
    # never leave the hooks attached, even if a batch fails (e.g. out of memory)
    model.remove_hooks()

# device = torch.device("cuda")

with open("assets/lowhigh_dict.pickle", "wb") as handle:
    pickle.dump(model.lowhigh_dict, handle)

{('relu1', 0): {'low': 0, 'high': 0.17005696892738342},
 ('relu1', 1): {'low': 0, 'high': 0.2610016167163849},
 ('relu1', 2): {'low': 0, 'high': 0.3574622869491577},
 ('relu1', 3): {'low': 0, 'high': 0.4250221848487854},
 ('relu1', 4): {'low': 0, 'high': 0.4433531165122986},
 ('relu1', 5): {'low': 0, 'high': 0.42099499702453613},
 ('relu1', 6): {'low': 0, 'high': 0.3878710865974426},
 ('relu1', 7): {'low': 0, 'high': 0.4209814667701721},
 ('relu1', 8): {'low': 0, 'high': 0.42347657680511475},
 ('relu1', 9): {'low': 0, 'high': 0.40517371892929077},
 ('relu1', 10): {'low': 0, 'high': 0.7064920663833618},
 ('relu1', 11): {'low': 0, 'high': 0.8078417778015137},
 ('relu1', 12): {'low': 0, 'high': 0.6157643795013428},
 ('relu1', 13): {'low': 0, 'high': 0.47518014907836914},
 ('relu1', 14): {'low': 0, 'high': 0.3899363875389099},
 ('relu1', 15): {'low': 0, 'high': 0.3532407879829407},
 ('relu1', 16): {'low': 0, 'high': 0.3282723128795624},
 ('relu1', 17): {'low': 0, 'high': 0.3503431081771850

In [5]:
# load the previously saved low/high activation dict from file
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook.
with open('assets/lowhigh_dict.pickle', 'rb') as handle:
    model.lowhigh_dict = pickle.load(handle)

In [6]:
# Display the low/high entries of layer 'relu3' only.
# (get_dict_for_layer is not defined in this notebook; presumably it comes
# from one of the star-imported project modules.)
get_dict_for_layer(model.lowhigh_dict, 'relu3')

{('relu3', 0): {'low': 0, 'high': 4.8202948570251465},
 ('relu3', 1): {'low': 0, 'high': 4.82769775390625},
 ('relu3', 2): {'low': 0, 'high': 7.479001522064209},
 ('relu3', 3): {'low': 0, 'high': 8.612505912780762},
 ('relu3', 4): {'low': 0, 'high': 5.791659355163574},
 ('relu3', 5): {'low': 0, 'high': 7.655079364776611},
 ('relu3', 6): {'low': 0, 'high': 6.207518100738525},
 ('relu3', 7): {'low': 0, 'high': 7.808154582977295},
 ('relu3', 8): {'low': 0, 'high': 8.590152740478516},
 ('relu3', 9): {'low': 0, 'high': 2.782480001449585},
 ('relu3', 10): {'low': 0, 'high': 1.412194848060608},
 ('relu3', 11): {'low': 0, 'high': 6.954821586608887},
 ('relu3', 12): {'low': 0, 'high': 4.98781681060791},
 ('relu3', 13): {'low': 0, 'high': 1.674249291419983},
 ('relu3', 14): {'low': 0, 'high': 3.0363736152648926},
 ('relu3', 15): {'low': 0, 'high': 5.852689743041992},
 ('relu3', 16): {'low': 0, 'high': 7.916790962219238},
 ('relu3', 17): {'low': 0, 'high': 6.889847755432129},
 ('relu3', 18): {'lo

# Inception Scoring

In [21]:
from inception_score import *
# %load_ext autoreload
# %autoreload 2

In [22]:
def preprocess_for_is(imgs):
    '''
    Convert a batch of single-channel images into a 3-channel numpy batch.

    imgs : pytorch tensor (CPU or CUDA) of shape (N, 1, H, W)
    returns : numpy array of shape (N, 3, H, W) with the same dtype, where the
              single input channel is replicated 3 times along axis 1

    Raises ValueError if the input is not 4D with exactly one channel.

    The batch axis is always preserved, so a batch containing a single image
    yields shape (1, 3, H, W) rather than being collapsed to (3, H, W).
    '''
    adv_np = imgs.cpu().detach().numpy()
    if adv_np.ndim != 4 or adv_np.shape[1] != 1:
        raise ValueError(
            'expected images of shape (N, 1, H, W), got {}'.format(adv_np.shape))
    # Repeat only along the channel axis; an unrestricted squeeze would also
    # drop a size-1 batch / height / width axis.
    adv_3d = np.repeat(adv_np, 3, axis=1)
    return adv_3d

In [18]:
# Compute the Inception Score for each entry's adversarial batch, store the mean
# on that entry, and print one summary line per entry.
for i, entry in enumerate(results):
    attack_name = entry['attack']
    layer_name = entry['layer']
    reg_weight = entry['regularization_weight']

    adv_3d = preprocess_for_is(entry['adversaries'])
    mean_is, std_is = inception_score(
        adv_3d, cuda=True, batch_size=10, resize=True, splits=10)

    # Only the mean is kept on the entry; std_is is not stored.
    entry['inception_score'] = mean_is
    print(i, attack_name, layer_name, reg_weight, mean_is)

0 cw_div1_attack relu1 0 2.1209492815586906
1 cw_div1_attack relu1 0.001 2.127311732225868
2 cw_div1_attack relu1 0.01 2.1292338489015177
3 cw_div1_attack relu1 1 2.1406525736513213
4 cw_div1_attack relu1 10 2.1289341141537674
5 cw_div1_attack relu1 100 2.129162888722381
6 cw_div1_attack relu2 0 2.121341867838196
7 cw_div1_attack relu2 0.001 2.1283085506669894
8 cw_div1_attack relu2 0.01 2.128306204108593
9 cw_div1_attack relu2 1 2.156900076796944
10 cw_div1_attack relu2 10 2.160886621290113
11 cw_div1_attack relu2 100 2.16015964869021
12 cw_div1_attack relu3 0 2.1206074935563985
13 cw_div1_attack relu3 0.001 2.1211123572604844
14 cw_div1_attack relu3 0.01 2.1215065538287528
15 cw_div1_attack relu3 1 2.121403170482811
16 cw_div1_attack relu3 10 2.1203692489550567
17 cw_div1_attack relu3 100 2.1208818906729934
18 cw_div2_attack relu1 0 2.150741244827747
19 cw_div2_attack relu1 0.001 2.1565648058678675
20 cw_div2_attack relu1 0.01 2.1564078357085883
21 cw_div2_attack relu1 1 2.1597769637

In [23]:
# Build a table from the per-entry result dicts and show every row of the
# columns of interest.
pd.set_option('display.max_rows', None)
summary_columns = [
    'attack',
    'layer',
    'regularization_weight',
    'orig_acc',
    'pert_acc',
    'inception_score',
]
df = pd.DataFrame.from_dict(results)
df[summary_columns]

Unnamed: 0,attack,layer,regularization_weight,orig_acc,pert_acc,inception_score
0,cw_div1_attack,relu1,0.0,0.98,0.0,2.120949
1,cw_div1_attack,relu1,0.001,0.98,0.0,2.127312
2,cw_div1_attack,relu1,0.01,0.98,0.0,2.129234
3,cw_div1_attack,relu1,1.0,0.98,0.0,2.140653
4,cw_div1_attack,relu1,10.0,0.98,0.01,2.128934
5,cw_div1_attack,relu1,100.0,0.98,0.01,2.129163
6,cw_div1_attack,relu2,0.0,0.98,0.0,2.121342
7,cw_div1_attack,relu2,0.001,0.98,0.0,2.128309
8,cw_div1_attack,relu2,0.01,0.98,0.0,2.128306
9,cw_div1_attack,relu2,1.0,0.98,0.0,2.1569


In [30]:
# save the results of the evaluation
# Use a context manager so the file handle is flushed and closed as soon as the
# dump finishes, instead of relying on garbage collection.
with open("assets/results.pickle", "wb") as handle:
    pickle.dump(results, handle)

In [20]:
# plt.imshow(adv_np[0].reshape(28, 28))
# plt.imshow(adv_3d[0].transpose((1,2,0)))

# Fréchet Inception Distance (FID) Scoring
https://github.com/mseitzer/pytorch-fid

In [10]:
batch_size_FID = 2048

# MNIST test split (train=False), converted with ToTensor only.
fid_dataset = torchvision.datasets.MNIST(
    '/data/',
    train=False,
    download=True,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.ToTensor()
    ]),
)
FID_loader = torch.utils.data.DataLoader(
    fid_dataset, batch_size=batch_size_FID, shuffle=True, pin_memory=True)

# targets = true labels only for when you're doing a targeted attack;
# otherwise, you're going to make the inputs easier to classify.
# To do a targeted attack, targets should be some class other than
# the true label.
# NOTE(review): the first sentence probably means an *untargeted* attack - confirm.

# Take the first shuffled batch and move both tensors to the active device.
inputs_fid, targets_fid = next(iter(FID_loader))
inputs_fid, targets_fid = inputs_fid.to(device), targets_fid.to(device)

In [11]:
img_root = "temp_imgs/"
img_dirs = ["real/", "fake/"]

# Write the FID batch to disk as PNGs named img_<index>_<label>.png.
# NOTE(review): both "real/" and "fake/" receive the same inputs_fid batch, so the
# FID between the two folders should be ~0 - presumably a sanity check; confirm.
for img_dir in img_dirs:
    target_dir = img_root + img_dir
    if os.path.exists(target_dir):
        # An existing directory is left untouched (nothing is re-written).
        continue
    os.makedirs(target_dir)
    for i, (img, lbl) in enumerate(zip(inputs_fid, targets_fid)):
        filename = f"{target_dir}img_{i}_{lbl.item()}.png"
        torchvision.utils.save_image(img, filename)

In [None]:
# # run this in cmd line
# !python fid_score.py temp_imgs/real temp_imgs/fake

# starting calculate_fid_given_paths()...
# starting _compute_statistics_of_path()...
# starting calculate_activation_statistics()...
# starting get_activations()...
# starting _compute_statistics_of_path()...
# starting calculate_activation_statistics()...
# starting get_activations()...
# starting calculate_frechet_distance()...
# FID:  -4.0333336670528297e-10

[autoreload of _frozen_importlib failed: Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 434, in superreload
    module = reload(module)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\imp.py", line 314, in reload
    """
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\importlib\__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
AttributeError: module 'importlib._bootstrap' has no attribute '_find_spec'
]
[autoreload of _frozen_importlib_external failed: Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\

[autoreload of runpy failed: Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 450, in superreload
    update_generic(old_obj, new_obj)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 387, in update_generic
    update(a, b)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 357, in update_class
    update_instances(old, new)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 280, in update_instances
   

[autoreload of collections.abc failed: Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 450, in superreload
    update_generic(old_obj, new_obj)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 387, in update_generic
    update(a, b)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 357, in update_class
    update_instances(old, new)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 280, in update_ins

[autoreload of ast failed: Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 450, in superreload
    update_generic(old_obj, new_obj)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 387, in update_generic
    update(a, b)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 357, in update_class
    update_instances(old, new)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 280, in update_instances
    f

[autoreload of ctypes failed: Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 450, in superreload
    update_generic(old_obj, new_obj)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 387, in update_generic
    update(a, b)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 357, in update_class
    update_instances(old, new)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 280, in update_instances
  

[autoreload of IPython.core.interactiveshell failed: Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\extensions\autoreload.py", line 434, in superreload
    module = reload(module)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\imp.py", line 314, in reload
    """
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\importlib\__init__.py", line 169, in reload
    finally:
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "C:\Users\Fabrice\AppData\Local\Continuu

Traceback (most recent call last):
Error in callback <function _enable_matplotlib_integration.<locals>.configure_once at 0x0000023D8827F2F0> (for post_run_cell):
Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\interactiveshell.py", line 2880, in _run_cell
    try:
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\interactiveshell.py", line 3001, in run_cell_async
    with self.builtin_trap:
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\compilerop.py", line 135, in cache
    linecache._ipython_cache[name] = entry
AttributeError: module 'linecache' has no attribute '_ipython_cache'

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.
--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\interactiveshell.py", line 2880, in _run_cell
    try:
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\interactiveshell.py", line 3001, in run_cell_async
    with self.builtin_trap:
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\compilerop.py", line 135, in cache
    linecache._ipython_cache[name] = entry
AttributeError: module 'linecache' has no attribute '_ipython_cache'

During handling of the above exce

Future exception was never retrieved
future: <Future finished exception=TypeError('can only concatenate str (not "list") to str')>
Traceback (most recent call last):
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\interactiveshell.py", line 2880, in _run_cell
    try:
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\interactiveshell.py", line 3001, in run_cell_async
    with self.builtin_trap:
  File "C:\Users\Fabrice\AppData\Local\Continuum\anaconda3\envs\summer19_research\lib\site-packages\IPython\core\compilerop.py", line 135, in cache
    linecache._ipython_cache[name] = entry
AttributeError: module 'linecache' has no attribute '_ipython_cache'

During handling of the above 