### Black Box Attacks and Defenses 

### Experiment 3


In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models 

import numpy as np

from art.estimators.classification import PyTorchClassifier

from PIL import Image
from torchvision import transforms

In [2]:
import ast
import os

import pandas as pd

In [3]:
import matplotlib.pyplot as plt 

from datetime import datetime ## important 
import time 
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [4]:
from model_codes_cifar10.vgg_cifar10 import * 
from model_codes_cifar10.resnet_cifar10 import * 
from model_codes_cifar10.densenet_cifar10 import * 

In [5]:
import torch
# Device string used as `map_location` when the checkpoints are loaded further down.
device_name = 'cuda:1'

# Make GPU 1 the current CUDA device; keep this index in sync with device_name.
torch.cuda.set_device(1)
# Last expression of the cell: displays the active device index as a sanity check.
torch.cuda.current_device()

1

#### Attacks from ART 

In [6]:
from art.attacks.evasion import ProjectedGradientDescent
from art.attacks.evasion import FastGradientMethod
from art.attacks.evasion import NewtonFool 
from art.attacks.evasion import CarliniL2Method
from art.attacks.evasion import SimBA
from art.attacks.evasion import HopSkipJump

In [22]:
from skimage.metrics import structural_similarity as ssim

#### Some methods and variables 

In [23]:
# Image -> tensor conversion followed by per-channel normalisation.
# NOTE(review): the mean/std triples are presumably the usual CIFAR-10
# statistics the models were trained with -- confirm against the training code.
preprocess = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

In [24]:
def softmax_activation(inputs):
    """Turn logits into probabilities with a numerically stable softmax.

    The softmax is computed along the last axis, so every row of a
    ``(batch, classes)`` array is normalised on its own. For a single-row
    batch (the way this notebook calls it) the result is identical to
    normalising over the whole array; for larger batches each row now sums
    to 1 instead of the whole batch summing to 1.

    Args:
        inputs: Object exposing ``tolist()`` (e.g. a NumPy array) that holds
            the logits, with at least one dimension.

    Returns:
        numpy.ndarray (float64) of the same shape as ``inputs`` containing
        the probabilities.
    """
    # Going through tolist() keeps the original float64 arithmetic regardless
    # of the dtype the logits arrive in.
    logits = np.asarray(inputs.tolist(), dtype=np.float64)

    # Shift by the per-row maximum so np.exp never overflows.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp_values = np.exp(shifted)

    # Normalize each row independently.
    probabilities = exp_values / np.sum(exp_values, axis=-1, keepdims=True)
    return probabilities

In [25]:
def compare_images(imageA, imageB):
    """Return the SSIM-based dissimilarity ``1 - SSIM(imageA, imageB)``.

    Both images are handed to ``ssim`` with ``multichannel=True``.

    NOTE(review): newer scikit-image releases replace ``multichannel`` with
    ``channel_axis`` -- confirm against the installed version before upgrading.
    """
    similarity = ssim(imageA, imageB, multichannel=True)
    return 1 - similarity

In [26]:
def calc_L_dist(adv_map, targ_map):
    """Return the L1 distance of the first sample, divided by ``32*32``.

    Both arrays are flattened to ``(len(adv_map), -1)`` and the L1 norm of
    their difference is taken per row; only the first row's value is returned,
    scaled by the fixed factor ``32*32``.
    """
    batch_size = len(adv_map)
    flat_adv = adv_map.reshape((batch_size, -1))
    flat_targ = targ_map.reshape((batch_size, -1))

    per_sample_l1 = np.linalg.norm(flat_adv - flat_targ, 1, axis=1)

    # Only the first sample is reported.
    return per_sample_l1[0] / (32 * 32)

In [27]:
# CIFAR-10 selected images dataset: a text file whose content is parsed below
# with ast.literal_eval into a list of image paths.
selected_images_file_path = "data/1000_cifar10_images_paths_filter65.txt"


#### Loading the selected image paths

In [28]:
# Read the whole file inside a context manager so the handle is closed
# immediately instead of staying open for the lifetime of the kernel.
with open(selected_images_file_path, "r") as selected_images_list_file:
    loaded_image_paths_text = selected_images_list_file.read()

### Convert the file content into a list using ast
# literal_eval only evaluates Python literals, so nothing in the file is executed.
loaded_image_paths_list = ast.literal_eval(loaded_image_paths_text)

In [29]:
# Sanity check: number of image paths read from the selection file.
len(loaded_image_paths_list)

1000

In [30]:
# Peek at the first entry to confirm the path format.
loaded_image_paths_list[0]

'cifar10png/test/airplane/0466.png'

### Main method

In [31]:
def experiment3_method(target_classifier, attack, image_list):
    """Attack every image in ``image_list`` and collect per-image results.

    For each path the image is opened, run through ``preprocess``, classified
    by ``target_classifier``, perturbed with ``attack.generate`` and classified
    again. One result row is recorded per image.

    Args:
        target_classifier: Object with a ``predict(batch)`` method returning
            per-class scores (here an ART classifier).
        attack: Object with a ``generate(x=batch)`` method returning the
            adversarial batch (here an ART evasion attack).
        image_list: Iterable of image file paths.

    Returns:
        pandas.DataFrame with the columns ``image``, ``benign_pred``,
        ``benign_conf``, ``adv_pred``, ``adv_conf``, ``attack_time``
        (seconds), ``noise_ratio`` (``compare_images`` output) and
        ``l1_distance`` (``calc_L_dist`` output). The columns are present
        even when ``image_list`` is empty.
    """
    column_list = ['image', 'benign_pred', 'benign_conf', 'adv_pred', 'adv_conf',
                   'attack_time', 'noise_ratio', 'l1_distance']

    # Rows are gathered in a plain list and turned into a frame once at the
    # end: DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas 2.x.
    records = []

    for image in image_list:
        # The context manager releases the file handle as soon as the pixels
        # have been converted to a tensor.
        with Image.open(image) as input_image:
            input_tensor = preprocess(input_image)
        # Add a leading batch dimension of size 1 and hand NumPy data to ART.
        input_batch = input_tensor.unsqueeze(0).numpy().astype(np.float32)

        preds = target_classifier.predict(input_batch)
        predicted_label = np.argmax(preds, axis=1)[0]

        confidence = round(np.max(softmax_activation(preds), axis=1)[0], 3)

        ## attack
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments.
        start = time.perf_counter()
        adv_image = attack.generate(x=input_batch)
        attack_time = time.perf_counter() - start

        # Move the first axis of each sample to the end before comparing
        # (assumes channel-first samples, which SSIM wants channel-last).
        noise_ratio = compare_images(input_batch[0].transpose(1, 2, 0),
                                     adv_image[0].transpose(1, 2, 0))
        noise_ratio = round(noise_ratio, 5)

        l1_distance = calc_L_dist(adv_image, input_batch)

        adv_prediction = target_classifier.predict(adv_image)
        adv_predicted_label = np.argmax(adv_prediction, axis=1)[0]
        adv_confidence = round(np.max(softmax_activation(adv_prediction), axis=1)[0], 3)

        records.append({'image': image,
                        'benign_pred': predicted_label,
                        'benign_conf': confidence,
                        'adv_pred': adv_predicted_label,
                        'adv_conf': adv_confidence,
                        'attack_time': attack_time,
                        'noise_ratio': noise_ratio,
                        'l1_distance': l1_distance})

    return pd.DataFrame(records, columns=column_list)

In [32]:
def summarize_result(results, total_images):
    """Print aggregate statistics for one attack run.

    Args:
        results: DataFrame with the columns ``image``, ``benign_pred``,
            ``benign_conf``, ``adv_pred``, ``adv_conf``, ``attack_time``,
            ``noise_ratio`` and ``l1_distance``.
        total_images: Number of attacked images; used as the denominator of
            the printed success/failure counts.

    Returns:
        None. The summary is written to standard output.
    """
    def _mean_std(values, digits):
        # Rounded (mean, standard deviation) of a column.
        return round(values.mean(), digits), round(values.std(), digits)

    # An attack counts as successful when the predicted label changed.
    succeeded = results[results.benign_pred != results.adv_pred]
    failed = results[results.benign_pred == results.adv_pred]

    attack_success_count = succeeded["image"].count()
    attack_failure_count = total_images - attack_success_count

    avg_benign_conf, benign_conf_std = _mean_std(results['benign_conf'], 3)
    avg_adv_success_conf, adv_success_conf_std = _mean_std(succeeded["adv_conf"], 3)
    avg_adv_fail_conf, adv_fail_conf_std = _mean_std(failed["adv_conf"], 3)
    avg_attack_time, attack_time_std = _mean_std(results['attack_time'], 4)
    avg_noise_ratio, noise_ratio_std = _mean_std(results['noise_ratio'], 5)
    avg_l1_distance, l1_distance_std = _mean_std(results['l1_distance'], 5)

    print("Benign average confidence: {} ± {}".format(avg_benign_conf, benign_conf_std))
    # The denominator comes from total_images instead of a hard-coded "1K",
    # so the line stays correct for any dataset size.
    print("Attack success rate: {}/{} ({}±{})".format(
        attack_success_count, total_images, avg_adv_success_conf, adv_success_conf_std))
    print("Attack failure rate: {}/{} ({}±{})".format(
        attack_failure_count, total_images, avg_adv_fail_conf, adv_fail_conf_std))
    print("Average attack time: {} ± {}".format(avg_attack_time, attack_time_std))
    print("Average noise ratio: {} ± {}".format(avg_noise_ratio, noise_ratio_std))
    print("Average l1 distance: {} ± {}".format(avg_l1_distance, l1_distance_std))
    print("===================================")

In [33]:
# --- Instantiate every target architecture (weights are loaded below) ---
model_resnet18 = ResNet18()
model_resnet34 = ResNet34()
model_resnet50 = ResNet50()
model_resnet101 = ResNet101()
model_resnet152 = ResNet152()

model_vgg11 = VGG('VGG11')
model_vgg13 = VGG('VGG13')
model_vgg16 = VGG('VGG16')
model_vgg19 = VGG('VGG19')

model_densenet121 = DenseNet121()
model_densenet161 = DenseNet161()
model_densenet169 = DenseNet169()
model_densenet201 = DenseNet201()

# --- Checkpoint path for each architecture ---
resnet18_file_name = "trained_models_cifar10/resnet18_cifar10_lr01.pth"
resnet34_file_name = "trained_models_cifar10/resnet34_cifar10_lr01.pth"
resnet50_file_name = "trained_models_cifar10/resnet50_cifar10_lr01.pth"
resnet101_file_name = "trained_models_cifar10/resnet101_cifar10_lr01.pth"
resnet152_file_name = "trained_models_cifar10/resnet152_cifar10_lr01.pth"

vgg11_file_name = "trained_models_cifar10/vgg11_cifar10_lr01.pth"
vgg13_file_name = "trained_models_cifar10/vgg13_cifar10_lr01.pth"
vgg16_file_name = "trained_models_cifar10/vgg16_cifar10_lr01.pth"
vgg19_file_name = "trained_models_cifar10/vgg19_cifar10_lr01.pth"

densenet121_file_name = "trained_models_cifar10/densenet121_cifar10_lr01.pth"
densenet161_file_name = "trained_models_cifar10/densenet161_cifar10_lr01.pth"
densenet169_file_name = "trained_models_cifar10/densenet169_cifar10_lr01.pth"
densenet201_file_name = "trained_models_cifar10/densenet201_cifar10_lr01.pth"

# Pair each model with its checkpoint so loading and the eval switch are
# written once instead of thirteen times.
models_with_checkpoints = [
    (model_resnet18, resnet18_file_name),
    (model_resnet34, resnet34_file_name),
    (model_resnet50, resnet50_file_name),
    (model_resnet101, resnet101_file_name),
    (model_resnet152, resnet152_file_name),
    (model_vgg11, vgg11_file_name),
    (model_vgg13, vgg13_file_name),
    (model_vgg16, vgg16_file_name),
    (model_vgg19, vgg19_file_name),
    (model_densenet121, densenet121_file_name),
    (model_densenet161, densenet161_file_name),
    (model_densenet169, densenet169_file_name),
    (model_densenet201, densenet201_file_name),
]

# Each checkpoint is indexed with 'net' to obtain the state dict. The loaded
# object is not bound to a name, so it can be freed right after the copy.
for cifar10_model, checkpoint_path in models_with_checkpoints:
    cifar10_model.load_state_dict(torch.load(checkpoint_path, map_location=device_name)['net'])

# Switch every network to evaluation mode before it is attacked.
for cifar10_model, checkpoint_path in models_with_checkpoints:
    cifar10_model.eval()

print("Loaded...")

Loaded...


In [34]:
# Single table of (name, model) pairs; both lists are derived from it so the
# names can never drift out of step with the models.
target_model_table = [
    ("resnet18", model_resnet18),
    ("resnet34", model_resnet34),
    ("resnet50", model_resnet50),
    ("resnet101", model_resnet101),
    ("resnet152", model_resnet152),
    ("vgg11", model_vgg11),
    ("vgg13", model_vgg13),
    ("vgg16", model_vgg16),
    ("vgg19", model_vgg19),
    ("densenet121", model_densenet121),
    ("densenet161", model_densenet161),
    ("densenet169", model_densenet169),
    ("densenet201", model_densenet201),
]
target_models_list = [entry[1] for entry in target_model_table]
target_models_names = [entry[0] for entry in target_model_table]

In [None]:
# The loss module holds no per-model state, so a single instance is shared by
# all classifiers instead of being rebuilt on every iteration.
criterion = nn.CrossEntropyLoss()

# Make sure the output folder exists *before* the long-running attacks start,
# so a finished run cannot be lost to a missing directory at CSV-writing time.
os.makedirs("exp3_results", exist_ok=True)

for model_name, target_model in zip(target_models_names, target_models_list):
    # Create the ART classifier
    # ART documents device_type as either 'cpu' or 'gpu'; the previous value
    # 'cuda:2' contradicted device_name ('cuda:1'). With 'gpu' the classifier
    # runs on the current CUDA device selected via torch.cuda.set_device.
    classifier = PyTorchClassifier(
        model=target_model,
        loss=criterion,
        input_shape=(3, 32, 32),
        nb_classes=10,
        device_type='gpu'
    )

    # Alternative attack configurations (swap in as needed):
#     pgd_attack = ProjectedGradientDescent(classifier, max_iter=20, eps_step=1, eps=0.01)
#     fgsm_attack = FastGradientMethod(estimator = classifier, eps=0.01)
#     newton_attack = NewtonFool(classifier=classifier, max_iter=5, verbose=False)
#     attack_simba = SimBA(classifier=classifier, epsilon = 0.05, max_iter=5000)
    hopskipjump_attack = HopSkipJump(classifier=classifier, max_iter=20, verbose=False)
#     cw_attack = CarliniL2Method(classifier=classifier, max_iter=2, learning_rate=0.05, verbose=False)
    result_table = experiment3_method(classifier, hopskipjump_attack, loaded_image_paths_list)

    result_table.to_csv("exp3_results/{}_cifar10_hopskipjump.csv".format(model_name), index=False)
    print("Target model: {}".format(model_name))
    # Use the real number of attacked images rather than a hard-coded 1000.
    summarize_result(result_table, len(loaded_image_paths_list))