In [1]:
# Enable autoreload
%load_ext autoreload
%autoreload 2

In [2]:
import os
os.chdir('../')

In [64]:
import json
import torch
import torchvision
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from torchvision import transforms
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from zest import utils
from zest import model

## Original data

In [None]:
label_map = {'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
inv_label_map = {v: k for k, v in label_map.items()}

agree_file = 'results/agree_CIFAR10_testset_100_samples_densenet121_densenet161_densenet169_googlenet_inception_v3_mobilenet_v2_resnet18_resnet34_resnet50_vgg11_bn_vgg13_bn_vgg16_bn_vgg19_bn.npy'

agree_data = np.load(agree_file, allow_pickle=True).item()
agree_idxs = agree_data['idxs']
agree_imgs = agree_data['imgs']
agree_labels = agree_data['labels']

print('Original labels')
print(agree_labels)
print([inv_label_map[l] for l in agree_labels])

Original labels
[3 8 8 6 6 1 6 3 1 0 9 5 7 9 8 5 7 8 6 7 4 9 5 2 4 0 9 6 6 5 5 9 4 9 5 4 6
 5 6 0 9 3 7 6 9 8 8 8 7 7 6 6 2 1 2 3 7 6 8 8 0 2 9 3 8 8 1 1 7 2 5 8 9 0
 3 8 6 4 6 0 0 7 4 5 6 3 1 1 3 6 8 7 4 0 2 1 3 0 4 7]
['cat', 'ship', 'ship', 'frog', 'frog', 'automobile', 'frog', 'cat', 'automobile', 'airplane', 'truck', 'dog', 'horse', 'truck', 'ship', 'dog', 'horse', 'ship', 'frog', 'horse', 'deer', 'truck', 'dog', 'bird', 'deer', 'airplane', 'truck', 'frog', 'frog', 'dog', 'dog', 'truck', 'deer', 'truck', 'dog', 'deer', 'frog', 'dog', 'frog', 'airplane', 'truck', 'cat', 'horse', 'frog', 'truck', 'ship', 'ship', 'ship', 'horse', 'horse', 'frog', 'frog', 'bird', 'automobile', 'bird', 'cat', 'horse', 'frog', 'ship', 'ship', 'airplane', 'bird', 'truck', 'cat', 'ship', 'ship', 'automobile', 'automobile', 'horse', 'bird', 'dog', 'ship', 'truck', 'airplane', 'cat', 'ship', 'frog', 'deer', 'frog', 'airplane', 'airplane', 'horse', 'deer', 'dog', 'frog', 'cat', 'automobile', 'automobile', 'c

# MLaaS predictions

In this notebook we will take a look a the predictions of the MLaaS model on the original data points and the adversarial examples.

In [7]:
results_dir = 'results/prediction-cifar10model-2022-05-19T20:31:22.771605Z'
results_f_names = os.listdir(results_dir)
results_files = [os.path.join(results_dir, f) for f in results_f_names]
print('Number of result files:', len(results_files))

Number of result files: 10


In [54]:
all_dfs = []

for res in results_files:
    with open(res, 'r') as f:
        lines = f.readlines()
        lines = [json.loads(l) for l in lines]

        dicts = []
        for l in lines:
            img_id_list = l['instance']['content'].split('/')[-1].split('.')[0].split('_')
            
            if img_id_list[0] == 'original':
                victim = 'original'
                num = int(img_id_list[1])
            else:
                victim = img_id_list[1]
                num = int(img_id_list[-1])
            
            pred_class = l['prediction']['displayNames']
            pred_confs = l['prediction']['confidences']
            assert len(pred_class) == len(pred_confs)

            cd = dict(zip(pred_class, pred_confs))
            d = {'Victim': victim, 'Num': num}
            d.update(cd)

            dicts.append(d)

        df = pd.DataFrame.from_dict(dicts)
        all_dfs.append(df)

In [55]:
results_df = pd.concat(all_dfs)
display(results_df)

Unnamed: 0,Victim,Num,deer,frog,cat,dog,bird,horse,truck,airplane,automobile,ship
0,resnet50,92,5.780329e-01,2.406800e-01,6.492062e-02,5.202641e-02,4.186803e-02,1.943978e-02,1.811616e-03,6.673631e-04,3.631178e-04,1.902263e-04
1,original,76,8.481673e-10,9.999982e-01,1.294000e-06,2.423910e-11,4.228060e-07,9.491437e-17,3.321543e-16,1.466065e-15,7.970061e-17,4.427351e-17
2,vgg11,2,1.366353e-02,1.688944e-02,2.773516e-02,1.449320e-02,2.722985e-02,1.005321e-03,1.200539e-01,2.297276e-01,4.188167e-01,1.303853e-01
3,vgg19,77,9.939103e-01,1.342662e-05,1.722902e-04,5.093974e-04,2.696117e-04,5.124941e-03,5.445313e-10,3.314667e-08,2.303649e-12,1.816630e-09
4,resnet34,64,6.071344e-02,1.339891e-01,8.160979e-02,9.301661e-03,1.028031e-01,3.631478e-03,1.711478e-01,1.241747e-01,2.965941e-01,1.603483e-02
...,...,...,...,...,...,...,...,...,...,...,...,...
117,resnet50,51,8.608881e-06,9.978830e-01,1.055011e-03,9.526847e-06,1.043565e-03,1.967823e-07,5.293371e-09,2.790189e-08,3.802171e-09,4.043393e-09
118,original,92,2.889846e-01,5.674812e-01,5.827442e-02,2.250937e-02,4.177467e-02,1.784367e-02,1.584892e-03,1.312245e-03,1.795221e-04,5.550966e-05
119,vgg19,26,1.720007e-08,1.511407e-08,3.120859e-07,1.628501e-08,1.027238e-08,9.628591e-08,9.620988e-01,4.224179e-05,3.773899e-02,1.195544e-04
120,densenet121,0,1.332359e-03,3.084315e-01,4.989193e-01,1.732027e-01,1.748889e-02,1.240357e-04,4.314902e-06,4.826148e-04,6.284149e-06,8.006763e-06


In [58]:
res_df = results_df.set_index(['Victim', 'Num'])
res_df = res_df[sorted(res_df.columns)]
res_df = res_df.sort_index()
display(res_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,airplane,automobile,bird,cat,deer,dog,frog,horse,ship,truck
Victim,Num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
densenet121,0,4.826148e-04,6.284149e-06,1.748889e-02,4.989193e-01,1.332359e-03,1.732027e-01,3.084315e-01,1.240357e-04,8.006763e-06,4.314902e-06
densenet121,1,3.206015e-02,7.685662e-01,4.195570e-06,3.633246e-06,6.828655e-08,6.848312e-08,7.222313e-08,9.167651e-09,1.866122e-01,1.275344e-02
densenet121,2,4.440393e-01,7.903539e-02,5.172575e-03,4.067995e-03,2.694696e-03,7.866923e-04,1.170938e-03,3.171921e-04,3.714682e-01,9.124711e-02
densenet121,3,8.591389e-06,1.554945e-06,1.014566e-01,9.822901e-03,2.148734e-02,1.364124e-04,8.670562e-01,1.210652e-06,4.373297e-06,2.489878e-05
densenet121,4,5.537146e-04,4.733334e-04,8.563163e-03,1.038006e-01,5.323780e-02,2.088778e-01,5.795472e-01,4.447039e-02,4.614757e-05,4.298761e-04
...,...,...,...,...,...,...,...,...,...,...,...
vgg19,95,4.762987e-02,1.973461e-01,6.770473e-08,4.948343e-07,6.064129e-08,2.086198e-09,2.094480e-08,4.645475e-09,1.428021e-03,7.535953e-01
vgg19,96,1.915767e-09,8.739159e-10,1.808313e-04,9.978156e-01,2.847948e-05,1.160797e-03,8.061575e-04,8.106579e-06,8.707180e-10,3.592796e-09
vgg19,97,9.317530e-01,1.030032e-07,6.267422e-02,3.174469e-04,2.499410e-03,4.538620e-05,2.638367e-03,1.328213e-05,5.406893e-05,4.662164e-06
vgg19,98,3.248759e-02,5.081114e-02,1.839645e-01,1.590495e-01,1.656501e-01,5.731690e-02,1.679779e-01,5.573472e-02,4.623580e-02,8.077198e-02


### Original images

In [77]:
preds_on_original = res_df.loc['original'].to_numpy()
print(preds_on_original.shape)
preds_on_original_int = np.argmax(preds_on_original, axis=-1)
print(preds_on_original_int.shape)
acc_original = accuracy_score(agree_labels, preds_on_original_int)
print('Accuracy on original images:', acc_original)
print(classification_report(agree_labels, preds_on_original_int))

(100, 10)
(100,)
Accuracy on original images: 0.92
              precision    recall  f1-score   support

           0       1.00      0.78      0.88         9
           1       1.00      0.88      0.93         8
           2       0.71      0.83      0.77         6
           3       1.00      0.78      0.88         9
           4       0.78      0.88      0.82         8
           5       1.00      1.00      1.00         9
           6       0.89      1.00      0.94        16
           7       1.00      1.00      1.00        11
           8       1.00      0.93      0.96        14
           9       0.83      1.00      0.91        10

    accuracy                           0.92       100
   macro avg       0.92      0.91      0.91       100
weighted avg       0.93      0.92      0.92       100



### Adversarial examples

In [70]:
models = res_df.index.get_level_values(0).unique().tolist()
models.remove('original')
print(models)

['densenet121', 'densenet161', 'densenet169', 'googlenet', 'inception', 'mobilenet', 'resnet18', 'resnet34', 'resnet50', 'vgg11', 'vgg13', 'vgg16', 'vgg19']


In [79]:
for victim in models:
    print('Adversarial examples generated on: {}'.format(victim))

    preds_on_v = res_df.loc[victim].to_numpy()
    assert preds_on_v.shape == preds_on_original.shape

    preds_on_v_int = np.argmax(preds_on_v, axis=-1)
    acc_on_v = accuracy_score(agree_labels, preds_on_v_int)
    print('Accuracy on adversarial examples:', accuracy_score)
    transfer_v = acc_original - acc_on_v
    print('Transfer success rate:', transfer_v)
    print('Transfer relative accuracy degradation:', transfer_v / acc_original)
    print('-' * 80)
    print()

Adversarial examples generated on: densenet121
Accuracy on adversarial examples: <function accuracy_score at 0x7f125d7f04c0>
Transfer success rate: 0.21000000000000008
Transfer relative accuracy degradation: 0.22826086956521746
--------------------------------------------------------------------------------

Adversarial examples generated on: densenet161
Accuracy on adversarial examples: <function accuracy_score at 0x7f125d7f04c0>
Transfer success rate: 0.24
Transfer relative accuracy degradation: 0.2608695652173913
--------------------------------------------------------------------------------

Adversarial examples generated on: densenet169
Accuracy on adversarial examples: <function accuracy_score at 0x7f125d7f04c0>
Transfer success rate: 0.20000000000000007
Transfer relative accuracy degradation: 0.21739130434782614
--------------------------------------------------------------------------------

Adversarial examples generated on: googlenet
Accuracy on adversarial examples: <functi