In [1]:
from cleverhans.torch.attacks.fast_gradient_method import fast_gradient_method 
from cleverhans.torch.attacks.projected_gradient_descent import projected_gradient_descent
from cleverhans.torch.attacks.noise import noise 
from cleverhans.torch.attacks.hop_skip_jump_attack import hop_skip_jump_attack 
from cleverhans.torch.attacks.spsa import spsa 
from cleverhans.torch.attacks.carlini_wagner_l2 import carlini_wagner_l2 
from cleverhans.torch.attacks.sparse_l1_descent import sparse_l1_descent 


In [2]:
from models import *
import regularizer_losts as rl
from torchvision import datasets, transforms
from torch import optim
import torch
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import copy
import utils as CFI_utils
from matplotlib import pyplot as plt
import numpy as np
from collections import defaultdict
from datetime import datetime
import seaborn as sn
import pandas as pd


from absl import app, flags
from easydict import EasyDict
import torch.nn as nn

%matplotlib inline

import seaborn as sns

from custom_transform import *
from transforms import TRANSFORMS
from mask import *
import imageio
from utils import generate_gif
import os


In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
   
    transforms.Normalize((0.1307,), (0.3081,))
   
  
])


epochs = 10
batch_size = 1000
test_batch_size = 10000
stable_batch_size = 60000

use_cuda = torch.cuda.is_available()
print(use_cuda)

#torch specific configs
torch.manual_seed(1)

device = torch.device("cuda" if use_cuda else "cpu")

train_kwargs = {'batch_size': batch_size}

test_kwargs = {'batch_size': test_batch_size}
stable_kwargs = {'batch_size': stable_batch_size}

if use_cuda:
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True,
                   'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)
    stable_kwargs.update(cuda_kwargs)
    
    
#init stuffs

LOADPATH = 'FFN18_28_21'

LAST_N_EPOCHS = 10

dataset1 = datasets.MNIST('./data', train=True, download=False,
                          transform=transform)
dataset2 = datasets.MNIST('./data', train=False, download=False,
                          transform=transform)



modification_string = ""
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

model = FeedforwardNeuralNetModel(28*28, 128, 10).to(device)
model.load_state_dict(torch.load(LOADPATH, map_location=device))



False


<All keys matched successfully>

## Check pattern overlap ratio between random two images. The pattern here includes all the neurons.

In [4]:
layers = ['fc1', 'fc2', 'fc3', 'fc4']

# Check average overlap ratio on 200 random pairs
pair_num = 200

list1 = np.random.choice(range(10000), pair_num, replace=False)
list2 = np.random.choice(range(10000), pair_num, replace=False)

avg_pair_check_ratio = 0

for idx1, idx2 in zip(list1, list2):
    pair_imgs = torch.utils.data.Subset(dataset2, [idx1, idx2])
    (pair_img1, pair_target1) = pair_imgs[0]
    (pair_img2, pair_target2) = pair_imgs[1]
    
    pair_img1_extend = pair_img1[None, :]
    pair_img2_extend = pair_img2[None, :]
    
    pair_img1_pattern = model.get_pattern(pair_img1_extend, layers, device, flatten=True)
    pair_img2_pattern = model.get_pattern(pair_img2_extend, layers, device, flatten=True)
    
    pair_imgs_pattern_check = pair_img1_pattern == pair_img2_pattern
    
    pair_overlap_ratio = pair_imgs_pattern_check.sum() / len(pair_imgs_pattern_check[0])
    
    avg_pair_check_ratio += pair_overlap_ratio

    
avg_pair_check_ratio = avg_pair_check_ratio / pair_num

print(f"\nAverage pattern overlap ratio over {pair_num} random pairs {avg_pair_check_ratio}")


Average pattern overlap ratio over 200 random pairs 0.8023908296943236


## Checking the pattern overlap ratio between $\alpha I$ and $I$, the pattern includes all the neurons
Sample a list of $\alpha$'s on $[0,1]$, checking the pattern overlap ratio between $\alpha I$ and $I$, find the average $\alpha$ for the ratio can pass some given threshold $r$

#### threshold $r = 1$, i.e. the patterns of $\alpha I$ and $I$ has to be exactly same

In [14]:
###########################Set parames###########################
num_sample = 20 # number of alpha values to sample between [0,1]
num_check = 200 # number of images to check

thrd = 1
print(f"Threshold: {thrd}\n")
###################################################################


alps = np.linspace(0, 1, num_sample)
alps = np.sort(alps) 
print(f"alphas picked: \n{alps} \n")


img_idxes = np.random.choice(range(10000), num_check, replace=False)
check_set = torch.utils.data.Subset(dataset2, img_idxes)
print(f"indices of {num_check} images going to be checked: \n {img_idxes}")




avg_alp_satified_thrd = 0

for (rdm_img, rdm_target) in check_set:

    rdm_img_extend = rdm_img[None, :]

    img_pattern = model.get_pattern(rdm_img_extend, layers, device, flatten=True)
    
    satify_thrd = False
    for i in range(len(alps)):
        a = alps[i]
        if a == 0:
            continue
        discounted_img = a*rdm_img_extend

        discounted_img_pattern = model.get_pattern(discounted_img, layers, device, flatten=True)
        pattern_check = img_pattern == discounted_img_pattern
        pattern_check_ratio = pattern_check.sum() / len(pattern_check[0])
        
        if pattern_check_ratio >= thrd and not satify_thrd:
            avg_alp_satified_thrd += a
            satify_thrd = True
    
#         print("pattern check ratio:", pattern_check_ratio)
#         print("number of neuron in difference:", len(pattern_check[0])-pattern_check.sum() )

avg_alp_satified_thrd = avg_alp_satified_thrd / num_check
print(f"\n Avg alpha for satisfying threshold: {avg_alp_satified_thrd}")

Threshold: 1

alphas picked: 
[0.         0.05263158 0.10526316 0.15789474 0.21052632 0.26315789
 0.31578947 0.36842105 0.42105263 0.47368421 0.52631579 0.57894737
 0.63157895 0.68421053 0.73684211 0.78947368 0.84210526 0.89473684
 0.94736842 1.        ] 

indices of 200 images going to be checked: 
 [8970  733 6442 1234 1462 7431 8984 5069 2206 7289 6711 9318 5536 6413
 5912 2045 4236 3238 7868 5484 6081 4401 6751 1506 4264 2052 8174 4092
  444 7685 7726 1265 4656 9439 1731 7368 4261 2360 3407  732  308 7036
 7806 6540 3041 5988 2097 7442 7445 3558 8155 5034 9743 1565 3267 7409
 5573 1569  235 7028 9400 3207 6948 9250 4865 5459 2330 1072 8246 8602
 8344 4568 9544 4172 7230 7435 3989 2711 3786 9085 6405  527 7314 2088
 5489 4227 5413 7622 7515 1918 7938 6058 1231 7303 3069  345 1542 4968
 9051 3368 2657 8905 2223 4144 1843 8341  188 7691 1873 9662 6949 2185
 8090 8202 7720 3901 7793 2048  248 7395 3561 8651  286  859 5946  994
 9681 3982  595 7787 6658 1668  644  471 6873 8379 3976  23

## Check the range for pattern overlap ratio does not satisfy the threshold anymore
Setting two thresholds $r_{low}$ and $r_{high}$ and find the range of the $\alpha$'s that can still make the overlap ration between $\alpha I$ and $I$ satisfies the threshold.
- $r_{low}$ is for overlap ratio obtained on $\alpha \in [0,1]$ and $r_{high}$ is for overlap ratio obtained on $\alpha \in [1,\infty]$
- Since when $\alpha$ moves from 0 to 1, the ratio will increase and reach 1 at $\alpha=1$, when $\alpha$ continues to increase, i.e. $1 \to \infty $, it will decrease again.

##### Observation of results from below:
- If we set the high threshold to be anything smaller than 1 like 0.97, it will very difficult to find the breaking point alpha (it could be super large value like more than 7 digits)
- So instead, we have the high threshold to be 1.0. We set the number of samples we check to get the average to be 100. And the result os around 1.82.
- The low threshold, is around 0.77.
- Since the step value is too small, we set the limitation to be 20.
- Need to define what is considered as break?

In [15]:
##########################Set params###########################

# number of images to check
num_check = 10 

# setting the step of increasing the value of alpha on [0,1] and [1, infinity] respectively
# instead of using the np.linspace() to sample the alphas, we use step here
alps_step_low = 0.05
alps_step_high = 0.01

low_thrd = 1.0
high_thrd = 1.0

avg_alp_satified_low_thrd = 0
avg_alp_satified_high_thrd = 0

# upperbound alpha value for checking on [1, infinity], since it is possible that this checking goes infinity
alps_upperbound = 20

###############################################################

# count for NOT be able to find the alpha value satisfying the threshold on [1, infinity]
count_high_out_of_bound = 0


img_idxes = np.random.choice(range(10000), num_check, replace=False)
check_set = torch.utils.data.Subset(dataset2, img_idxes)
print(f"indices of {num_check} images going to be checked: \n {img_idxes}")



for (rdm_img, rdm_target) in check_set:
    print("====================")

    rdm_img_extend = rdm_img[None, :]

    img_pattern = model.get_pattern(rdm_img_extend, layers, device, flatten=True)
    
    satify_low_thrd = False
    satify_high_thrd = False
    
    alp = 0
    
    while not satify_high_thrd:
        discounted_img = alp * rdm_img_extend

        discounted_img_pattern = model.get_pattern(discounted_img, layers, device, flatten=True)
        pattern_check = img_pattern == discounted_img_pattern
        pattern_check_ratio = pattern_check.sum() / len(pattern_check[0])
        
        if alp > 1 and not satify_low_thrd:
            print("PROBLEM!!!!!", pattern_check_ratio)
        
        if pattern_check_ratio >= low_thrd and not satify_low_thrd:
            avg_alp_satified_low_thrd += alp
            satify_low_thrd = True
            print(f"find low: {alp}")
            alp = 1
            
        elif satify_low_thrd and not satify_high_thrd and pattern_check_ratio < high_thrd:
            avg_alp_satified_high_thrd += alp
            satify_high_thrd = True
            print(f"find high: {alp}")
            
        elif satify_low_thrd and not satify_high_thrd and alp >= alps_upperbound:
            count_high_out_of_bound += 1
            break
        
#         print("pattern check ratio:", pattern_check_ratio)
#         print("number of neuron in difference:", len(pattern_check[0])-pattern_check.sum() )
            
        if not satify_low_thrd:
            alp += alps_step_low
            alp = min(1, alp)
        else:
            alp += alps_step_high
        

avg_alp_satified_low_thrd = avg_alp_satified_low_thrd / num_check
avg_alp_satified_high_thrd = avg_alp_satified_high_thrd / (num_check - count_high_out_of_bound) if (num_check - count_high_out_of_bound) != 0 else None 

print(f"Totally {count_high_out_of_bound} samples cannot find the break point for high bound.")
print(f"\n Avg alpha for satisfying the low threshold: {avg_alp_satified_low_thrd}")
print(f"\n Avg alpha for satisfying the high threshold: {avg_alp_satified_high_thrd}")

indices of 10 images going to be checked: 
 [1309 2606 8849 2251 1981 1880 2638 3670 6766 6584]
find low: 0.7500000000000001
find low: 0.8000000000000002
find high: 1.06
find low: 0.6
find high: 1.1
find low: 0.7500000000000001
find high: 1.03
find low: 0.9000000000000002
find high: 1.1
find low: 0.9000000000000002
find high: 1.1600000000000001
find low: 0.6
find high: 2.0100000000000007
find low: 0.5499999999999999
find high: 1.2700000000000002
find low: 0.7000000000000001
find low: 0.8500000000000002
find high: 2.0100000000000007
Totally 2 samples cannot find the break point for high bound.

 Avg alpha for satisfying the low threshold: 0.7400000000000001

 Avg alpha for satisfying the high threshold: 1.3425000000000002
