In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict, defaultdict
import torch.optim as optim
import time
from src import *
import math
import pickle
import pandas as pd

In [2]:
plt.rc('text', usetex=True)
%config InlineBackend.figure_format = 'retina'
!mkdir -p figures
!mkdir -p snapshots

In [24]:
# parameters
# PGK
ϵ = 8 / 256
ϵ_s = 2 / 256


val_K = 10
EPOCHS = 200
TEST_EVERY = 40

batch_size = 128
pre_train = False

small = False
training_with_replay_Ks = [1, 4, 10, 20]
free_Ks = [1, 2, 4, 10]

    
PGD_Ks = [1, 2]



attack_names = ['FSM', 'PGD-20', 'PGD-100', 'CW-100']
attacks = [
     *[PGD(K, ϵ, 2.5 * ϵ/K) for K in [1, 20, 100]],
     CW(100, 1e4, ϵ, 2.5 * ϵ/ 100)]
    
    
if small:
    EPOCHS = 5
    TEST_EVERY = 5
    training_with_replay_Ks = [1, 5]
    free_Ks = [1, 5]
    attack_names = ['FSM', 'PGD-2', 'CW-2']
    attacks = [
         *[PGD(K, ϵ, 2.5 * ϵ/K) for K in [1, 2]],
         CW(2, 1e4, ϵ, 2.5 * ϵ/ 2)]

In [4]:
assert all(EPOCHS == K * int(EPOCHS / K) for K in training_with_replay_Ks)
assert all(EPOCHS == K * int(EPOCHS / K) for K in free_Ks)

In [5]:
# CIFAR INPUT
transform = transforms.Compose(
    [transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.RandomHorizontalFlip(p=0.5),
    ])

transform_test = transforms.Compose([
    transforms.ToTensor()
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, 
                                        download=True, transform=transform)
if small:
    trainset = torch.utils.data.Subset(trainset, range(batch_size))

trainloader = torch.utils.data.DataLoader(trainset, 
        batch_size=batch_size,
        shuffle=True, num_workers=4, 
        pin_memory=True, drop_last=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)

if small:
    testset = torch.utils.data.Subset(testset, range(batch_size))
    
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=4)


dataiter = iter(trainloader)
images, labels = dataiter.next()

Files already downloaded and verified
Files already downloaded and verified


In [6]:
norm = StandardScalerLayer(lambda: map(lambda x: x[0], trainloader))

In [7]:
def build_model(ϵ=ϵ, K=1):
    model = WideResNet(28, 10, 10, 0.1)
    adv = AdversarialForFree(ϵ, 0, 1)
    if ϵ not in [0, False]:
        l = [('adv', adv)]
    else:
        l = []
    l.extend([
        ('normalizer', norm),
        ('resnet', model)])
    
    model = nn.Sequential(OrderedDict(l)).cuda()
    
    optimizer = optim.SGD(model.parameters(), 
                          lr=0.1,
                          nesterov=True, 
                          momentum=0.9)
    
    scheduler =  optim.lr_scheduler.MultiStepLR(optimizer, milestones=[60//K, 120//K, 160//K], gamma=0.2)
    
    return model, optimizer, scheduler

imgsize = images.size()[1:]
imgsize

torch.Size([3, 32, 32])

In [8]:
free_logs = defaultdict(lambda : defaultdict(lambda :[]))

for K in free_Ks:
    print(f'\n\n\n\n\ntraining with {K} replays------------------------\n\n\n\n')
    model, optimizer, scheduler = build_model(K=K)
    
    for epoch in range(int(EPOCHS / K)):  # loop over the dataset multiple times
        logs = train_with_replay(K, model, trainloader, optimizer, epoch,
                                after_func=lambda model: model.adv.step())
        free_logs[K]['train'].append(logs)
        
        scheduler.step()
        if (epoch * K + K) % TEST_EVERY == 0:

            logs = run_val(model, testloader, epoch)
            free_logs[K]['test'].append(logs)

            # adv loss
    run_attacks(free_logs[K], attacks, attack_names, model, testloader, epoch)
    

    print('Finished Training')
    torch.save(model.state_dict(), f"snapshots/wresnet-cifar-10-free-{K}.pch")
    del model
    torch.cuda.empty_cache()
    
with open('snapshots/free_logs.pickle', 'wb') as fd:
    pickle.dump(holder_to_dict(free_logs), fd)






training with 1 replays------------------------




train 	 1: 1.4239 48.3% 70.5s
train 	 2: 0.9001 68.2% 70.4s
train 	 3: 0.7045 75.5% 70.5s
train 	 4: 0.5947 79.3% 70.5s
train 	 5: 0.5242 81.7% 70.5s
train 	 6: 0.4688 83.9% 71.0s
train 	 7: 0.4235 85.3% 70.5s
train 	 8: 0.3927 86.3% 70.5s
train 	 9: 0.3571 87.6% 70.5s
train 	 10: 0.3299 88.6% 70.5s
train 	 11: 0.3091 89.2% 70.5s
train 	 12: 0.2870 89.9% 70.5s
train 	 13: 0.2657 90.8% 70.6s
train 	 14: 0.2475 91.4% 70.9s
train 	 15: 0.2348 91.8% 70.5s
train 	 16: 0.2170 92.3% 70.5s
train 	 17: 0.2089 92.7% 70.9s
train 	 18: 0.1956 93.2% 70.5s
train 	 19: 0.1847 93.6% 70.5s
train 	 20: 0.1736 93.8% 70.5s
train 	 21: 0.1627 94.2% 70.5s
train 	 22: 0.1493 94.7% 70.5s
train 	 23: 0.1438 94.9% 70.5s
train 	 24: 0.1347 95.2% 70.5s
train 	 25: 0.1252 95.5% 70.5s
train 	 26: 0.1222 95.7% 70.5s
train 	 27: 0.1158 95.9% 70.5s
train 	 28: 0.1060 96.2% 70.5s
train 	 29: 0.1054 96.3% 70.5s
train 	 30: 0.0952 96.6% 70.5s
train 	 31: 0.0900 96.

In [12]:
#standard training with replay logs
srl = defaultdict(lambda : defaultdict(lambda : []))
for K in [training_with_replay_Ks[0]]:
    print(f'\n\n\n\n\ntraining with {K} replays------------------------\n\n\n\n')

    model, optimizer, scheduler = build_model(False, K=K)
        
    for epoch in range(int(EPOCHS / K)): # loop over the dataset multiple times
            
        logs = train_with_replay(K, model, trainloader, optimizer, epoch)
        
        scheduler.step()
        srl[K]['train'].append(logs)
        if (epoch * K + K) % TEST_EVERY == 0:
            # valdiation loss
            logs = run_val(model, testloader, epoch)
            srl[K]['test'].append(logs)
    run_attacks(srl[K], attacks, 
                attack_names, model, testloader, epoch)
    print('Finished Training')
    torch.save(model.state_dict(), f"wresnet-cifar-10-normal-{K}.pch")
    del model
    torch.cuda.empty_cache()

with open('snapshots/srl.pickle', 'wb') as fd:
    pickle.dump(holder_to_dict(srl), fd)






training with 1 replays------------------------




train 	 1: 1.4033 48.6% 70.5s
train 	 2: 0.8623 69.6% 70.5s
train 	 3: 0.6493 77.6% 70.7s
train 	 4: 0.5334 81.4% 70.5s
train 	 5: 0.4524 84.2% 70.5s
train 	 6: 0.4021 86.0% 70.5s
train 	 7: 0.3554 87.7% 70.5s
train 	 8: 0.3164 89.2% 70.5s
train 	 9: 0.2872 90.1% 70.5s
train 	 10: 0.2611 90.9% 70.5s
train 	 11: 0.2374 91.8% 70.5s
train 	 12: 0.2204 92.2% 70.5s
train 	 13: 0.1986 93.0% 70.5s
train 	 14: 0.1839 93.5% 70.5s
train 	 15: 0.1719 93.9% 70.5s
train 	 16: 0.1539 94.6% 70.4s
train 	 17: 0.1418 95.1% 70.4s
train 	 18: 0.1302 95.4% 70.8s
train 	 19: 0.1231 95.7% 70.4s
train 	 20: 0.1162 95.8% 70.4s
train 	 21: 0.1056 96.3% 70.6s
train 	 22: 0.0957 96.6% 70.5s
train 	 23: 0.0918 96.7% 70.4s
train 	 24: 0.0814 97.1% 70.4s
train 	 25: 0.0770 97.3% 70.4s
train 	 26: 0.0726 97.4% 70.4s
train 	 27: 0.0656 97.7% 70.4s
train 	 28: 0.0610 97.9% 70.4s
train 	 29: 0.0594 97.9% 70.4s
train 	 30: 0.0519 98.2% 70.4s
train 	 31: 0.0539 98.

In [27]:
pgd_logs = defaultdict(lambda : defaultdict(lambda : []))
for K in PGD_Ks:
    print(f'\n\n\n\n\ntraining with {K}-PGD------------------------\n\n\n\n')
    model, optimizer, scheduler = build_model(False)
    
    attack = PGD(K, ϵ, 2.5 * ϵ / K)
    
    for epoch in range(EPOCHS):  # loop over the dataset multiple times
        
        
        

        logs = train_with_replay(1, 
             model, 
             trainloader, 
             optimizer,
             epoch,
             input_func=lambda inputs, labels: attack(model, inputs, labels))
        pgd_logs[K]['train'].append(logs)
        
        scheduler.step()
        if (epoch + 1) % TEST_EVERY == 0:
            logs = run_val(model, testloader, epoch)
            pgd_logs[K]['test'].append(logs)

        if (epoch + 1) == math.ceil(EPOCHS / (K + 1)):
            if (epoch + 1) % TEST_EVERY != 0:
                logs = run_val(model, testloader, epoch)
                pgd_logs[K]['test'].append(logs)
            # for K = 1 we want to test at epoch 99 -> True
            # for K = 2 we want to test at epoch 67 -> True
            
            run_attacks(pgd_logs[K], attacks, 
                attack_names, model, testloader, epoch)
            
    run_attacks(pgd_logs[K], attacks, 
                attack_names, model, testloader, epoch)

    print('Finished Training')
    torch.save(model.state_dict(), f"snapshots/wresnet-cifar-10-pgk-{K}.pch")
    del model
    torch.cuda.empty_cache()
    
with open('snapshots/pgd_logs.pickle', 'wb') as fd:
    pickle.dump(holder_to_dict(pgd_logs), fd)






training with 1-PGD------------------------




train 	 1: 2.0247 23.6% 136.0s
train 	 2: 1.8348 30.4% 137.3s
train 	 3: 1.7204 34.4% 138.2s
train 	 4: 1.6313 38.0% 137.5s
train 	 5: 1.5662 40.5% 137.5s
train 	 6: 1.5104 42.6% 137.5s
train 	 7: 1.4617 44.6% 137.5s
train 	 8: 1.4238 46.2% 137.5s
train 	 9: 1.3879 47.4% 137.5s
train 	 10: 1.3541 48.6% 137.6s
train 	 11: 1.3222 49.9% 137.6s
train 	 12: 1.2946 50.7% 137.5s
train 	 13: 1.2669 51.8% 137.5s
train 	 14: 1.2390 52.7% 137.5s
train 	 15: 1.2097 53.7% 137.5s
train 	 16: 1.1880 54.7% 137.5s
train 	 17: 1.1607 55.7% 137.5s
train 	 18: 0.9996 61.9% 137.5s
train 	 19: 0.4728 83.3% 137.6s
train 	 20: 0.3548 87.6% 137.6s
train 	 21: 0.2962 89.7% 137.5s
train 	 22: 0.2675 90.7% 137.6s
train 	 23: 0.2267 92.1% 137.5s
train 	 24: 0.2015 93.1% 137.5s
train 	 25: 0.1866 93.4% 137.6s
train 	 26: 0.1765 93.9% 137.5s
train 	 27: 0.1535 94.6% 138.2s
train 	 28: 0.1460 94.9% 137.5s
train 	 29: 0.1162 96.0% 137.5s
train 	 30: 0.1097 96.1% 137

In [56]:
fmt = lambda x: f'$${x * 100:.2f}\%$$'
d = {}
def get_good_log(x, K):
    return sorted(x, key=lambda j: abs((j.epoch + 1) - (EPOCHS // (K+1))))[-1]
d['Training'] = ['Natural', 
         *[f'Free $m={K}$' for K in free_Ks],
         *[f'{K}-PGD' for K in PGD_Ks],
         *[f'{K}-PGD(EPOCH/K)' for K in PGD_Ks]]

x = [srl[1]['test'][-1].acc,
         *[free_logs[K]['test'][-1].acc for K in free_Ks],
         *[pgd_logs[K]['test'][-1].acc for K in PGD_Ks],
        *[get_good_log(pgd_logs[K]['test'], K).acc for K in PGD_Ks]]

d['Natural Images'] = list(map(fmt, x))
            
for name in attack_names:
    n = f'adv_test/{name}'
    
    x = [srl[1][n][-1].acc]
    
    for K in free_Ks:
        x.append(free_logs[K][n][-1].acc)
    
    for K in PGD_Ks:
        x.append(pgd_logs[K][n][-1].acc)
    for K in PGD_Ks:
        x.append(pgd_logs[K][n][-2].acc)
    d[name] = list(map(fmt, x))
        
tt = lambda x: sum(i.time for i in x)
fmt = lambda x: f'$${math.ceil(x / 60)}$$'
x = [srl[1]['train'],
    *[free_logs[K]['train'] for K in free_Ks],
    *[pgd_logs[K]['train'] for K in PGD_Ks],
    *[pgd_logs[K]['train'][:math.ceil(EPOCHS / (K + 1))] for K in PGD_Ks]]

d['Training Time(M)'] = list(map(lambda x: fmt(tt(x)), x))

df = pd.DataFrame(d)

df

Unnamed: 0,Training,Natural Images,FSM,PGD-20,PGD-100,CW-100,Training Time(M)
0,Natural,$$94.01\%$$,$$10.77\%$$,$$0.00\%$$,$$0.00\%$$,$$0.00\%$$,$$235$$
1,Free $m=1$,$$93.60\%$$,$$15.33\%$$,$$0.58\%$$,$$0.54\%$$,$$0.39\%$$,$$236$$
2,Free $m=2$,$$89.98\%$$,$$43.26\%$$,$$29.28\%$$,$$29.02\%$$,$$29.50\%$$,$$235$$
3,Free $m=4$,$$86.72\%$$,$$51.03\%$$,$$41.44\%$$,$$41.31\%$$,$$42.19\%$$,$$235$$
4,Free $m=10$,$$82.49\%$$,$$52.89\%$$,$$47.08\%$$,$$47.02\%$$,$$46.44\%$$,$$235$$
5,1-PGD,$$88.54\%$$,$$97.65\%$$,$$0.01\%$$,$$0.01\%$$,$$0.01\%$$,$$459$$
6,2-PGD,$$85.53\%$$,$$54.24\%$$,$$42.90\%$$,$$42.48\%$$,$$42.43\%$$,$$680$$
7,1-PGD(EPOCH/K),$$88.54\%$$,$$97.74\%$$,$$0.01\%$$,$$0.01\%$$,$$0.01\%$$,$$230$$
8,2-PGD(EPOCH/K),$$85.53\%$$,$$54.36\%$$,$$44.38\%$$,$$44.00\%$$,$$44.88\%$$,$$228$$


In [57]:
df.to_csv('figures/grid.csv')

In [58]:
df.to_latex('figures/grid.tex')

In [None]:

for K in training_with_replay_Ks[1:]:
    print(f'\n\n\n\n\ntraining with {K} replays------------------------\n\n\n\n')

    model, optimizer, scheduler = build_model(False, K=K)
        
    for epoch in range(int(EPOCHS / K)): # loop over the dataset multiple times
            
        logs = train_with_replay(K, model, trainloader, optimizer, epoch)
        
        scheduler.step()
        srl[K]['train'].append(logs)
        if (epoch * K + K) % TEST_EVERY == 0:
            # valdiation loss
            logs = run_val(model, testloader, epoch)
            srl[K]['test'].append(logs)
    run_attacks(srl[K], attacks, 
                attack_names, model, testloader, epoch)
    print('Finished Training')
    torch.save(model.state_dict(), f"wresnet-cifar-10-normal-{K}.pch")
    del model
    torch.cuda.empty_cache()

with open('snapshots/srl.pickle', 'wb') as fd:
    pickle.dump(holder_to_dict(srl), fd)

In [None]:
fig, (ax2, ax1) = plt.subplots(ncols=2, figsize=(15,7))

y = [srl[K]["test"][-1].acc * 100 for K in training_with_replay_Ks]
bars = ax1.bar([f'$m={K}$' for K in training_with_replay_Ks], y)
for (i, bar) in zip(y, bars):
    t = ax1.text(bar.get_x() + bar.get_width() /2 - 0.07 , bar.get_height() + 0.10, f'{i:0.1f}%')
for ax in [ax1, ax2]:
    ax.set_xlabel('number of replay steps $m$')
ax1.set_ylabel('validation accuracy ($\%$)')

ax2.set_ylabel('validation loss (KL)')
y = [srl[K]["test"][-1].loss for K in training_with_replay_Ks]
bars = ax2.bar([f'$m={K}$' for K in training_with_replay_Ks], y)
for (i, bar) in zip(y, bars):
    t = ax2.text(bar.get_x() + bar.get_width() /2 - 0.07 , bar.get_height() + 0.10, f'{i:0.1f}')
def savefig(fig, name, f=['svg', 'pdf', 'png']):
    for e in f:
        fig.savefig('figures/' + name + '.' + e)
savefig(fig, 'cost_of_replay')
