In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim

from src.models import CifarResNet, MNIST_CNN, CIFAR_CNN
from src.helpers import evaluate_rob_accuracy, evaluate_clean_accuracy, load_model, safe_model,_evaluate_model
from src.data_loader import load_torchvision_dataset, load_imagenette
#from src.pruning import identify_layers, _evaluate_sparsity

import time

if torch.cuda.is_available() == True:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
dtype = torch.float32

cuda:0


In [1]:
import pandas as pd

In [4]:
pd.read_pickle('./results/preliminary-double.pkl')

Unnamed: 0,l2_robustness,linf_robustness,clean_accuracy
1,70.117186,67.382809,67.9
2,68.945312,66.015623,69.22
4,68.554687,64.062499,68.67
8,65.82031,62.499996,65.39
16,57.226559,52.343747,55.59


In [18]:

evaluate_clean_accuracy(model.to(device), test_loader, device)

(10.76, 0.0)

In [10]:
from foolbox import PyTorchModel, accuracy, samples
from foolbox.attacks import LinfPGD, FGSM, L0BrendelBethgeAttack, L2CarliniWagnerAttack

epochs = 500


def run(training_method):
    model = CIFAR_CNN().to(device)
    #print(model.device)
    compression_rates = [1,2,4,8,16]
    stats = {}
    for ratio in compression_rates:
        print('compression rate: ', 1-1/ratio)
        fit = get_train_method(model, training_method)
        model.prune_magnitude_global_unstruct(1-1/ratio, device)
        #print(fit)
        train_data = fit(train_loader, test_loader, epochs, device, eps=8/255, patience=5)
        images, labels = next(iter(test_loader))
        images, labels = images.to(device), labels.to(device)
        #stats[f'ratio']['l0_robustness'] = bb_attack(model, images, labels).item()
        #print('bb done')
        stats[f'{ratio}'] = {}
        stats[f'{ratio}']['l2_robustness'] = cw_attack(model, images, labels).item()
        print('cw done')
        stats[f'{ratio}']['linf_robustness'] = pgd_attack(model, images, labels).item()
        print('pgd done')
        stats[f'{ratio}']['clean_accuracy'] = train_data['val_accuracy']
        
    return(stats)
        
        
        

def get_train_method(model, method):
    if method=='standard':
        return model.fit
    if method=='free':
        return model.fit_free
    if method=='fast':
        return model.fit_fast
    if method=='fast_double':
        return model.fit_fast_with_double_update

def bb_attack(model, images, labels, eps=8/255):
    model.eval()
    fmodel = PyTorchModel(model, bounds=(0, 1))
    attack = L0BrendelBethgeAttack()
    raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=eps)
    model.train()

    return (1 - torch.sum(success)/len(success)) / 100

def cw_attack(model, images, labels, eps=8/255):
    model.eval()
    fmodel = PyTorchModel(model, bounds=(0, 1))
    attack = L2CarliniWagnerAttack()
    raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=eps)
    model.train()

    return (1 - torch.sum(success)/len(success)) / 100

def pgd_attack(model, images, labels, eps=8/255):
    model.eval()
    fmodel = PyTorchModel(model, bounds=(0, 1))
    attack = LinfPGD()
    raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=eps)
    model.train()

    return (1 - torch.sum(success)/len(success)) / 100

# Double Update vs. Single Update Fast Adversarial Training
Specs:

CIFAR CNN: 4 Conv (16,16,32,32) with batchnorm, 2 FC (128,10)

Data: Cifar (32,32,3)

30 Epochs with eps=8/255



Standard Fast Adv. Training:
Clean: 63.05%
Robust: 59.34%

Fast Adv. Training w Double Update:
Clean: 63.99%
Robust: 61.35%




# Initialization

In [11]:
#model = ResNet()
#model = MNIST_CNN()
model = CIFAR_CNN()

identifying layers


In [12]:
train_loader, test_loader = load_torchvision_dataset('CIFAR10')

Files already downloaded and verified
Files already downloaded and verified


In [11]:
PATH = './saved-models/CIFAR-baseline-150-epochs.pth'
model = load_model(model, PATH)

# Experiment


1. Prune
2. Train
3. measure robust accuracy


In [13]:
standard_stats = run('standard')

identifying layers
compression rate:  0.0
[1,     1] loss: 3.07543, train_accuracy: 8.59
[1,     2] loss: 2.74960, train_accuracy: 11.13
[1,     3] loss: 2.46489, train_accuracy: 17.19
[1,     4] loss: 2.45922, train_accuracy: 21.09
[1,     5] loss: 2.41755, train_accuracy: 20.90
[1,     6] loss: 2.31154, train_accuracy: 22.66
[1,     7] loss: 2.24753, train_accuracy: 22.66
[1,     8] loss: 2.26820, train_accuracy: 20.70
[1,     9] loss: 2.13691, train_accuracy: 26.95
[1,    10] loss: 2.09834, train_accuracy: 24.80
[1,    11] loss: 2.12638, train_accuracy: 28.32
[1,    12] loss: 2.06679, train_accuracy: 31.25
[1,    13] loss: 2.05545, train_accuracy: 27.34
[1,    14] loss: 1.94770, train_accuracy: 33.20
[1,    15] loss: 1.88556, train_accuracy: 34.38
[1,    16] loss: 1.91349, train_accuracy: 31.64
[1,    17] loss: 1.89596, train_accuracy: 32.62
[1,    18] loss: 1.91558, train_accuracy: 31.45
[1,    19] loss: 1.85688, train_accuracy: 34.96
[1,    20] loss: 1.89446, train_accuracy: 38.09

[2,    72] loss: 1.25887, train_accuracy: 53.71
[2,    73] loss: 1.26233, train_accuracy: 55.66
[2,    74] loss: 1.21899, train_accuracy: 56.45
[2,    75] loss: 1.30196, train_accuracy: 54.69
[2,    76] loss: 1.26409, train_accuracy: 56.45
[2,    77] loss: 1.27879, train_accuracy: 53.32
[2,    78] loss: 1.30535, train_accuracy: 53.71
[2,    79] loss: 1.21662, train_accuracy: 53.32
[2,    80] loss: 1.31086, train_accuracy: 55.66
[2,    81] loss: 1.26724, train_accuracy: 54.10
[2,    82] loss: 1.20118, train_accuracy: 57.23
[2,    83] loss: 1.19477, train_accuracy: 58.40
[2,    84] loss: 1.27326, train_accuracy: 52.93
[2,    85] loss: 1.33639, train_accuracy: 53.71
[2,    86] loss: 1.32417, train_accuracy: 52.15
[2,    87] loss: 1.21581, train_accuracy: 53.52
[2,    88] loss: 1.30643, train_accuracy: 53.12
[2,    89] loss: 1.37992, train_accuracy: 52.15
[2,    90] loss: 1.22989, train_accuracy: 55.27
[2,    91] loss: 1.25408, train_accuracy: 55.08
[2,    92] loss: 1.20920, train_accuracy

[4,    42] loss: 1.03705, train_accuracy: 62.89
[4,    43] loss: 1.07753, train_accuracy: 63.28
[4,    44] loss: 1.02250, train_accuracy: 63.87
[4,    45] loss: 0.98614, train_accuracy: 65.43
[4,    46] loss: 0.99279, train_accuracy: 66.60
[4,    47] loss: 0.95851, train_accuracy: 65.23
[4,    48] loss: 1.06056, train_accuracy: 64.84
[4,    49] loss: 1.00350, train_accuracy: 64.26
[4,    50] loss: 0.94727, train_accuracy: 65.62
[4,    51] loss: 1.05110, train_accuracy: 64.45
[4,    52] loss: 0.99912, train_accuracy: 63.09
[4,    53] loss: 1.02604, train_accuracy: 63.67
[4,    54] loss: 1.04325, train_accuracy: 61.13
[4,    55] loss: 1.05767, train_accuracy: 61.52
[4,    56] loss: 1.04199, train_accuracy: 61.72
[4,    57] loss: 1.01788, train_accuracy: 62.30
[4,    58] loss: 1.06780, train_accuracy: 62.89
[4,    59] loss: 1.04485, train_accuracy: 64.26
[4,    60] loss: 1.05533, train_accuracy: 62.30
[4,    61] loss: 0.97149, train_accuracy: 66.80
[4,    62] loss: 0.99877, train_accuracy

[6,    12] loss: 0.87107, train_accuracy: 70.51
[6,    13] loss: 0.89852, train_accuracy: 67.19
[6,    14] loss: 0.86627, train_accuracy: 70.90
[6,    15] loss: 0.84164, train_accuracy: 70.70
[6,    16] loss: 0.79746, train_accuracy: 71.09
[6,    17] loss: 0.83180, train_accuracy: 68.55
[6,    18] loss: 0.80388, train_accuracy: 71.29
[6,    19] loss: 0.92591, train_accuracy: 66.80
[6,    20] loss: 0.86026, train_accuracy: 68.36
[6,    21] loss: 0.85710, train_accuracy: 70.12
[6,    22] loss: 0.77981, train_accuracy: 71.68
[6,    23] loss: 0.96540, train_accuracy: 66.60
[6,    24] loss: 0.88558, train_accuracy: 68.55
[6,    25] loss: 0.86126, train_accuracy: 70.51
[6,    26] loss: 0.81775, train_accuracy: 73.24
[6,    27] loss: 0.80494, train_accuracy: 70.51
[6,    28] loss: 0.90528, train_accuracy: 69.34
[6,    29] loss: 0.90150, train_accuracy: 68.36
[6,    30] loss: 0.85550, train_accuracy: 70.31
[6,    31] loss: 0.83009, train_accuracy: 73.05
[6,    32] loss: 0.88334, train_accuracy

[7,    84] loss: 0.80167, train_accuracy: 72.07
[7,    85] loss: 0.89240, train_accuracy: 67.97
[7,    86] loss: 0.87248, train_accuracy: 70.12
[7,    87] loss: 0.78617, train_accuracy: 73.83
[7,    88] loss: 0.77728, train_accuracy: 74.02
[7,    89] loss: 0.78611, train_accuracy: 70.12
[7,    90] loss: 0.77112, train_accuracy: 73.44
[7,    91] loss: 0.73884, train_accuracy: 72.66
[7,    92] loss: 0.80555, train_accuracy: 68.55
[7,    93] loss: 0.89909, train_accuracy: 66.99
[7,    94] loss: 0.80103, train_accuracy: 73.63
[7,    95] loss: 0.81962, train_accuracy: 71.68
[7,    96] loss: 0.84272, train_accuracy: 68.55
[7,    97] loss: 0.85348, train_accuracy: 71.09
[7,    98] loss: 0.89267, train_accuracy: 69.94
duration: 11 s - train loss: 0.79816 - train accuracy: 72.18 - validation loss: 1.09 - validation accuracy: 62.13 
[8,     1] loss: 0.73178, train_accuracy: 76.37
[8,     2] loss: 0.75952, train_accuracy: 72.85
[8,     3] loss: 0.70229, train_accuracy: 75.59
[8,     4] loss: 0.75

[9,    54] loss: 0.71505, train_accuracy: 75.20
[9,    55] loss: 0.71357, train_accuracy: 74.61
[9,    56] loss: 0.66897, train_accuracy: 75.39
[9,    57] loss: 0.72289, train_accuracy: 76.37
[9,    58] loss: 0.71005, train_accuracy: 74.61
[9,    59] loss: 0.59077, train_accuracy: 81.45
[9,    60] loss: 0.69094, train_accuracy: 74.61
[9,    61] loss: 0.71042, train_accuracy: 74.41
[9,    62] loss: 0.74615, train_accuracy: 73.63
[9,    63] loss: 0.64629, train_accuracy: 76.95
[9,    64] loss: 0.64369, train_accuracy: 79.10
[9,    65] loss: 0.69775, train_accuracy: 76.17
[9,    66] loss: 0.69385, train_accuracy: 78.71
[9,    67] loss: 0.63370, train_accuracy: 77.54
[9,    68] loss: 0.67440, train_accuracy: 75.20
[9,    69] loss: 0.69004, train_accuracy: 76.95
[9,    70] loss: 0.81231, train_accuracy: 73.83
[9,    71] loss: 0.74517, train_accuracy: 75.00
[9,    72] loss: 0.71794, train_accuracy: 74.80
[9,    73] loss: 0.75695, train_accuracy: 75.59
[9,    74] loss: 0.64292, train_accuracy

[11,    22] loss: 0.52438, train_accuracy: 83.20
[11,    23] loss: 0.61501, train_accuracy: 80.08
[11,    24] loss: 0.51271, train_accuracy: 81.64
[11,    25] loss: 0.58019, train_accuracy: 80.47
[11,    26] loss: 0.49126, train_accuracy: 83.20
[11,    27] loss: 0.50594, train_accuracy: 83.01
[11,    28] loss: 0.54975, train_accuracy: 82.81
[11,    29] loss: 0.55544, train_accuracy: 82.81
[11,    30] loss: 0.58013, train_accuracy: 80.47
[11,    31] loss: 0.53824, train_accuracy: 82.62
[11,    32] loss: 0.54523, train_accuracy: 81.45
[11,    33] loss: 0.55015, train_accuracy: 81.84
[11,    34] loss: 0.53555, train_accuracy: 83.01
[11,    35] loss: 0.57592, train_accuracy: 79.88
[11,    36] loss: 0.55251, train_accuracy: 82.42
[11,    37] loss: 0.54338, train_accuracy: 80.27
[11,    38] loss: 0.56183, train_accuracy: 82.62
[11,    39] loss: 0.54718, train_accuracy: 80.66
[11,    40] loss: 0.53383, train_accuracy: 80.47
[11,    41] loss: 0.52785, train_accuracy: 80.86
[11,    42] loss: 0.

[12,    90] loss: 0.57979, train_accuracy: 80.66
[12,    91] loss: 0.54003, train_accuracy: 81.84
[12,    92] loss: 0.55166, train_accuracy: 81.05
[12,    93] loss: 0.57747, train_accuracy: 79.49
[12,    94] loss: 0.58432, train_accuracy: 80.08
[12,    95] loss: 0.56046, train_accuracy: 79.88
[12,    96] loss: 0.56537, train_accuracy: 78.32
[12,    97] loss: 0.52539, train_accuracy: 82.42
[12,    98] loss: 0.59180, train_accuracy: 77.68
duration: 11 s - train loss: 0.51278 - train accuracy: 82.45 - validation loss: 1.17 - validation accuracy: 62.48 
[13,     1] loss: 0.47505, train_accuracy: 84.18
[13,     2] loss: 0.45384, train_accuracy: 87.30
[13,     3] loss: 0.43835, train_accuracy: 87.11
[13,     4] loss: 0.48637, train_accuracy: 83.79
[13,     5] loss: 0.43700, train_accuracy: 85.74
[13,     6] loss: 0.42396, train_accuracy: 86.52
[13,     7] loss: 0.45408, train_accuracy: 85.94
[13,     8] loss: 0.41696, train_accuracy: 85.94
[13,     9] loss: 0.44039, train_accuracy: 86.72
[13

[1,    57] loss: 0.54244, train_accuracy: 81.64
[1,    58] loss: 0.52838, train_accuracy: 81.84
[1,    59] loss: 0.51767, train_accuracy: 81.25
[1,    60] loss: 0.44967, train_accuracy: 86.72
[1,    61] loss: 0.59022, train_accuracy: 78.91
[1,    62] loss: 0.52758, train_accuracy: 81.25
[1,    63] loss: 0.50904, train_accuracy: 82.42
[1,    64] loss: 0.50788, train_accuracy: 82.42
[1,    65] loss: 0.45437, train_accuracy: 84.18
[1,    66] loss: 0.57891, train_accuracy: 78.91
[1,    67] loss: 0.49972, train_accuracy: 82.03
[1,    68] loss: 0.51997, train_accuracy: 81.45
[1,    69] loss: 0.51355, train_accuracy: 83.01
[1,    70] loss: 0.48535, train_accuracy: 81.45
[1,    71] loss: 0.47978, train_accuracy: 83.59
[1,    72] loss: 0.48323, train_accuracy: 83.59
[1,    73] loss: 0.50755, train_accuracy: 82.81
[1,    74] loss: 0.52626, train_accuracy: 81.84
[1,    75] loss: 0.54141, train_accuracy: 80.47
[1,    76] loss: 0.47733, train_accuracy: 83.40
[1,    77] loss: 0.51649, train_accuracy

[3,    28] loss: 0.34844, train_accuracy: 88.28
[3,    29] loss: 0.41091, train_accuracy: 86.72
[3,    30] loss: 0.39005, train_accuracy: 86.91
[3,    31] loss: 0.41851, train_accuracy: 86.13
[3,    32] loss: 0.36841, train_accuracy: 88.67
[3,    33] loss: 0.43696, train_accuracy: 86.72
[3,    34] loss: 0.43369, train_accuracy: 85.74
[3,    35] loss: 0.38775, train_accuracy: 87.89
[3,    36] loss: 0.42316, train_accuracy: 86.13
[3,    37] loss: 0.38287, train_accuracy: 87.89
[3,    38] loss: 0.34788, train_accuracy: 90.04
[3,    39] loss: 0.45893, train_accuracy: 85.16
[3,    40] loss: 0.39328, train_accuracy: 88.09
[3,    41] loss: 0.43631, train_accuracy: 85.35
[3,    42] loss: 0.42141, train_accuracy: 85.35
[3,    43] loss: 0.44272, train_accuracy: 84.77
[3,    44] loss: 0.45220, train_accuracy: 85.74
[3,    45] loss: 0.39355, train_accuracy: 86.52
[3,    46] loss: 0.49033, train_accuracy: 84.18
[3,    47] loss: 0.39726, train_accuracy: 85.94
[3,    48] loss: 0.42591, train_accuracy

duration: 10 s - train loss: 0.37415 - train accuracy: 87.70 - validation loss: 1.31 - validation accuracy: 62.45 
[5,     1] loss: 0.29866, train_accuracy: 91.60
[5,     2] loss: 0.30170, train_accuracy: 91.02
[5,     3] loss: 0.32866, train_accuracy: 89.65
[5,     4] loss: 0.33305, train_accuracy: 90.62
[5,     5] loss: 0.32744, train_accuracy: 89.65
[5,     6] loss: 0.34357, train_accuracy: 89.65
[5,     7] loss: 0.32735, train_accuracy: 88.48
[5,     8] loss: 0.36896, train_accuracy: 88.67
[5,     9] loss: 0.36271, train_accuracy: 89.45
[5,    10] loss: 0.31707, train_accuracy: 90.04
[5,    11] loss: 0.28586, train_accuracy: 92.77
[5,    12] loss: 0.34526, train_accuracy: 86.52
[5,    13] loss: 0.29254, train_accuracy: 90.23
[5,    14] loss: 0.33232, train_accuracy: 89.06
[5,    15] loss: 0.30549, train_accuracy: 91.02
[5,    16] loss: 0.30786, train_accuracy: 90.82
[5,    17] loss: 0.29184, train_accuracy: 90.62
[5,    18] loss: 0.37565, train_accuracy: 87.70
[5,    19] loss: 0.37

[6,    70] loss: 0.33129, train_accuracy: 88.28
[6,    71] loss: 0.33192, train_accuracy: 88.48
[6,    72] loss: 0.34187, train_accuracy: 88.28
[6,    73] loss: 0.33501, train_accuracy: 90.62
[6,    74] loss: 0.31488, train_accuracy: 89.65
[6,    75] loss: 0.31172, train_accuracy: 88.87
[6,    76] loss: 0.30086, train_accuracy: 89.84
[6,    77] loss: 0.34784, train_accuracy: 88.48
[6,    78] loss: 0.34502, train_accuracy: 89.06
[6,    79] loss: 0.27983, train_accuracy: 92.58
[6,    80] loss: 0.36256, train_accuracy: 86.91
[6,    81] loss: 0.35338, train_accuracy: 88.87
[6,    82] loss: 0.34955, train_accuracy: 88.87
[6,    83] loss: 0.32041, train_accuracy: 87.89
[6,    84] loss: 0.37495, train_accuracy: 88.09
[6,    85] loss: 0.30066, train_accuracy: 90.04
[6,    86] loss: 0.34351, train_accuracy: 86.72
[6,    87] loss: 0.43028, train_accuracy: 86.13
[6,    88] loss: 0.28815, train_accuracy: 90.43
[6,    89] loss: 0.38065, train_accuracy: 87.11
[6,    90] loss: 0.33570, train_accuracy

[1,    39] loss: 0.83889, train_accuracy: 69.14
[1,    40] loss: 0.91889, train_accuracy: 70.70
[1,    41] loss: 0.86782, train_accuracy: 69.53
[1,    42] loss: 0.91700, train_accuracy: 66.80
[1,    43] loss: 0.87240, train_accuracy: 67.97
[1,    44] loss: 0.85234, train_accuracy: 69.73
[1,    45] loss: 0.88553, train_accuracy: 68.16
[1,    46] loss: 0.91718, train_accuracy: 68.95
[1,    47] loss: 0.97437, train_accuracy: 65.82
[1,    48] loss: 0.89719, train_accuracy: 67.97
[1,    49] loss: 0.91656, train_accuracy: 67.77
[1,    50] loss: 0.74511, train_accuracy: 74.41
[1,    51] loss: 0.91893, train_accuracy: 67.38
[1,    52] loss: 0.93508, train_accuracy: 67.97
[1,    53] loss: 0.77706, train_accuracy: 70.90
[1,    54] loss: 0.82157, train_accuracy: 71.09
[1,    55] loss: 0.87828, train_accuracy: 71.29
[1,    56] loss: 0.88995, train_accuracy: 68.36
[1,    57] loss: 0.77035, train_accuracy: 71.68
[1,    58] loss: 0.86377, train_accuracy: 69.92
[1,    59] loss: 0.81747, train_accuracy

[3,    10] loss: 0.61302, train_accuracy: 77.15
[3,    11] loss: 0.59912, train_accuracy: 79.30
[3,    12] loss: 0.57429, train_accuracy: 80.08
[3,    13] loss: 0.64461, train_accuracy: 76.95
[3,    14] loss: 0.54358, train_accuracy: 80.86
[3,    15] loss: 0.57310, train_accuracy: 79.30
[3,    16] loss: 0.59006, train_accuracy: 78.71
[3,    17] loss: 0.64195, train_accuracy: 75.59
[3,    18] loss: 0.54751, train_accuracy: 83.20
[3,    19] loss: 0.56973, train_accuracy: 79.88
[3,    20] loss: 0.55320, train_accuracy: 78.52
[3,    21] loss: 0.57215, train_accuracy: 79.49
[3,    22] loss: 0.70220, train_accuracy: 75.39
[3,    23] loss: 0.61950, train_accuracy: 75.98
[3,    24] loss: 0.67682, train_accuracy: 75.00
[3,    25] loss: 0.61033, train_accuracy: 78.52
[3,    26] loss: 0.62644, train_accuracy: 77.34
[3,    27] loss: 0.62422, train_accuracy: 78.32
[3,    28] loss: 0.49646, train_accuracy: 82.23
[3,    29] loss: 0.56745, train_accuracy: 78.52
[3,    30] loss: 0.58011, train_accuracy

[4,    82] loss: 0.57556, train_accuracy: 82.42
[4,    83] loss: 0.62533, train_accuracy: 78.52
[4,    84] loss: 0.56377, train_accuracy: 78.71
[4,    85] loss: 0.59354, train_accuracy: 77.93
[4,    86] loss: 0.55756, train_accuracy: 79.69
[4,    87] loss: 0.49982, train_accuracy: 80.66
[4,    88] loss: 0.56076, train_accuracy: 80.08
[4,    89] loss: 0.55987, train_accuracy: 80.08
[4,    90] loss: 0.63704, train_accuracy: 76.95
[4,    91] loss: 0.52661, train_accuracy: 79.88
[4,    92] loss: 0.54289, train_accuracy: 79.88
[4,    93] loss: 0.53883, train_accuracy: 81.25
[4,    94] loss: 0.55844, train_accuracy: 77.93
[4,    95] loss: 0.55982, train_accuracy: 80.08
[4,    96] loss: 0.68819, train_accuracy: 75.98
[4,    97] loss: 0.51579, train_accuracy: 79.88
[4,    98] loss: 0.59802, train_accuracy: 79.46
duration: 18 s - train loss: 0.55962 - train accuracy: 79.83 - validation loss: 1.24 - validation accuracy: 61.89 
[5,     1] loss: 0.49228, train_accuracy: 81.84
[5,     2] loss: 0.48

[6,    52] loss: 0.51771, train_accuracy: 80.47
[6,    53] loss: 0.52832, train_accuracy: 81.45
[6,    54] loss: 0.51090, train_accuracy: 82.03
[6,    55] loss: 0.51566, train_accuracy: 80.66
[6,    56] loss: 0.48118, train_accuracy: 82.81
[6,    57] loss: 0.41287, train_accuracy: 85.35
[6,    58] loss: 0.48945, train_accuracy: 83.79
[6,    59] loss: 0.50180, train_accuracy: 81.25
[6,    60] loss: 0.53243, train_accuracy: 80.86
[6,    61] loss: 0.43496, train_accuracy: 85.94
[6,    62] loss: 0.51493, train_accuracy: 82.03
[6,    63] loss: 0.52344, train_accuracy: 80.86
[6,    64] loss: 0.50902, train_accuracy: 80.27
[6,    65] loss: 0.58173, train_accuracy: 78.32
[6,    66] loss: 0.51042, train_accuracy: 81.45
[6,    67] loss: 0.50134, train_accuracy: 84.57
[6,    68] loss: 0.47506, train_accuracy: 83.79
[6,    69] loss: 0.55803, train_accuracy: 79.10
[6,    70] loss: 0.44004, train_accuracy: 86.72
[6,    71] loss: 0.50914, train_accuracy: 83.59
[6,    72] loss: 0.49863, train_accuracy

[8,    22] loss: 0.43440, train_accuracy: 85.16
[8,    23] loss: 0.46771, train_accuracy: 83.20
[8,    24] loss: 0.45225, train_accuracy: 84.18
[8,    25] loss: 0.43926, train_accuracy: 84.18
[8,    26] loss: 0.47653, train_accuracy: 81.84
[8,    27] loss: 0.48711, train_accuracy: 82.23
[8,    28] loss: 0.46429, train_accuracy: 83.79
[8,    29] loss: 0.45167, train_accuracy: 83.98
[8,    30] loss: 0.44803, train_accuracy: 83.20
[8,    31] loss: 0.42739, train_accuracy: 84.96
[8,    32] loss: 0.46523, train_accuracy: 82.81
[8,    33] loss: 0.51405, train_accuracy: 81.84
[8,    34] loss: 0.42826, train_accuracy: 85.35
[8,    35] loss: 0.44733, train_accuracy: 83.98
[8,    36] loss: 0.49444, train_accuracy: 82.42
[8,    37] loss: 0.50036, train_accuracy: 80.86
[8,    38] loss: 0.40928, train_accuracy: 84.96
[8,    39] loss: 0.42524, train_accuracy: 83.98
[8,    40] loss: 0.43900, train_accuracy: 83.40
[8,    41] loss: 0.40250, train_accuracy: 85.35
[8,    42] loss: 0.43418, train_accuracy

[1,    90] loss: 1.04896, train_accuracy: 62.70
[1,    91] loss: 1.06661, train_accuracy: 64.45
[1,    92] loss: 1.05366, train_accuracy: 61.91
[1,    93] loss: 1.14454, train_accuracy: 60.35
[1,    94] loss: 1.05772, train_accuracy: 62.30
[1,    95] loss: 1.01401, train_accuracy: 63.09
[1,    96] loss: 1.00793, train_accuracy: 64.06
[1,    97] loss: 1.03464, train_accuracy: 62.89
[1,    98] loss: 1.12978, train_accuracy: 59.82
duration: 25 s - train loss: 1.44987 - train accuracy: 54.08 - validation loss: 1.32 - validation accuracy: 56.44 
[2,     1] loss: 1.10455, train_accuracy: 62.11
[2,     2] loss: 1.10879, train_accuracy: 60.35
[2,     3] loss: 1.13911, train_accuracy: 60.94
[2,     4] loss: 1.02102, train_accuracy: 64.06
[2,     5] loss: 1.00061, train_accuracy: 65.04
[2,     6] loss: 0.99954, train_accuracy: 67.19
[2,     7] loss: 1.01006, train_accuracy: 63.09
[2,     8] loss: 0.89291, train_accuracy: 68.36
[2,     9] loss: 0.99123, train_accuracy: 63.28
[2,    10] loss: 1.04

[3,    60] loss: 0.97827, train_accuracy: 66.60
[3,    61] loss: 0.86630, train_accuracy: 69.34
[3,    62] loss: 0.85550, train_accuracy: 70.31
[3,    63] loss: 0.90250, train_accuracy: 67.38
[3,    64] loss: 0.89560, train_accuracy: 69.34
[3,    65] loss: 0.92977, train_accuracy: 66.80
[3,    66] loss: 0.85045, train_accuracy: 67.38
[3,    67] loss: 0.86874, train_accuracy: 69.14
[3,    68] loss: 0.91934, train_accuracy: 68.75
[3,    69] loss: 0.84534, train_accuracy: 68.16
[3,    70] loss: 0.89575, train_accuracy: 66.99
[3,    71] loss: 0.84589, train_accuracy: 68.95
[3,    72] loss: 0.87957, train_accuracy: 67.97
[3,    73] loss: 0.92305, train_accuracy: 66.41
[3,    74] loss: 0.90635, train_accuracy: 67.58
[3,    75] loss: 0.96797, train_accuracy: 67.38
[3,    76] loss: 0.89432, train_accuracy: 67.38
[3,    77] loss: 0.90415, train_accuracy: 67.77
[3,    78] loss: 0.81596, train_accuracy: 70.31
[3,    79] loss: 0.82914, train_accuracy: 69.14
[3,    80] loss: 0.82069, train_accuracy

[5,    30] loss: 0.73639, train_accuracy: 74.02
[5,    31] loss: 0.72091, train_accuracy: 73.44
[5,    32] loss: 0.78724, train_accuracy: 72.07
[5,    33] loss: 0.80688, train_accuracy: 70.51
[5,    34] loss: 0.84538, train_accuracy: 68.95
[5,    35] loss: 0.88075, train_accuracy: 68.75
[5,    36] loss: 0.74115, train_accuracy: 73.83
[5,    37] loss: 0.83622, train_accuracy: 70.31
[5,    38] loss: 0.79119, train_accuracy: 69.92
[5,    39] loss: 0.78460, train_accuracy: 71.48
[5,    40] loss: 0.92799, train_accuracy: 67.77
[5,    41] loss: 0.92350, train_accuracy: 69.14
[5,    42] loss: 0.73309, train_accuracy: 74.22
[5,    43] loss: 0.80770, train_accuracy: 72.85
[5,    44] loss: 0.82810, train_accuracy: 70.31
[5,    45] loss: 0.80240, train_accuracy: 71.29
[5,    46] loss: 0.76208, train_accuracy: 72.27
[5,    47] loss: 0.85305, train_accuracy: 70.70
[5,    48] loss: 0.81299, train_accuracy: 71.68
[5,    49] loss: 0.77116, train_accuracy: 73.63
[5,    50] loss: 0.70740, train_accuracy

[7,     1] loss: 0.69217, train_accuracy: 75.39
[7,     2] loss: 0.65587, train_accuracy: 76.17
[7,     3] loss: 0.69997, train_accuracy: 77.54
[7,     4] loss: 0.70057, train_accuracy: 73.44
[7,     5] loss: 0.67384, train_accuracy: 76.76
[7,     6] loss: 0.71741, train_accuracy: 73.44
[7,     7] loss: 0.72575, train_accuracy: 72.46
[7,     8] loss: 0.71067, train_accuracy: 74.02
[7,     9] loss: 0.75863, train_accuracy: 73.24
[7,    10] loss: 0.73488, train_accuracy: 76.17
[7,    11] loss: 0.69517, train_accuracy: 74.41
[7,    12] loss: 0.76401, train_accuracy: 71.29
[7,    13] loss: 0.68712, train_accuracy: 76.95
[7,    14] loss: 0.67286, train_accuracy: 74.41
[7,    15] loss: 0.68215, train_accuracy: 75.98
[7,    16] loss: 0.78544, train_accuracy: 72.46
[7,    17] loss: 0.74158, train_accuracy: 73.05
[7,    18] loss: 0.74576, train_accuracy: 73.24
[7,    19] loss: 0.77822, train_accuracy: 73.63
[7,    20] loss: 0.77932, train_accuracy: 73.05
[7,    21] loss: 0.69082, train_accuracy

[8,    72] loss: 0.73373, train_accuracy: 74.22
[8,    73] loss: 0.76470, train_accuracy: 72.07
[8,    74] loss: 0.72519, train_accuracy: 73.63
[8,    75] loss: 0.82390, train_accuracy: 70.12
[8,    76] loss: 0.72453, train_accuracy: 74.61
[8,    77] loss: 0.75417, train_accuracy: 72.07
[8,    78] loss: 0.77655, train_accuracy: 70.31
[8,    79] loss: 0.69450, train_accuracy: 75.20
[8,    80] loss: 0.77158, train_accuracy: 74.02
[8,    81] loss: 0.74840, train_accuracy: 71.68
[8,    82] loss: 0.73375, train_accuracy: 72.46
[8,    83] loss: 0.69908, train_accuracy: 74.80
[8,    84] loss: 0.68380, train_accuracy: 76.37
[8,    85] loss: 0.74404, train_accuracy: 73.63
[8,    86] loss: 0.71724, train_accuracy: 73.24
[8,    87] loss: 0.84948, train_accuracy: 69.73
[8,    88] loss: 0.74147, train_accuracy: 73.24
[8,    89] loss: 0.80846, train_accuracy: 70.70
[8,    90] loss: 0.79332, train_accuracy: 74.02
[8,    91] loss: 0.69589, train_accuracy: 76.95
[8,    92] loss: 0.83032, train_accuracy

[10,    43] loss: 0.76364, train_accuracy: 73.05
[10,    44] loss: 0.70324, train_accuracy: 75.20
[10,    45] loss: 0.72867, train_accuracy: 71.48
[10,    46] loss: 0.74477, train_accuracy: 72.07
[10,    47] loss: 0.71067, train_accuracy: 73.63
[10,    48] loss: 0.78564, train_accuracy: 73.83
[10,    49] loss: 0.70896, train_accuracy: 74.61
[10,    50] loss: 0.69714, train_accuracy: 76.17
[10,    51] loss: 0.72832, train_accuracy: 74.61
[10,    52] loss: 0.61575, train_accuracy: 77.73
[10,    53] loss: 0.62166, train_accuracy: 79.88
[10,    54] loss: 0.75750, train_accuracy: 72.46
[10,    55] loss: 0.81610, train_accuracy: 71.29
[10,    56] loss: 0.71683, train_accuracy: 74.61
[10,    57] loss: 0.79125, train_accuracy: 70.12
[10,    58] loss: 0.68939, train_accuracy: 73.24
[10,    59] loss: 0.70427, train_accuracy: 75.78
[10,    60] loss: 0.79259, train_accuracy: 72.85
[10,    61] loss: 0.72853, train_accuracy: 74.80
[10,    62] loss: 0.67217, train_accuracy: 74.61
[10,    63] loss: 0.

[12,    10] loss: 0.72874, train_accuracy: 74.22
[12,    11] loss: 0.65915, train_accuracy: 77.15
[12,    12] loss: 0.60807, train_accuracy: 79.30
[12,    13] loss: 0.76818, train_accuracy: 73.05
[12,    14] loss: 0.65425, train_accuracy: 77.93
[12,    15] loss: 0.76413, train_accuracy: 69.92
[12,    16] loss: 0.73690, train_accuracy: 73.05
[12,    17] loss: 0.68020, train_accuracy: 75.59
[12,    18] loss: 0.73779, train_accuracy: 71.48
[12,    19] loss: 0.69559, train_accuracy: 75.39
[12,    20] loss: 0.66989, train_accuracy: 78.52
[12,    21] loss: 0.59834, train_accuracy: 80.66
[12,    22] loss: 0.62350, train_accuracy: 76.76
[12,    23] loss: 0.67873, train_accuracy: 76.95
[12,    24] loss: 0.75380, train_accuracy: 73.83
[12,    25] loss: 0.72728, train_accuracy: 76.56
[12,    26] loss: 0.71045, train_accuracy: 74.22
[12,    27] loss: 0.69831, train_accuracy: 74.61
[12,    28] loss: 0.68844, train_accuracy: 73.63
[12,    29] loss: 0.65443, train_accuracy: 78.71
[12,    30] loss: 0.

[1,    76] loss: 1.39148, train_accuracy: 48.83
[1,    77] loss: 1.34071, train_accuracy: 53.91
[1,    78] loss: 1.18982, train_accuracy: 58.98
[1,    79] loss: 1.36360, train_accuracy: 50.39
[1,    80] loss: 1.37938, train_accuracy: 49.22
[1,    81] loss: 1.38103, train_accuracy: 49.61
[1,    82] loss: 1.31455, train_accuracy: 53.71
[1,    83] loss: 1.34620, train_accuracy: 53.71
[1,    84] loss: 1.22726, train_accuracy: 57.03
[1,    85] loss: 1.24067, train_accuracy: 56.45
[1,    86] loss: 1.32561, train_accuracy: 55.47
[1,    87] loss: 1.32909, train_accuracy: 52.73
[1,    88] loss: 1.26578, train_accuracy: 53.71
[1,    89] loss: 1.23530, train_accuracy: 57.62
[1,    90] loss: 1.36016, train_accuracy: 51.17
[1,    91] loss: 1.18888, train_accuracy: 57.03
[1,    92] loss: 1.30612, train_accuracy: 54.49
[1,    93] loss: 1.25687, train_accuracy: 55.08
[1,    94] loss: 1.30353, train_accuracy: 52.93
[1,    95] loss: 1.23576, train_accuracy: 54.49
[1,    96] loss: 1.26739, train_accuracy

[3,    47] loss: 1.14461, train_accuracy: 59.18
[3,    48] loss: 1.06189, train_accuracy: 61.72
[3,    49] loss: 0.95389, train_accuracy: 67.38
[3,    50] loss: 1.11152, train_accuracy: 60.35
[3,    51] loss: 0.99595, train_accuracy: 66.02
[3,    52] loss: 1.00123, train_accuracy: 65.04
[3,    53] loss: 1.00287, train_accuracy: 66.99
[3,    54] loss: 1.12640, train_accuracy: 60.35
[3,    55] loss: 1.05957, train_accuracy: 65.23
[3,    56] loss: 1.04616, train_accuracy: 63.28
[3,    57] loss: 1.11242, train_accuracy: 58.98
[3,    58] loss: 1.03533, train_accuracy: 62.50
[3,    59] loss: 1.00422, train_accuracy: 64.26
[3,    60] loss: 1.04351, train_accuracy: 59.38
[3,    61] loss: 0.98319, train_accuracy: 65.23
[3,    62] loss: 1.07155, train_accuracy: 60.55
[3,    63] loss: 1.03165, train_accuracy: 62.11
[3,    64] loss: 1.04951, train_accuracy: 63.67
[3,    65] loss: 0.99023, train_accuracy: 66.21
[3,    66] loss: 1.02670, train_accuracy: 64.26
[3,    67] loss: 1.05965, train_accuracy

[5,    17] loss: 1.03460, train_accuracy: 62.89
[5,    18] loss: 0.97510, train_accuracy: 62.89
[5,    19] loss: 1.06685, train_accuracy: 60.74
[5,    20] loss: 0.93591, train_accuracy: 68.16
[5,    21] loss: 1.03919, train_accuracy: 62.11
[5,    22] loss: 0.92816, train_accuracy: 66.80
[5,    23] loss: 0.93696, train_accuracy: 67.77
[5,    24] loss: 0.88172, train_accuracy: 67.38
[5,    25] loss: 0.92972, train_accuracy: 67.19
[5,    26] loss: 0.94978, train_accuracy: 66.60
[5,    27] loss: 1.01826, train_accuracy: 63.28
[5,    28] loss: 0.91985, train_accuracy: 66.99
[5,    29] loss: 0.93114, train_accuracy: 64.06
[5,    30] loss: 1.01579, train_accuracy: 66.02
[5,    31] loss: 1.06921, train_accuracy: 63.48
[5,    32] loss: 0.97876, train_accuracy: 67.38
[5,    33] loss: 0.99058, train_accuracy: 63.87
[5,    34] loss: 0.91236, train_accuracy: 67.77
[5,    35] loss: 0.94704, train_accuracy: 67.19
[5,    36] loss: 1.11467, train_accuracy: 60.94
[5,    37] loss: 1.00349, train_accuracy

[6,    88] loss: 1.00316, train_accuracy: 61.13
[6,    89] loss: 1.01950, train_accuracy: 63.48
[6,    90] loss: 0.96794, train_accuracy: 64.65
[6,    91] loss: 0.97899, train_accuracy: 66.41
[6,    92] loss: 0.97322, train_accuracy: 65.62
[6,    93] loss: 0.96537, train_accuracy: 67.38
[6,    94] loss: 0.95883, train_accuracy: 65.43
[6,    95] loss: 0.90156, train_accuracy: 66.60
[6,    96] loss: 0.88368, train_accuracy: 67.19
[6,    97] loss: 1.00896, train_accuracy: 63.09
[6,    98] loss: 0.98936, train_accuracy: 67.86
duration: 25 s - train loss: 0.95242 - train accuracy: 66.00 - validation loss: 1.11 - validation accuracy: 61.02 
[7,     1] loss: 0.93769, train_accuracy: 67.19
[7,     2] loss: 0.89039, train_accuracy: 66.21
[7,     3] loss: 0.93091, train_accuracy: 68.55
[7,     4] loss: 0.86906, train_accuracy: 70.51
[7,     5] loss: 0.92979, train_accuracy: 66.60
[7,     6] loss: 0.94913, train_accuracy: 64.84
[7,     7] loss: 0.93346, train_accuracy: 66.02
[7,     8] loss: 0.94

[8,    58] loss: 0.97336, train_accuracy: 64.65
[8,    59] loss: 0.95266, train_accuracy: 64.26
[8,    60] loss: 0.96148, train_accuracy: 66.41
[8,    61] loss: 0.85963, train_accuracy: 67.77
[8,    62] loss: 0.89474, train_accuracy: 67.58
[8,    63] loss: 0.91023, train_accuracy: 68.95
[8,    64] loss: 0.94161, train_accuracy: 66.80
[8,    65] loss: 0.92817, train_accuracy: 66.21
[8,    66] loss: 0.95967, train_accuracy: 65.82
[8,    67] loss: 0.86397, train_accuracy: 69.14
[8,    68] loss: 1.07253, train_accuracy: 60.74
[8,    69] loss: 0.93960, train_accuracy: 68.55
[8,    70] loss: 0.98555, train_accuracy: 65.82
[8,    71] loss: 0.92618, train_accuracy: 65.62
[8,    72] loss: 0.90192, train_accuracy: 66.99
[8,    73] loss: 0.87419, train_accuracy: 68.95
[8,    74] loss: 0.87431, train_accuracy: 67.97
[8,    75] loss: 0.91386, train_accuracy: 68.95
[8,    76] loss: 0.95306, train_accuracy: 65.23
[8,    77] loss: 0.95822, train_accuracy: 67.97
[8,    78] loss: 0.91742, train_accuracy

[10,    28] loss: 0.87959, train_accuracy: 69.14
[10,    29] loss: 0.94008, train_accuracy: 64.65
[10,    30] loss: 0.87558, train_accuracy: 68.36
[10,    31] loss: 0.94132, train_accuracy: 67.19
[10,    32] loss: 0.92189, train_accuracy: 67.97
[10,    33] loss: 0.86316, train_accuracy: 67.77
[10,    34] loss: 0.86326, train_accuracy: 71.88
[10,    35] loss: 0.88068, train_accuracy: 69.34
[10,    36] loss: 1.02186, train_accuracy: 65.04
[10,    37] loss: 0.95729, train_accuracy: 66.21
[10,    38] loss: 0.92735, train_accuracy: 67.19
[10,    39] loss: 0.88213, train_accuracy: 66.80
[10,    40] loss: 0.92978, train_accuracy: 67.77
[10,    41] loss: 0.90400, train_accuracy: 68.55
[10,    42] loss: 0.93207, train_accuracy: 65.23
[10,    43] loss: 0.89479, train_accuracy: 67.58
[10,    44] loss: 0.94229, train_accuracy: 65.43
[10,    45] loss: 0.88829, train_accuracy: 67.97
[10,    46] loss: 0.88059, train_accuracy: 67.58
[10,    47] loss: 0.90159, train_accuracy: 65.23
[10,    48] loss: 0.

[11,    95] loss: 0.84442, train_accuracy: 71.09
[11,    96] loss: 0.89900, train_accuracy: 66.80
[11,    97] loss: 0.88640, train_accuracy: 69.34
[11,    98] loss: 0.95221, train_accuracy: 66.07
duration: 22 s - train loss: 0.88958 - train accuracy: 68.40 - validation loss: 1.09 - validation accuracy: 61.96 
[12,     1] loss: 0.86257, train_accuracy: 69.73
[12,     2] loss: 0.82130, train_accuracy: 71.68
[12,     3] loss: 0.80004, train_accuracy: 73.05
[12,     4] loss: 0.83781, train_accuracy: 69.92
[12,     5] loss: 0.89260, train_accuracy: 69.92
[12,     6] loss: 0.89911, train_accuracy: 66.80
[12,     7] loss: 0.84054, train_accuracy: 67.77
[12,     8] loss: 0.84377, train_accuracy: 70.12
[12,     9] loss: 0.93444, train_accuracy: 67.97
[12,    10] loss: 0.80241, train_accuracy: 73.83
[12,    11] loss: 0.84102, train_accuracy: 67.77
[12,    12] loss: 0.90731, train_accuracy: 68.75
[12,    13] loss: 0.83507, train_accuracy: 70.12
[12,    14] loss: 0.86195, train_accuracy: 69.53
[12

[13,    62] loss: 0.86360, train_accuracy: 67.19
[13,    63] loss: 0.79228, train_accuracy: 73.24
[13,    64] loss: 0.87452, train_accuracy: 68.36
[13,    65] loss: 0.95952, train_accuracy: 66.02
[13,    66] loss: 0.86185, train_accuracy: 69.14
[13,    67] loss: 0.92095, train_accuracy: 66.60
[13,    68] loss: 0.87238, train_accuracy: 70.12
[13,    69] loss: 0.87082, train_accuracy: 69.34
[13,    70] loss: 0.93108, train_accuracy: 65.43
[13,    71] loss: 0.85988, train_accuracy: 67.38
[13,    72] loss: 0.85587, train_accuracy: 69.73
[13,    73] loss: 0.85232, train_accuracy: 69.73
[13,    74] loss: 0.84200, train_accuracy: 69.34
[13,    75] loss: 0.86081, train_accuracy: 70.12
[13,    76] loss: 0.88275, train_accuracy: 68.55
[13,    77] loss: 0.95118, train_accuracy: 64.06
[13,    78] loss: 0.85485, train_accuracy: 70.31
[13,    79] loss: 0.89480, train_accuracy: 67.19
[13,    80] loss: 0.78402, train_accuracy: 72.07
[13,    81] loss: 0.84181, train_accuracy: 70.12
[13,    82] loss: 0.

[15,    29] loss: 0.86663, train_accuracy: 70.70
[15,    30] loss: 0.89905, train_accuracy: 68.95
[15,    31] loss: 0.76869, train_accuracy: 73.24
[15,    32] loss: 0.84930, train_accuracy: 71.29
[15,    33] loss: 0.89917, train_accuracy: 71.48
[15,    34] loss: 0.88794, train_accuracy: 68.75
[15,    35] loss: 0.92486, train_accuracy: 67.77
[15,    36] loss: 0.93670, train_accuracy: 68.55
[15,    37] loss: 0.80955, train_accuracy: 72.27
[15,    38] loss: 0.87863, train_accuracy: 68.55
[15,    39] loss: 0.88210, train_accuracy: 65.23
[15,    40] loss: 0.83506, train_accuracy: 72.27
[15,    41] loss: 0.87953, train_accuracy: 70.31
[15,    42] loss: 0.87265, train_accuracy: 69.92
[15,    43] loss: 0.78708, train_accuracy: 72.66
[15,    44] loss: 0.85674, train_accuracy: 70.12
[15,    45] loss: 0.85230, train_accuracy: 71.09
[15,    46] loss: 0.85048, train_accuracy: 70.51
[15,    47] loss: 0.84102, train_accuracy: 69.34
[15,    48] loss: 0.87132, train_accuracy: 69.73
[15,    49] loss: 0.

[16,    96] loss: 0.84176, train_accuracy: 69.92
[16,    97] loss: 0.92499, train_accuracy: 69.14
[16,    98] loss: 0.83059, train_accuracy: 70.54
duration: 24 s - train loss: 0.86072 - train accuracy: 69.54 - validation loss: 1.08 - validation accuracy: 62.31 
[17,     1] loss: 0.82085, train_accuracy: 68.55
[17,     2] loss: 0.76267, train_accuracy: 73.05
[17,     3] loss: 0.77627, train_accuracy: 74.02
[17,     4] loss: 0.79598, train_accuracy: 72.66
[17,     5] loss: 0.81698, train_accuracy: 71.29
[17,     6] loss: 0.85135, train_accuracy: 71.09
[17,     7] loss: 0.89824, train_accuracy: 67.38
[17,     8] loss: 0.81004, train_accuracy: 72.27
[17,     9] loss: 0.76715, train_accuracy: 72.46
[17,    10] loss: 0.79472, train_accuracy: 73.63
[17,    11] loss: 0.79862, train_accuracy: 72.27
[17,    12] loss: 0.94414, train_accuracy: 66.02
[17,    13] loss: 0.83574, train_accuracy: 70.31
[17,    14] loss: 0.82137, train_accuracy: 71.29
[17,    15] loss: 0.82732, train_accuracy: 70.90
[17

[18,    63] loss: 0.81799, train_accuracy: 71.29
[18,    64] loss: 0.84422, train_accuracy: 67.19
[18,    65] loss: 0.86432, train_accuracy: 69.92
[18,    66] loss: 0.79836, train_accuracy: 71.29
[18,    67] loss: 0.85918, train_accuracy: 70.70
[18,    68] loss: 0.87735, train_accuracy: 70.12
[18,    69] loss: 0.81942, train_accuracy: 70.31
[18,    70] loss: 0.92133, train_accuracy: 69.53
[18,    71] loss: 0.82966, train_accuracy: 71.09
[18,    72] loss: 0.85452, train_accuracy: 69.73
[18,    73] loss: 0.88909, train_accuracy: 67.19
[18,    74] loss: 0.88987, train_accuracy: 71.09
[18,    75] loss: 0.92115, train_accuracy: 66.80
[18,    76] loss: 0.81115, train_accuracy: 70.12
[18,    77] loss: 0.87563, train_accuracy: 70.90
[18,    78] loss: 0.82587, train_accuracy: 70.70
[18,    79] loss: 0.85220, train_accuracy: 69.53
[18,    80] loss: 0.79867, train_accuracy: 71.48
[18,    81] loss: 0.92477, train_accuracy: 66.99
[18,    82] loss: 0.81414, train_accuracy: 70.12
[18,    83] loss: 0.

[20,    30] loss: 0.82383, train_accuracy: 72.27
[20,    31] loss: 0.79074, train_accuracy: 71.09
[20,    32] loss: 0.79394, train_accuracy: 70.12
[20,    33] loss: 0.86947, train_accuracy: 69.34
[20,    34] loss: 0.86101, train_accuracy: 69.53
[20,    35] loss: 0.81953, train_accuracy: 72.85
[20,    36] loss: 0.81717, train_accuracy: 69.53
[20,    37] loss: 0.90287, train_accuracy: 67.38
[20,    38] loss: 0.77697, train_accuracy: 72.66
[20,    39] loss: 0.81861, train_accuracy: 70.12
[20,    40] loss: 0.92206, train_accuracy: 67.58
[20,    41] loss: 0.79143, train_accuracy: 73.83
[20,    42] loss: 0.82477, train_accuracy: 71.09
[20,    43] loss: 0.89041, train_accuracy: 69.53
[20,    44] loss: 0.87090, train_accuracy: 68.36
[20,    45] loss: 0.73865, train_accuracy: 75.00
[20,    46] loss: 0.85140, train_accuracy: 70.70
[20,    47] loss: 0.82297, train_accuracy: 72.85
[20,    48] loss: 0.81319, train_accuracy: 71.09
[20,    49] loss: 0.83570, train_accuracy: 70.31
[20,    50] loss: 0.

[21,    97] loss: 0.85645, train_accuracy: 67.38
[21,    98] loss: 0.81864, train_accuracy: 71.73
duration: 25 s - train loss: 0.84274 - train accuracy: 70.21 - validation loss: 1.08 - validation accuracy: 62.59 
[22,     1] loss: 0.88381, train_accuracy: 67.97
[22,     2] loss: 0.82424, train_accuracy: 70.31
[22,     3] loss: 0.80804, train_accuracy: 71.48
[22,     4] loss: 0.83496, train_accuracy: 70.12
[22,     5] loss: 0.78535, train_accuracy: 70.90
[22,     6] loss: 0.88623, train_accuracy: 67.19
[22,     7] loss: 0.82410, train_accuracy: 71.29
[22,     8] loss: 0.76520, train_accuracy: 72.85
[22,     9] loss: 0.82453, train_accuracy: 69.92
[22,    10] loss: 0.87297, train_accuracy: 69.14
[22,    11] loss: 0.91808, train_accuracy: 68.16
[22,    12] loss: 0.85648, train_accuracy: 68.75
[22,    13] loss: 0.84969, train_accuracy: 70.90
[22,    14] loss: 0.76880, train_accuracy: 73.83
[22,    15] loss: 0.78363, train_accuracy: 70.12
[22,    16] loss: 0.85905, train_accuracy: 71.29
[22

[23,    64] loss: 0.76802, train_accuracy: 73.83
[23,    65] loss: 0.87542, train_accuracy: 68.36
[23,    66] loss: 0.77040, train_accuracy: 72.27
[23,    67] loss: 0.90749, train_accuracy: 66.80
[23,    68] loss: 0.84779, train_accuracy: 69.34
[23,    69] loss: 0.84901, train_accuracy: 69.53
[23,    70] loss: 0.86084, train_accuracy: 68.55
[23,    71] loss: 0.94943, train_accuracy: 65.04
[23,    72] loss: 0.82837, train_accuracy: 70.12
[23,    73] loss: 0.90126, train_accuracy: 68.36
[23,    74] loss: 0.83996, train_accuracy: 71.09
[23,    75] loss: 0.86564, train_accuracy: 70.51
[23,    76] loss: 0.88117, train_accuracy: 70.12
[23,    77] loss: 0.87036, train_accuracy: 69.14
[23,    78] loss: 0.82191, train_accuracy: 72.27
[23,    79] loss: 0.82629, train_accuracy: 71.48
[23,    80] loss: 0.81973, train_accuracy: 70.31
[23,    81] loss: 0.86298, train_accuracy: 67.19
[23,    82] loss: 0.88396, train_accuracy: 70.70
[23,    83] loss: 0.87345, train_accuracy: 68.95
[23,    84] loss: 0.

In [7]:
standard_stats['1']['l2_robustness']

0.55078125

In [14]:
import pandas as pd
df = pd.DataFrame(data=standard_stats).T
df['l2_robustness'] = df['l2_robustness']*10000
df['linf_robustness'] = df['linf_robustness']*10000
df.to_pickle('./results/preliminary-standard-no-augmentation.pkl')
df

Unnamed: 0,l2_robustness,linf_robustness,clean_accuracy
1,62.109372,53.90625,62.07
2,59.570312,52.343747,62.11
4,62.890626,58.007813,62.3
8,65.82031,58.789062,63.78
16,64.843749,56.249998,62.44


In [6]:
fast_stats = run('fast')

identifying layers
[1,     1] loss: 2.95128, train_accuracy: 10.35
[1,    11] loss: 2.32203, train_accuracy: 20.90
[1,    21] loss: 2.07043, train_accuracy: 25.78
[1,    31] loss: 2.01518, train_accuracy: 26.56
[1,    41] loss: 2.03410, train_accuracy: 25.59
[1,    51] loss: 1.96823, train_accuracy: 27.73
[1,    61] loss: 2.02681, train_accuracy: 29.30
[1,    71] loss: 1.99444, train_accuracy: 30.27
[1,    81] loss: 1.97469, train_accuracy: 28.91
[1,    91] loss: 1.94731, train_accuracy: 29.30
duration: 60 s - train loss: 2.02881 - train accuracy: 27.44 - validation loss: 1.65035 - validation accuracy: 40.17 
[2,     1] loss: 1.80543, train_accuracy: 33.79
[2,    11] loss: 1.87866, train_accuracy: 33.98
[2,    21] loss: 1.83539, train_accuracy: 33.59
[2,    31] loss: 1.79033, train_accuracy: 33.59
[2,    41] loss: 1.78200, train_accuracy: 35.35
[2,    51] loss: 1.79811, train_accuracy: 32.62
[2,    61] loss: 1.79824, train_accuracy: 35.55
[2,    71] loss: 1.75369, train_accuracy: 37.11

KeyboardInterrupt: 

In [None]:
free_stats = run('free')

identifying layers
[1,     1] loss: 4.04980, train_accuracy: 4.44
[1,     2] loss: 3.13854, train_accuracy: 5.86
[1,     3] loss: 2.63508, train_accuracy: 10.21
[1,     4] loss: 2.44655, train_accuracy: 15.32
[1,     5] loss: 2.31197, train_accuracy: 18.89
[1,     6] loss: 2.36261, train_accuracy: 16.88
[1,     7] loss: 2.33470, train_accuracy: 19.06
[1,     8] loss: 2.32105, train_accuracy: 18.72
[1,     9] loss: 2.27571, train_accuracy: 18.08
[1,    10] loss: 2.23458, train_accuracy: 20.93
[1,    11] loss: 2.20737, train_accuracy: 20.59
[1,    12] loss: 2.24530, train_accuracy: 20.59
[1,    13] loss: 2.24561, train_accuracy: 17.91
[1,    14] loss: 2.16499, train_accuracy: 22.10
[1,    15] loss: 2.18761, train_accuracy: 21.57
[1,    16] loss: 2.17943, train_accuracy: 19.81
[1,    17] loss: 2.14748, train_accuracy: 21.29
[1,    18] loss: 2.14471, train_accuracy: 21.15
[1,    19] loss: 2.21635, train_accuracy: 20.65
[1,    20] loss: 2.14575, train_accuracy: 20.34
[1,    21] loss: 2.1973

[2,    71] loss: 1.96260, train_accuracy: 29.72
[2,    72] loss: 1.92929, train_accuracy: 29.77
[2,    73] loss: 1.94031, train_accuracy: 28.85
[2,    74] loss: 1.90231, train_accuracy: 28.66
[2,    75] loss: 2.04087, train_accuracy: 23.94
[2,    76] loss: 1.95751, train_accuracy: 28.21
[2,    77] loss: 1.91723, train_accuracy: 27.40
[2,    78] loss: 1.96543, train_accuracy: 28.93
[2,    79] loss: 1.98063, train_accuracy: 24.89
[2,    80] loss: 1.90134, train_accuracy: 28.12
[2,    81] loss: 1.92274, train_accuracy: 28.99
[2,    82] loss: 1.98986, train_accuracy: 27.26
[2,    83] loss: 1.97384, train_accuracy: 26.56
[2,    84] loss: 1.96078, train_accuracy: 27.96
[2,    85] loss: 1.92282, train_accuracy: 28.40
[2,    86] loss: 1.90109, train_accuracy: 28.52
[2,    87] loss: 1.92160, train_accuracy: 30.44
[2,    88] loss: 1.90432, train_accuracy: 31.19
[2,    89] loss: 1.90042, train_accuracy: 29.83
[2,    90] loss: 1.86666, train_accuracy: 32.48
[2,    91] loss: 1.94562, train_accuracy

In [None]:
double_stats = run('fast_double')

In [7]:
images, labels = next(iter(test_loader))
images, labels = images.to(device), labels.to(device)


In [8]:
bb_attack(model, images, labels)

1
2


KeyboardInterrupt: 

In [9]:
pgd_attack(model, images, labels, eps=8/255)

tensor(0.3359, device='cuda:0')

In [10]:
cw_attack(model, images, labels, eps=8/255)

tensor(0.2656, device='cuda:0')

In [13]:
bb_attack(model, images, labels)

NameError: name 'L0BrendelBethgeAttackfoolbox' is not defined

# Test

In [4]:
model.fit_free(train_loader, test_loader, 1, device, number_of_replays=3, eps = 8/255)

[1,     1] loss: 4.13015, train_accuracy: 4.17
[1,     2] loss: 3.66484, train_accuracy: 4.88
[1,     3] loss: 3.12940, train_accuracy: 6.97
[1,     4] loss: 2.90560, train_accuracy: 8.98
[1,     5] loss: 2.72787, train_accuracy: 9.70
[1,     6] loss: 2.53809, train_accuracy: 10.22
[1,     7] loss: 2.40960, train_accuracy: 15.62
[1,     8] loss: 2.43231, train_accuracy: 14.13
[1,     9] loss: 2.44054, train_accuracy: 14.78
[1,    10] loss: 2.36526, train_accuracy: 17.90
[1,    11] loss: 2.37196, train_accuracy: 19.01
[1,    12] loss: 2.28873, train_accuracy: 18.49
[1,    13] loss: 2.31310, train_accuracy: 14.58
[1,    14] loss: 2.30206, train_accuracy: 18.55
[1,    15] loss: 2.27567, train_accuracy: 15.89
[1,    16] loss: 2.21592, train_accuracy: 18.03
[1,    17] loss: 2.19046, train_accuracy: 20.05
[1,    18] loss: 2.15967, train_accuracy: 23.11
[1,    19] loss: 2.23406, train_accuracy: 18.68
[1,    20] loss: 2.25862, train_accuracy: 17.45
[1,    21] loss: 2.12274, train_accuracy: 23.

{'criterion': CrossEntropyLoss(),
 'optimizer': Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     eps: 1e-08
     lr: 0.001
     weight_decay: 0
 ),
 'hist': 'Not implemented',
 'val_accuracy': 34.82}

In [5]:
model.fit_fast(train_loader, test_loader, 1, device,eps = 8/255)

[1,     1] loss: 1.96565, train_accuracy: 28.91
[1,    11] loss: 1.97713, train_accuracy: 27.93
[1,    21] loss: 1.93998, train_accuracy: 27.34
[1,    31] loss: 1.85866, train_accuracy: 31.05
[1,    41] loss: 1.93476, train_accuracy: 29.88
[1,    51] loss: 1.81361, train_accuracy: 33.79
[1,    61] loss: 1.79394, train_accuracy: 34.96
[1,    71] loss: 1.87157, train_accuracy: 32.42
[1,    81] loss: 1.81604, train_accuracy: 33.98
[1,    91] loss: 1.78834, train_accuracy: 31.84
duration: 255 s - train loss: 1.84690 - train accuracy: 32.50 - validation loss: 1.53757 - validation accuracy: 44.68 
Finished Training


{'criterion': CrossEntropyLoss(),
 'optimizer': Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     eps: 1e-08
     lr: 0.001
     weight_decay: 0
 ),
 'hist': 'Not implemented',
 'val_accuracy': 44.68}

In [6]:
model.fit_fast_with_double_update(train_loader, test_loader, 1, device,eps = 8/255)

[1,     1] loss: 1.70809, train_accuracy: 39.65
[1,    11] loss: 1.82264, train_accuracy: 33.59
[1,    21] loss: 1.73971, train_accuracy: 32.81
[1,    31] loss: 1.81496, train_accuracy: 35.94
[1,    41] loss: 1.78697, train_accuracy: 31.45
[1,    51] loss: 1.78453, train_accuracy: 33.20
[1,    61] loss: 1.72529, train_accuracy: 35.55
[1,    71] loss: 1.67665, train_accuracy: 38.67
[1,    81] loss: 1.74809, train_accuracy: 35.55
[1,    91] loss: 1.71799, train_accuracy: 38.67
duration: 320 s - train loss: 1.76333 - train accuracy: 35.67 - validation loss: 1.45301 - validation accuracy: 48.06 
Finished Training


{'criterion': CrossEntropyLoss(),
 'optimizer': Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     eps: 1e-08
     lr: 0.001
     weight_decay: 0
 ),
 'hist': 'Not implemented',
 'val_accuracy': 48.06}

In [None]:
losses = [10, 9, 8,7,6,5,4,3,2,2]
patience = 3
check_early_stopping(losses, patience)

In [17]:
a = 4
b = [5,6,7]
list(filter(lambda x: x<a, b))

[]

In [20]:
b[-1:]

[7]

In [None]:
def check_early_stopping(val_loss_hist, patience):
    filter(lambda x: val_loss_hist[-1:] > x, val_loss_hist[patience:])
    return list(filter(lambda x: val_loss_hist[-1:][0] > x, val_loss_hist[patience:]))