In [1]:
import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.utils import make_grid

import math
import random
import json

from PIL import Image, ImageOps, ImageEnhance
import numbers

import torchattacks
from torchattacks import CW
# from CW_Emsemble_Attack import CW as CW_ensemble

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
batch_size_train = 64
batch_size_test = 1000
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))])

train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=transform),
  batch_size=batch_size_train, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=transform),
  batch_size=batch_size_test, shuffle=True)

use_cuda = True
device = torch.device("cuda" if use_cuda else "cpu")

In [3]:
class HNet(nn.Module):    
    def __init__(self):
        super(HNet, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 128)  # 6*6 from image dimension
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)


    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
#         x = F.softmax(x, dim = 1)
        return x     

In [4]:
class NHNet(nn.Module):    
    def __init__(self):
        super(NHNet, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 128)  # 6*6 from image dimension
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)


    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.softmax(x, dim = 1)
        return x   

In [5]:
gamma_exp = []
train_output = []
train_losses = []
train_counter = []
test_losses = []
# test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]

In [6]:
initial_model = HNet()

optimizer = optim.Adam(initial_model.parameters(), lr=0.003)

criterion = nn.CrossEntropyLoss()

# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

if torch.cuda.is_available():
    initial_model = initial_model.cuda()
    criterion = criterion.cuda()

In [7]:
def train(epoch):
    initial_model.train()
#     exp_lr_scheduler.step()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        optimizer.zero_grad()
        output = initial_model(data)
#         print(output.shape)
#         train_output.append(output)
#         if batch_idx == 937:      
#             train_output.append(output)
#         print("before:",batch_idx,output[0])
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
#         print(optimizer.state_dict())
#         gamma_exp.append(optimizer.state_dict()['exp_avg'])
#         gamma_exp_sq.append(optimizer.state_dict()['exp_avg_sq'])
#         print(gamma[''])
#         print("after:",output[0])
#         train_output.append(output.data.max(1, keepdim=True))
#         if batch_idx == 0:
#             print(output.data.max(1, keepdim=True)[1].shape)
        train_losses.append(loss.item())
        train_counter.append(
                (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
        torch.save(initial_model.state_dict(), 'C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/model.pth')
        torch.save(optimizer.state_dict(), 'C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/optimizer.pth')
        if (batch_idx + 1)% 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))
#             train_losses.append(loss.item())
#             train_counter.append(
#                 (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))

In [8]:
def evaluate(data_loader):
    initial_model.eval()
    loss = 0
    correct = 0
    
    with torch.no_grad():
        for data, target in data_loader:
#             data, target = Variable(data, volatile=True), Variable(target)
            if torch.cuda.is_available():
                data = data.cuda()
                target = target.cuda()
        
            output = initial_model(data)
        
            loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
    loss /= len(data_loader.dataset)
    test_losses.append(loss)    
    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))

In [9]:
n_epochs = 65

for epoch in range(n_epochs):
    train(epoch)
    evaluate(train_loader)


Average loss: 0.1745, Accuracy: 56759/60000 (94.598%)


Average loss: 0.1335, Accuracy: 57509/60000 (95.848%)


Average loss: 0.1204, Accuracy: 57699/60000 (96.165%)


Average loss: 0.1375, Accuracy: 57383/60000 (95.638%)


Average loss: 0.1592, Accuracy: 56995/60000 (94.992%)


Average loss: 0.0872, Accuracy: 58280/60000 (97.133%)


Average loss: 0.0827, Accuracy: 58388/60000 (97.313%)


Average loss: 0.0836, Accuracy: 58374/60000 (97.290%)


Average loss: 0.0965, Accuracy: 58152/60000 (96.920%)


Average loss: 0.0675, Accuracy: 58694/60000 (97.823%)


Average loss: 0.0713, Accuracy: 58673/60000 (97.788%)


Average loss: 0.0565, Accuracy: 58925/60000 (98.208%)


Average loss: 0.0702, Accuracy: 58708/60000 (97.847%)


Average loss: 0.0709, Accuracy: 58755/60000 (97.925%)


Average loss: 0.0569, Accuracy: 58887/60000 (98.145%)


Average loss: 0.0576, Accuracy: 58858/60000 (98.097%)




Average loss: 0.0608, Accuracy: 58890/60000 (98.150%)


Average loss: 0.0548, Accuracy: 59007/60000 (98.345%)


Average loss: 0.0842, Accuracy: 58435/60000 (97.392%)


Average loss: 0.0564, Accuracy: 58994/60000 (98.323%)


Average loss: 0.0498, Accuracy: 59076/60000 (98.460%)


Average loss: 0.0377, Accuracy: 59266/60000 (98.777%)


Average loss: 0.0293, Accuracy: 59429/60000 (99.048%)


Average loss: 0.0690, Accuracy: 58753/60000 (97.922%)


Average loss: 0.0521, Accuracy: 58987/60000 (98.312%)


Average loss: 0.0443, Accuracy: 59172/60000 (98.620%)


Average loss: 0.0492, Accuracy: 59134/60000 (98.557%)


Average loss: 0.0456, Accuracy: 59176/60000 (98.627%)


Average loss: 0.0587, Accuracy: 59115/60000 (98.525%)


Average loss: 0.0403, Accuracy: 59240/60000 (98.733%)


Average loss: 0.0821, Accuracy: 58621/60000 (97.702%)


Average loss: 0.0271, Accuracy: 59492/60000 (99.153%)




Average loss: 0.0498, Accuracy: 59062/60000 (98.437%)


Average loss: 0.0475, Accuracy: 59150/60000 (98.583%)


Average loss: 0.0452, Accuracy: 59184/60000 (98.640%)


Average loss: 0.0484, Accuracy: 59192/60000 (98.653%)


Average loss: 0.0387, Accuracy: 59330/60000 (98.883%)


Average loss: 0.0562, Accuracy: 59078/60000 (98.463%)


Average loss: 0.0335, Accuracy: 59377/60000 (98.962%)


Average loss: 0.0320, Accuracy: 59428/60000 (99.047%)


Average loss: 0.0291, Accuracy: 59519/60000 (99.198%)


Average loss: 0.0553, Accuracy: 59134/60000 (98.557%)


Average loss: 0.0393, Accuracy: 59302/60000 (98.837%)


Average loss: 0.0211, Accuracy: 59612/60000 (99.353%)


Average loss: 0.0360, Accuracy: 59372/60000 (98.953%)


Average loss: 0.0244, Accuracy: 59584/60000 (99.307%)


Average loss: 0.0247, Accuracy: 59546/60000 (99.243%)


Average loss: 0.0154, Accuracy: 59672/60000 (99.453%)




Average loss: 0.0292, Accuracy: 59490/60000 (99.150%)


Average loss: 0.0142, Accuracy: 59724/60000 (99.540%)


Average loss: 0.0332, Accuracy: 59449/60000 (99.082%)


Average loss: 0.0204, Accuracy: 59640/60000 (99.400%)


Average loss: 0.0344, Accuracy: 59447/60000 (99.078%)


Average loss: 0.0205, Accuracy: 59626/60000 (99.377%)


Average loss: 0.0783, Accuracy: 58931/60000 (98.218%)


Average loss: 0.0624, Accuracy: 59109/60000 (98.515%)


Average loss: 0.0347, Accuracy: 59447/60000 (99.078%)


Average loss: 0.0424, Accuracy: 59332/60000 (98.887%)


Average loss: 0.0346, Accuracy: 59493/60000 (99.155%)


Average loss: 0.0471, Accuracy: 59335/60000 (98.892%)


Average loss: 0.0293, Accuracy: 59507/60000 (99.178%)


Average loss: 0.0245, Accuracy: 59563/60000 (99.272%)


Average loss: 0.0377, Accuracy: 59425/60000 (99.042%)


Average loss: 0.0168, Accuracy: 59703/60000 (99.505%)




Average loss: 0.0217, Accuracy: 59609/60000 (99.348%)



In [10]:
torch.save(initial_model, 'C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/initial_model.pth')

In [7]:
initial_model = torch.load('C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/initial_model.pth')
initial_model.eval()

HNet(
  (flatten): Flatten()
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

In [12]:
def mseresidual(y, F):
    return y - F

In [13]:
Hoptimizer = optim.Adam(initial_model.parameters(), lr=0.003)

Hcriterion = nn.MSELoss()

# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

if torch.cuda.is_available():
    Hcriterion = Hcriterion.cuda()

In [14]:
evaluate(train_loader)


Average loss: 0.0217, Accuracy: 59609/60000 (99.348%)



In [15]:
def Htrain(Hmodel, epoch):
    Hmodel.train()
    for m in range(num_of_models):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = Variable(data), Variable(target)
            nb_digits = 10
            target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
            if torch.cuda.is_available():
                data = data.cuda()
                target = target.cuda()
                target_onehot = target_onehot.cuda()
            Hoptimizer.zero_grad()
            output = initial_model(data)
            for i in range(m):
                model = models[i]
                if torch.cuda.is_available():
                    output = output.cuda()
                    model = model.cuda()
                output = output + gamma_exp[i] * model(data)
            target = target.view(-1,1)
            target_onehot.zero_()
            target_onehot.scatter_(1, target, 1)
            residual = mseresidual(target_onehot, output)
            houtput = Hmodel(data)
            houtput = houtput.type(torch.cuda.FloatTensor)
            residual = residual.type(torch.cuda.FloatTensor)
#             residual_list.append(residual)
    #             print("residual is:", residual)
    #             print("predicted is:", houtput)
            loss = Hcriterion(houtput, residual)
            loss.backward(retain_graph=True)
            Hoptimizer.step()
            if (batch_idx + 1)% 100 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                    100. * (batch_idx + 1) / len(train_loader), loss.item()))

In [16]:
def GradientBoosting(initial_model, M):
    gamma_exp = torch.ones([M], dtype = torch.float64)
    models = []
    residual_list = []
    for m in range(M):
        # Create new model for training residuals
        Hmodel = NHNet()
        if torch.cuda.is_available():
            Hmodel = Hmodel.cuda()
            gamma_exp = gamma_exp.cuda()
#         Htrain(Hmodel, 100)
#         for i in range(5):
#             print(i)
        Hmodel.train()
        epoch = 3
        for i in range(epoch):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data), Variable(target)
                nb_digits = 10
                target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()
                    target_onehot = target_onehot.cuda()
                Hoptimizer.zero_grad()
                output = initial_model(data)
                for i in range(m):
                    model = models[i]
                    if torch.cuda.is_available():
                        output = output.cuda()
                        model = model.cuda()
                    output = output + gamma_exp[i] * model(data)
                target = target.view(-1,1)
                target_onehot.zero_()
                target_onehot.scatter_(1, target, 1)
                residual = mseresidual(target_onehot, output)
                houtput = Hmodel(data)
                houtput = houtput.type(torch.cuda.FloatTensor)
                residual = residual.type(torch.cuda.FloatTensor)
                residual_list.append(residual)
    #             print("residual is:", residual)
    #             print("predicted is:", houtput)
                loss = Hcriterion(houtput, residual)
                loss.backward(retain_graph=True)
                Hoptimizer.step()
                if (batch_idx + 1)% 100 == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                        100. * (batch_idx + 1) / len(train_loader), loss.item()))
#             print('batch_idx', batch_idx)
#             train_losses.append(loss.item())
#             torch.save(model.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/model.pth')
#             torch.save(optimizer.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/optimizer.pth')
        models.append(Hmodel)
#         print("Hmodel is:", Hmodel)
#         print("Appended model is:", models[1])
#         print("Length of models:", len(models))
        
        
#         loss = 0
#         correct = 0
    
#         with torch.no_grad():
#             for batch_idx, (data, target) in enumerate(train_loader):
# #             data, target = Variable(data, volatile=True), Variable(target)
#                 residual = residual_list[batch_idx]
#                 if torch.cuda.is_available():
#                     data = data.cuda()
#                     residual = residual.cuda()
#                 residual = residual.type(torch.cuda.LongTensor)
#                 output = Hmodel(data)
#                 loss += F.cross_entropy(output, residual, reduction='sum').item()
#                 pred = output.data.max(1, keepdim=True)[1]
#                 correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
#         loss /= len(residual_list)
#     # test_losses.append(loss)    
#         print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
#             loss, correct, len(residual_list),
#             100. * correct / len(residual_list)))
        
        
        
        
        gamma = torch.rand(1, requires_grad=True, device="cuda")
#         gamma[0] = 0.1
        print("Initialized gamma:", gamma)
#         Variable(gamma)
        Goptimizer = optim.Adam([gamma], lr=0.003)
        for i in range(10):
            print(i)
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data), Variable(target)
                nb_digits = 10
                target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()
                    target_onehot = target_onehot.cuda()
                    Hmodel = Hmodel.cuda()
                    gamma = gamma.cuda()
                Goptimizer.zero_grad()  
                output = initial_model(data)
                for i in range(m):
                    model = models[i]
                    if torch.cuda.is_available():
                        model = model.cuda()
                        output = output.cuda()
                        gamma_temp = gamma_exp[i]
                        gamma_temp = gamma_temp.cuda()
                    output = output + gamma_temp * model(data)

                target = target.view(-1,1)
                target_onehot.zero_()
                target_onehot.scatter_(1, target, 1)
                temp = Hmodel(data)
    #             print('output is:', output)
    #             print('gamma is:', gamma.shape)
    #             print('Hmodel(data) is:', temp)
                predicted = output + gamma * temp
    #             print("predicted is:", predicted)
    #             predicted.double()
    #             target_onehot.double()
                loss = Hcriterion(predicted, target_onehot)
    #             print("target_onehot is:", target_onehot)
    #             train_losses.append(loss.item())
    #             torch.save(model.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/model.pth')
    #             torch.save(optimizer.state_dict(), 'C:/Users/cozyn/Desktop/Research/results/optimizer.pth')
    #             print("loss is:", loss)
                loss.backward(retain_graph=True)
#                 print("gamma is before:", gamma)
    #             print("gamma's gradient is:", gamma.retain_grad())
                Goptimizer.step()
#                 print("gamma is after:", gamma)
        gamma_exp[m] = gamma
        print(gamma_exp)
    return models, gamma_exp

In [17]:
num_of_models = 3
models, gamma_exp = GradientBoosting(initial_model, num_of_models)

Initialized gamma: tensor([0.2921], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0040, 1.0000, 1.0000], device='cuda:0', dtype=torch.float64,
       grad_fn=<CopySlices>)
Initialized gamma: tensor([0.3568], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0040, 0.9936, 1.0000], device='cuda:0', dtype=torch.float64,
       grad_fn=<CopySlices>)
Initialized gamma: tensor([0.3610], device='cuda:0', requires_grad=True)
0
1
2
3
4
5
6
7
8
9
tensor([1.0040, 0.9936, 1.0037], device='cuda:0', dtype=torch.float64,
       grad_fn=<CopySlices>)


In [28]:
torch.save(gamma_exp, 'C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/gamma_exp.txt')

In [8]:
gamma_exp = torch.load('C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/gamma_exp.txt')
print(gamma_exp)

tensor([1.0040, 0.9936, 1.0037], device='cuda:0', dtype=torch.float64,
       requires_grad=True)


In [20]:
initial_model = torch.load('C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/initial_model.pth')
initial_model.eval()

loss = 0
correct = 0
    
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(train_loader):
#             data, target = Variable(data, volatile=True), Variable(target)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        output = initial_model(data)
#         if batch_idx == 937:
#             print("Output before is:",output)
        for i in range(num_of_models):
            model = models[i]
            if torch.cuda.is_available():
                model = model.cuda()
                output = output.cuda()
                gamma_temp = gamma_exp[i]
                gamma_temp = gamma_temp.cuda()
            output = output + gamma_temp * model(data)
#             if batch_idx == 937:
#                 print("Hmodel output is:", model(data))
#                 print("Output after is:",output)
        loss += F.cross_entropy(output, target, reduction='sum').item()
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
loss /= len(train_loader.dataset)
# test_losses.append(loss)    
print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
    loss, correct, len(train_loader.dataset),
    100. * correct / len(train_loader.dataset)))


Average loss: 0.0214, Accuracy: 59612/60000 (99.353%)



In [25]:
for i in range(num_of_models):
    model = models[i]
    torch.save(model, 'C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/model' + str(i) + '.pth')

In [9]:
num_of_models = 3
models = []
for x in range(num_of_models):
    globals()['model%s' % x] = torch.load('C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/model' + str(1) + '.pth')
    models.append(globals()['model%s' % x])

In [17]:
initial_model.eval()

correct = 0
total = 0

cw_attack = CW(initial_model)

for data, target in test_loader:

        images = cw_attack(data, target).cuda()
        outputs = initial_model(images)
        
        _, predicted = torch.max(outputs.data, 1)

        total += target.size(0)
        correct += (predicted == target.cuda()).sum()
    
print('Robust accuracy: %.2f %%' % (100 * float(correct) / total))

Robust accuracy: 83.42 %


In [10]:
class Attack_ensemble(object):
    r"""
    Base class for all attacks.
    .. note::
        It automatically set device to the device where given model is.
        It temporarily changes the original model's training mode to `test`
        by `.eval()` only during an attack process.
    """
    def __init__(self, name, model, models, gamma):
        r"""
        Initializes internal attack state.
        Arguments:
            name (str) : name of an attack.
            model (torch.nn.Module): model to attack.
        """

        self.attack = name
        self.model = model
        self.models = models
        self.gamma = gamma
        self.model_name = str(model).split("(")[0]

        self.training = model.training
        self.device = next(model.parameters()).device
        
        self._targeted = 1
        self._attack_mode = 'original'
        self._return_type = 'float'

    def forward(self, *input):
        r"""
        It defines the computation performed at every call.
        Should be overridden by all subclasses.
        """
        raise NotImplementedError
        
    def set_attack_mode(self, mode):
        r"""
        Set the attack mode.
  
        Arguments:
            mode (str) : 'original' (DEFAULT)
                         'targeted' - Use input labels as targeted labels.
                         'least_likely' - Use least likely labels as targeted labels.
        """
        if self._attack_mode is 'only_original':
            raise ValueError("Changing attack mode is not supported in this attack method.")
            
        if mode=="original":
            self._attack_mode = "original"
            self._targeted = 1
            self._transform_label = self._get_label
        elif mode=="targeted":
            self._attack_mode = "targeted"
            self._targeted = -1
            self._transform_label = self._get_label
        elif mode=="least_likely":
            self._attack_mode = "least_likely"
            self._targeted = -1
            self._transform_label = self._get_least_likely_label
        else:
            raise ValueError(mode + " is not a valid mode. [Options : original, targeted, least_likely]")
            
    def set_return_type(self, type):
        r"""
        Set the return type of adversarial images: `int` or `float`.
        Arguments:
            type (str) : 'float' or 'int'. (DEFAULT : 'float')
        """
        if type == 'float':
            self._return_type = 'float'
        elif type == 'int':
            self._return_type = 'int'
        else:
            raise ValueError(type + " is not a valid type. [Options : float, int]")

    def save(self, save_path, data_loader, verbose=True):
        r"""
        Save adversarial images as torch.tensor from given torch.utils.data.DataLoader.
        Arguments:
            save_path (str) : save_path.
            data_loader (torch.utils.data.DataLoader) : data loader.
            verbose (bool) : True for displaying detailed information. (DEFAULT : True)
        """
        self.model.eval()

        image_list = []
        label_list = []

        correct = 0
        total = 0

        total_batch = len(data_loader)

        for step, (images, labels) in enumerate(data_loader):
            adv_images = self.__call__(images, labels)

            image_list.append(adv_images.cpu())
            label_list.append(labels.cpu())

            if self._return_type == 'int':
                adv_images = adv_images.float()/255

            if verbose:
                outputs = self.model(adv_images)
                for i in range(len(self.models)):
                    sub_model = self.models[i]
                    outputs = outputs + self.gamma[i] * sub_model(adv_images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels.to(self.device)).sum()

                acc = 100 * float(correct) / total
                print('- Save Progress : %2.2f %% / Accuracy : %2.2f %%' % ((step+1)/total_batch*100, acc), end='\r')

        x = torch.cat(image_list, 0)
        y = torch.cat(label_list, 0)
        torch.save((x, y), save_path)
        print('\n- Save Complete!')

        self._switch_model()
        
    def _transform_label(self, images, labels):
        r"""
        Function for changing the attack mode.
        """
        return labels
        
    def _get_label(self, images, labels):
        r"""
        Function for changing the attack mode.
        Return input labels.
        """
        return labels
    
    def _get_least_likely_label(self, images, labels):
        r"""
        Function for changing the attack mode.
        Return least likely labels.
        """
        outputs = self.model(images)
        for i in range(len(self.models)):
            sub_model = self.models[i]
            outputs = outputs + self.gamma[i] * sub_model(images)
        _, labels = torch.min(outputs.data, 1)
        labels = labels.detach_()
        return labels
    
    def _to_uint(self, images):
        r"""
        Function for changing the return type.
        Return images as int.
        """
        return (images*255).type(torch.uint8)

    def _switch_model(self):
        r"""
        Function for changing the training mode of the model.
        """
        if self.training:
            self.model.train()
            for i in range(len(self.models)):
                self.models[i].train()
        else:
            self.model.eval()
            for i in range(len(self.models)):
                self.models[i].eval()

    def __str__(self):
        info = self.__dict__.copy()
        
        del_keys = ['model', 'attack']
        
        for key in info.keys():
            if key[0] == "_" :
                del_keys.append(key)
                
        for key in del_keys:
            del info[key]
        
        info['attack_mode'] = self._attack_mode
        if info['attack_mode'] == 'only_original' :
            info['attack_mode'] = 'original'
            
        info['return_type'] = self._return_type
        
        return self.attack + "(" + ', '.join('{}={}'.format(key, val) for key, val in info.items()) + ")"

    def __call__(self, *input, **kwargs):
        self.model.eval()
        for i in range(len(self.models)):
            self.models[i].eval()
        images = self.forward(*input, **kwargs)
        self._switch_model()

        if self._return_type == 'int':
            images = self._to_uint(images)

        return images

In [14]:
import warnings

class CW_Ensemble(Attack_ensemble):
    r"""
    CW in the paper 'Towards Evaluating the Robustness of Neural Networks'
    [https://arxiv.org/abs/1608.04644]
    Distance Measure : L2
        
    Arguments:
        model (nn.Module): model to attack.
        c (float): c in the paper. parameter for box-constraint. (DEFALUT : 1e-4)    
            :math:`minimize \Vert\frac{1}{2}(tanh(w)+1)-x\Vert^2_2+c\cdot f(\frac{1}{2}(tanh(w)+1))`    
        kappa (float): kappa (also written as 'confidence') in the paper. (DEFALUT : 0)
            :math:`f(x')=max(max\{Z(x')_i:i\neq t\} -Z(x')_t, - \kappa)`
        steps (int): number of steps. (DEFALUT : 1000)
        lr (float): learning rate of the Adam optimizer. (DEFALUT : 0.01)
        
    .. warning:: With default c, you can't easily get adversarial images. Set higher c like 1.
    
    Shape:
        - images: :math:`(N, C, H, W)` where `N = number of batches`, `C = number of channels`,        `H = height` and `W = width`. It must have a range [0, 1].
        - labels: :math:`(N)` where each value :math:`y_i` is :math:`0 \leq y_i \leq` `number of labels`.
        - output: :math:`(N, C, H, W)`.
          
    Examples::
        >>> attack = torchattacks.CW(model, targeted=False, c=1e-4, kappa=0, steps=1000, lr=0.01)
        >>> adv_images = attack(images, labels)
        
    .. note:: NOT IMPLEMENTED methods in the paper due to time consuming.
    
        (1) Binary search for c.
        
        (2) Choosing best L2 adversaries.
    """
    def __init__(self, model, models, gamma, c=1e-4, kappa=0, steps=1000, lr=0.01):
        super(CW_Ensemble, self).__init__("CW", model, models, gamma)
        self.c = c
        self.kappa = kappa
        self.steps = steps
        self.lr = lr

    def forward(self, images, labels):
        r"""
        Overridden.
        """
        images = images.to(self.device)
        labels = labels.to(self.device)
        labels = self._transform_label(images, labels)

        # f-function in the paper
        def f(x):
            outputs = self.model(x)
            for i in range(len(self.models)):
                sub_model = self.models[i]
                outputs = outputs + self.gamma[i] * sub_model(x)
            one_hot_labels = torch.eye(len(outputs[0]))[labels].to(self.device)

            i, _ = torch.max((1-one_hot_labels)*outputs, dim=1)
            j = torch.masked_select(outputs, one_hot_labels.bool())

            return torch.clamp(self._targeted*(j-i), min=-self.kappa)

        w = torch.zeros_like(images).to(self.device)
        w.detach_()
        w.requires_grad = True

        optimizer = optim.Adam([w], lr=self.lr)
        prev = 1e10

        for step in range(self.steps):

            a = 1/2*(nn.Tanh()(w) + 1)

            loss1 = nn.MSELoss(reduction='sum')(a, images)
            loss2 = torch.sum(self.c*f(a))

            cost = loss1 + loss2

            optimizer.zero_grad()
            cost.backward()
            optimizer.step()

            # Early Stop when loss does not converge.
            if step % (self.steps//10) == 0:
                if cost > prev:
                    warnings.warn("Early stopped because the loss is not converged.")
                    return (1/2*(nn.Tanh()(w) + 1)).detach()
                prev = cost

            # print('- CW Attack Progress : %2.2f %%        ' %((step+1)/self.steps*100), end='\r')

        adv_images = (1/2*(nn.Tanh()(w) + 1)).detach()

        return adv_images

In [16]:
initial_model.eval()
for i in range(num_of_models):
    models[i].eval()

correct = 0
total = 0

cw_attack = CW_Ensemble(model = initial_model,models = models,gamma = gamma_exp)

for data, target in test_loader:

        images = cw_attack(data, target).cuda()
        outputs = initial_model(images)
        for i in range(num_of_models):
            sub_model = models[i]
            outputs = outputs + gamma_exp[i] * sub_model(images)
        _, predicted = torch.max(outputs.data, 1)

        total += target.size(0)
        correct += (predicted == target.cuda()).sum()
    
print('Robust accuracy: %.2f %%' % (100 * float(correct) / total))

Robust accuracy: 83.39 %


In [15]:
def CW_ensemble_attack(initial_model, models, images, labels, targeted=False, c=1e-4, kappa=0, max_iter=1000, learning_rate=0.01) :

    images = images.to(device)     
    labels = labels.to(device)

    
    def f(x) :

        outputs = initial_model(x)
        for i in range(num_of_models):
            model = models[i]
            if torch.cuda.is_available():
                model = model.cuda()
                outputs = outputs.cuda()
                gamma_temp = gamma_exp[i]
                gamma_temp = gamma_temp.cuda()
            outputs = outputs + gamma_temp * model(data)
            
        one_hot_labels = torch.eye(len(outputs[0]))[labels].to(device)

        i, _ = torch.max((1-one_hot_labels)*outputs, dim=1)
        j = torch.masked_select(outputs, one_hot_labels.byte())
        
        # If targeted, optimize for making the other class most likely 
        if targeted :
            return torch.clamp(i-j, min=-kappa)
        
        # If untargeted, optimize for making the other class most likely 
        else :
            return torch.clamp(j-i, min=-kappa)
    
    w = torch.zeros_like(images, requires_grad=True).to(device)

    optimizer = optim.Adam([w], lr=learning_rate)

    prev = 1e10
    
    for step in range(max_iter) :

        a = 1/2*(nn.Tanh()(w) + 1)

        loss1 = nn.MSELoss(reduction='sum')(a, images)
        loss2 = torch.sum(c*f(a))

        cost = loss1 + loss2

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Early Stop when loss does not converge.
        if step % (max_iter//10) == 0 :
            if cost > prev :
                print('Attack Stopped due to CONVERGENCE....')
                return a
            prev = cost
        
        print('- Learning Progress : %2.2f %%        ' %((step+1)/max_iter*100), end='\r')

    attack_images = 1/2*(nn.Tanh()(w) + 1)

    return attack_images