## Import Packages

In [1]:
import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.utils import make_grid

import math
import random
import json

from PIL import Image, ImageOps, ImageEnhance
import numbers

import torchattacks
from torchattacks import CW

import matplotlib.pyplot as plt
%matplotlib inline

## Normalize Dataset

In [2]:
#Creating dataset using torch dataloaders
batch_size_train = 128
batch_size_test = 1000
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,))]) # Normalizing dataset

# Training dataset
train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=True, download=True,
                             transform=transform),
  batch_size=batch_size_train, shuffle=True)

# Test dataset
test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST('/files/', train=False, download=True,
                             transform=transform),
  batch_size=batch_size_test, shuffle=True)

# Initialize GPU
use_cuda = True
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

cuda


## Initial Model Structure

In [3]:
# Initial model structure
class HNet(nn.Module):    
    def __init__(self):
        super(HNet, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)


    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.softmax(x, dim = 1) # add this because I need one-hot label and MSE loss
        return x     

## SubModel Structure

In [4]:
# Submodel Structure for training residual
class NHNet(nn.Module):    
    def __init__(self):
        super(NHNet, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)


    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.softmax(x, dim = 1) # add this because I need one-hot label and MSE loss
        return x   

## Initialize Data Structures

In [16]:
# Initialize some data structures to store useful data
train_losses = []
train_counter = []
test_losses = []

## Initialize Parameters, optimizer and loss function

In [15]:
# Create the initial model
initial_model = HNet()

#Create the optimizer for the initial model
optimizer = optim.Adam(initial_model.parameters(), lr=0.003)

# Create Loss function for the intial model
criterion = nn.CrossEntropyLoss()

# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

#Change model into cuda mode
if torch.cuda.is_available():
    initial_model = initial_model.cuda()
    criterion = criterion.cuda()

## Training Function for Initial Model

In [7]:
# Initial Model Training Function
def train(epoch):
    initial_model.train()
#     exp_lr_scheduler.step()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        optimizer.zero_grad()
        output = initial_model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
        train_counter.append(
                (batch_idx*64) + ((epoch-1)*len(train_loader.dataset)))
        # torch.save(initial_model.state_dict(), 'C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/model.pth')
        # torch.save(optimizer.state_dict(), 'C:/Users/cozyn/Desktop/Research/Adversarial-Machine-Learning/results/optimizer.pth')
        if (batch_idx + 1)% 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), loss.item()))

## Evaluation Function for Intial Model

In [8]:
#Initial Model Evaluating Function
def evaluate(data_loader):
    initial_model.eval()
    loss = 0
    correct = 0
    
    with torch.no_grad():
        for data, target in data_loader:
            data, target = Variable(data, volatile=True), Variable(target)
            if torch.cuda.is_available():
                data = data.cuda()
                target = target.cuda()
        
            output = initial_model(data)
        
            loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
    loss /= len(data_loader.dataset)
    test_losses.append(loss)    
    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        loss, correct, len(data_loader.dataset),
        100. * correct / len(data_loader.dataset)))

## Start Training the Initial Model

In [17]:
# Start Training
n_epochs = 65

for epoch in range(n_epochs):
    train(epoch)
    evaluate(train_loader)



  if __name__ == '__main__':



Average loss: 1.5576, Accuracy: 54379/60000 (90.632%)


Average loss: 1.5313, Accuracy: 55880/60000 (93.133%)


Average loss: 1.5157, Accuracy: 56773/60000 (94.622%)


Average loss: 1.5180, Accuracy: 56624/60000 (94.373%)


Average loss: 1.5074, Accuracy: 57218/60000 (95.363%)


Average loss: 1.5130, Accuracy: 56874/60000 (94.790%)


Average loss: 1.5114, Accuracy: 56975/60000 (94.958%)


Average loss: 1.5035, Accuracy: 57456/60000 (95.760%)


Average loss: 1.5338, Accuracy: 55620/60000 (92.700%)


Average loss: 1.5091, Accuracy: 57117/60000 (95.195%)


Average loss: 1.5123, Accuracy: 56931/60000 (94.885%)


Average loss: 1.5031, Accuracy: 57474/60000 (95.790%)


Average loss: 1.5046, Accuracy: 57395/60000 (95.658%)


Average loss: 1.5071, Accuracy: 57235/60000 (95.392%)


Average loss: 1.5061, Accuracy: 57281/60000 (95.468%)


Average loss: 1.5019, Accuracy: 57547/60000 (95.912%)


Average loss: 1.5007, Accuracy: 57609/60000 (96.015%)


Average loss: 1.5057, Accuracy: 57316/60000 (95


Average loss: 1.5023, Accuracy: 57525/60000 (95.875%)


Average loss: 1.5059, Accuracy: 57314/60000 (95.523%)


Average loss: 1.5080, Accuracy: 57187/60000 (95.312%)


Average loss: 1.5127, Accuracy: 56892/60000 (94.820%)


Average loss: 1.5122, Accuracy: 56936/60000 (94.893%)


Average loss: 1.5048, Accuracy: 57379/60000 (95.632%)


Average loss: 1.5127, Accuracy: 56906/60000 (94.843%)


Average loss: 1.5021, Accuracy: 57543/60000 (95.905%)


Average loss: 1.5064, Accuracy: 57278/60000 (95.463%)


Average loss: 1.5015, Accuracy: 57568/60000 (95.947%)


Average loss: 1.5219, Accuracy: 56351/60000 (93.918%)


Average loss: 1.5026, Accuracy: 57512/60000 (95.853%)


Average loss: 1.5174, Accuracy: 56618/60000 (94.363%)


Average loss: 1.5228, Accuracy: 56298/60000 (93.830%)


Average loss: 1.5119, Accuracy: 56951/60000 (94.918%)


Average loss: 1.5050, Accuracy: 57371/60000 (95.618%)


Average loss: 1.5210, Accuracy: 56408/60000 (94.013%)


Average loss: 1.5022, Accuracy: 57532/60000 (95


Average loss: 1.5082, Accuracy: 57166/60000 (95.277%)



## Save or Load the Initial Model

In [18]:
# Save this model so that I dont have to train again in the future
torch.save(initial_model, 'C:/Users/cozyn/OneDrive/Desktop/Research/Adversarial-Machine-Learning/results/initial_model.pth')

In [7]:
# Load the model from local file
initial_model = torch.load('C:/Users/cozyn/OneDrive/Desktop/Research/Adversarial-Machine-Learning/results/initial_model.pth')
initial_model.eval()

HNet(
  (flatten): Flatten()
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)

## Calculatee mse residual and normalize the result to values between 0 ~ 1

In [10]:
# Function used to calculate mse residual based on the wiki
def mseresidual(y, F):
    residual = y - F
    absolute = torch.abs(residual)
    residual = residual / torch.max(absolute)
    return residual

## Actual Code for gradient Boosting

In [11]:
#Main Function used to train and find optimal gamma for submodels
#input: intial model that's already trained
#M is number of submodels needed to be trained
def GradientBoosting(initial_model, M):
    gamma_exp = torch.ones([M], dtype = torch.float64) # used to hold the final optimized gamma
    models = [] # used to hold all the models
    residual_list = [] # used to hold the residual of each batch calculated
    for m in range(M):
        # Intialize submodels
        Hmodel = NHNet()
        Hcriterion = nn.MSELoss()
        if torch.cuda.is_available():
            Hmodel = Hmodel.cuda()
            gamma_exp = gamma_exp.cuda()
            Hcriterion = Hcriterion.cuda()
            
        # Start Training
        epoch = 20
        Hoptimizer = optim.Adam(Hmodel.parameters(), lr=0.001)
        for i in range(epoch):
            Hmodel.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data), Variable(target)
                # Create one-hot label target tensor
                nb_digits = 10
                target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()
                    target_onehot = target_onehot.cuda()
                # Calculate F(x)
                output = initial_model(data)
                # Calculate the output from all the models
                for j in range(m):
                    model = models[j]
                    if torch.cuda.is_available():
                        output = output.cuda()
                        model = model.cuda()
                    output = output + gamma_exp[j] * model(data)
#                 print("output is:", output)
                #Convert into Onehot label so that it would be able to calculate the residual
                target = target.view(-1,1)
                target_onehot.zero_()
                target_onehot.scatter_(1, target, 1)
                #Calculate Residual
#                 print("target_onehot is:", target_onehot)
#                 print("output is:", output)
                residual = mseresidual(target_onehot, output)
                houtput = Hmodel(data)
#                 print("houtput is:", houtput)
                residual = residual
#                 print(residual)
                residual = residual.type(torch.cuda.FloatTensor)
                houtput = houtput.type(torch.cuda.FloatTensor)
                #Calculate the loss
                loss = Hcriterion(houtput, residual)
                Hoptimizer.zero_grad()
                loss.backward(retain_graph = True)
                Hoptimizer.step()
                # Print out current Process
#                 if (batch_idx + 1)% 100 == 0 and i % 10 == 0:
                print('Train Epoch: Model Number: {} {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        m+1,i, (batch_idx + 1) * len(data), len(train_loader.dataset),
                        100. * (batch_idx + 1) / len(train_loader), loss.item()))
        models.append(Hmodel)
        
        # Initialize a random gamma
        gamma = torch.rand(1, requires_grad=True, device="cuda")
        Goptimizer = optim.Adam([gamma], lr=0.01)
        Gcriterion = nn.MSELoss()
        # Start finding the best gamma
        for i in range(20):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = Variable(data), Variable(target)
                nb_digits = 10
                target_onehot = torch.FloatTensor(data.shape[0], nb_digits)
                if torch.cuda.is_available():
                    data = data.cuda()
                    target = target.cuda()
                    target_onehot = target_onehot.cuda()
                    Hmodel = Hmodel.cuda()
                    gamma = gamma.cuda()
                    
                #Calculate the initial output
                Goptimizer.zero_grad()  
                output = initial_model(data)
                #Calculate the final output by combining all previous models
                for j in range(m):
                    model = models[j]
                    if torch.cuda.is_available():
                        model = model.cuda()
                        output = output.cuda()
                        gamma_temp = gamma_exp[j]
                        gamma_temp = gamma_temp.cuda()
                    output = output + gamma_temp * model(data)
                # Covert into one-hot label
                target = target.view(-1,1)
                target_onehot.zero_()
                target_onehot.scatter_(1, target, 1)
                # Get the currect model output
                temp = Hmodel(data)
                # Find the current ensemble model output
                predicted = output + gamma * temp
                # Calculate the loss
                loss = Gcriterion(predicted, target_onehot)
                loss.backward(retain_graph = True)
                # Optimize the gamma
                Goptimizer.step()  
                
        gamma_exp[m] = gamma
    print(gamma_exp)
    return models, gamma_exp

## Start Gradient Boosting Training

In [12]:
num_of_models = 3
models, gamma_exp = GradientBoosting(initial_model, num_of_models)























































































































































































































































































































































































































































tensor([ 0.1407,  0.0500, -0.0431], device='cuda:0', dtype=torch.float64,
       grad_fn=<CopySlices>)


## Save or Load submodels and optimized gamma

In [24]:
# Save all the models trained
for i in range(num_of_models):
    model = models[i]
    torch.save(model, 'C:/Users/cozyn/OneDrive/Desktop/Research/Adversarial-Machine-Learning/results/model' + str(i) + '.pth')

In [25]:
# Save the optimized gamma
torch.save(gamma_exp, 'C:/Users/cozyn/OneDrive/Desktop/Research/Adversarial-Machine-Learning/results/gamma_exp.txt')

In [None]:
# Load the models from the local files
num_of_models = 3
models = []
for x in range(num_of_models):
    globals()['model%s' % x] = torch.load('C:/Users/cozyn/OneDrive/Desktop/Research/Adversarial-Machine-Learning/results/model' + str(1) + '.pth')
    models.append(globals()['model%s' % x])
print(models)

In [None]:
# Load the optimized gamma from the local files
gamma_exp = torch.load('C:/Users/cozyn/OneDrive/Desktop/Research/Adversarial-Machine-Learning/results/gamma_exp.txt')
print(gamma_exp)

## Ensemble Model Accuracy

In [19]:
#Find the accuracy of the ensemble model
# initial_model = torch.load('C:/Users/cozyn/OneDrive/Desktop/Research/Adversarial-Machine-Learning/results/initial_model.pth')
initial_model.eval()

loss = 0
correct = 0
    
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data, volatile=True), Variable(target)
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        output = initial_model(data)
        for i in range(num_of_models):
            model = models[i]
            if torch.cuda.is_available():
                model = model.cuda()
                output = output.cuda()
                gamma_temp = gamma_exp[i]
                gamma_temp = gamma_temp.cuda()
            output = output + gamma_temp * model(data)
        loss += F.cross_entropy(output, target, reduction='sum').item()
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        
loss /= len(train_loader.dataset)  
print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
    loss, correct, len(train_loader.dataset),
    100. * correct / len(train_loader.dataset)))

  # Remove the CWD from sys.path while we load stuff.



Average loss: 1.5063, Accuracy: 57166/60000 (95.277%)



## CW Attack on Intial Model, Testing Robust Accuracy

In [20]:
# Attack the initial model using CW attack
initial_model.eval()

correct = 0
total = 0

cw_attack = CW(initial_model, c=1)

for data, target in test_loader:

        images = cw_attack(data, target).cuda()
        outputs = initial_model(images)
        
        _, predicted = torch.max(outputs.data, 1)

        total += target.size(0)
        correct += (predicted == target.cuda()).sum()
    
print('Robust accuracy: %.2f %%' % (100 * float(correct) / total))

Robust accuracy: 42.52 %


## Creating Ensemble CW Attack

In [21]:
# Create ensemble CW attack
class Attack_ensemble(object):
    r"""
    Base class for all attacks.
    .. note::
        It automatically set device to the device where given model is.
        It temporarily changes the original model's training mode to `test`
        by `.eval()` only during an attack process.
    """
    def __init__(self, name, model, models, gamma):
        r"""
        Initializes internal attack state.
        Arguments:
            name (str) : name of an attack.
            model (torch.nn.Module): model to attack.
        """

        self.attack = name
        self.model = model
        self.models = models
        self.gamma = gamma
        self.model_name = str(model).split("(")[0]

        self.training = model.training
        self.device = next(model.parameters()).device
        
        self._targeted = 1
        self._attack_mode = 'original'
        self._return_type = 'float'

    def forward(self, *input):
        r"""
        It defines the computation performed at every call.
        Should be overridden by all subclasses.
        """
        raise NotImplementedError
        
    def set_attack_mode(self, mode):
        r"""
        Set the attack mode.
  
        Arguments:
            mode (str) : 'original' (DEFAULT)
                         'targeted' - Use input labels as targeted labels.
                         'least_likely' - Use least likely labels as targeted labels.
        """
        if self._attack_mode is 'only_original':
            raise ValueError("Changing attack mode is not supported in this attack method.")
            
        if mode=="original":
            self._attack_mode = "original"
            self._targeted = 1
            self._transform_label = self._get_label
        elif mode=="targeted":
            self._attack_mode = "targeted"
            self._targeted = -1
            self._transform_label = self._get_label
        elif mode=="least_likely":
            self._attack_mode = "least_likely"
            self._targeted = -1
            self._transform_label = self._get_least_likely_label
        else:
            raise ValueError(mode + " is not a valid mode. [Options : original, targeted, least_likely]")
            
    def set_return_type(self, type):
        r"""
        Set the return type of adversarial images: `int` or `float`.
        Arguments:
            type (str) : 'float' or 'int'. (DEFAULT : 'float')
        """
        if type == 'float':
            self._return_type = 'float'
        elif type == 'int':
            self._return_type = 'int'
        else:
            raise ValueError(type + " is not a valid type. [Options : float, int]")

    def save(self, save_path, data_loader, verbose=True):
        r"""
        Save adversarial images as torch.tensor from given torch.utils.data.DataLoader.
        Arguments:
            save_path (str) : save_path.
            data_loader (torch.utils.data.DataLoader) : data loader.
            verbose (bool) : True for displaying detailed information. (DEFAULT : True)
        """
        self.model.eval()

        image_list = []
        label_list = []

        correct = 0
        total = 0

        total_batch = len(data_loader)

        for step, (images, labels) in enumerate(data_loader):
            adv_images = self.__call__(images, labels)

            image_list.append(adv_images.cpu())
            label_list.append(labels.cpu())

            if self._return_type == 'int':
                adv_images = adv_images.float()/255

            if verbose:
                outputs = self.model(adv_images)
                for i in range(len(self.models)):
                    sub_model = self.models[i]
                    outputs = outputs + self.gamma[i] * sub_model(adv_images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels.to(self.device)).sum()

                acc = 100 * float(correct) / total
                print('- Save Progress : %2.2f %% / Accuracy : %2.2f %%' % ((step+1)/total_batch*100, acc), end='\r')

        x = torch.cat(image_list, 0)
        y = torch.cat(label_list, 0)
        torch.save((x, y), save_path)
        print('\n- Save Complete!')

        self._switch_model()
        
    def _transform_label(self, images, labels):
        r"""
        Function for changing the attack mode.
        """
        return labels
        
    def _get_label(self, images, labels):
        r"""
        Function for changing the attack mode.
        Return input labels.
        """
        return labels
    
    def _get_least_likely_label(self, images, labels):
        r"""
        Function for changing the attack mode.
        Return least likely labels.
        """
        outputs = self.model(images)
        for i in range(len(self.models)):
            sub_model = self.models[i]
            outputs = outputs + self.gamma[i] * sub_model(images)
        _, labels = torch.min(outputs.data, 1)
        labels = labels.detach_()
        return labels
    
    def _to_uint(self, images):
        r"""
        Function for changing the return type.
        Return images as int.
        """
        return (images*255).type(torch.uint8)

    def _switch_model(self):
        r"""
        Function for changing the training mode of the model.
        """
        if self.training:
            self.model.train()
            for i in range(len(self.models)):
                self.models[i].train()
        else:
            self.model.eval()
            for i in range(len(self.models)):
                self.models[i].eval()

    def __str__(self):
        info = self.__dict__.copy()
        
        del_keys = ['model', 'attack']
        
        for key in info.keys():
            if key[0] == "_" :
                del_keys.append(key)
                
        for key in del_keys:
            del info[key]
        
        info['attack_mode'] = self._attack_mode
        if info['attack_mode'] == 'only_original' :
            info['attack_mode'] = 'original'
            
        info['return_type'] = self._return_type
        
        return self.attack + "(" + ', '.join('{}={}'.format(key, val) for key, val in info.items()) + ")"

    def __call__(self, *input, **kwargs):
        self.model.eval()
        for i in range(len(self.models)):
            self.models[i].eval()
        images = self.forward(*input, **kwargs)
        self._switch_model()

        if self._return_type == 'int':
            images = self._to_uint(images)

        return images

In [22]:
# Create Ensemble CW Attack
import warnings

class CW_Ensemble(Attack_ensemble):
    r"""
    CW in the paper 'Towards Evaluating the Robustness of Neural Networks'
    [https://arxiv.org/abs/1608.04644]
    Distance Measure : L2
        
    Arguments:
        model (nn.Module): model to attack.
        c (float): c in the paper. parameter for box-constraint. (DEFALUT : 1e-4)    
            :math:`minimize \Vert\frac{1}{2}(tanh(w)+1)-x\Vert^2_2+c\cdot f(\frac{1}{2}(tanh(w)+1))`    
        kappa (float): kappa (also written as 'confidence') in the paper. (DEFALUT : 0)
            :math:`f(x')=max(max\{Z(x')_i:i\neq t\} -Z(x')_t, - \kappa)`
        steps (int): number of steps. (DEFALUT : 1000)
        lr (float): learning rate of the Adam optimizer. (DEFALUT : 0.01)
        
    .. warning:: With default c, you can't easily get adversarial images. Set higher c like 1.
    
    Shape:
        - images: :math:`(N, C, H, W)` where `N = number of batches`, `C = number of channels`,        `H = height` and `W = width`. It must have a range [0, 1].
        - labels: :math:`(N)` where each value :math:`y_i` is :math:`0 \leq y_i \leq` `number of labels`.
        - output: :math:`(N, C, H, W)`.
          
    Examples::
        >>> attack = torchattacks.CW(model, targeted=False, c=1e-4, kappa=0, steps=1000, lr=0.01)
        >>> adv_images = attack(images, labels)
        
    .. note:: NOT IMPLEMENTED methods in the paper due to time consuming.
    
        (1) Binary search for c.
        
        (2) Choosing best L2 adversaries.
    """
    def __init__(self, model, models, gamma, c=1e-4, kappa=0, steps=1000, lr=0.01):
        super(CW_Ensemble, self).__init__("CW", model, models, gamma)
        self.c = c
        self.kappa = kappa
        self.steps = steps
        self.lr = lr

    def forward(self, images, labels):
        r"""
        Overridden.
        """
        images = images.to(self.device)
        labels = labels.to(self.device)
        labels = self._transform_label(images, labels)

        # f-function in the paper
        def f(x):
            outputs = self.model(x)
            for i in range(len(self.models)):
                sub_model = self.models[i]
                outputs = outputs + self.gamma[i] * sub_model(x)
            one_hot_labels = torch.eye(len(outputs[0]))[labels].to(self.device)

            i, _ = torch.max((1-one_hot_labels)*outputs, dim=1)
            j = torch.masked_select(outputs, one_hot_labels.bool())

            return torch.clamp(self._targeted*(j-i), min=-self.kappa)

        w = torch.zeros_like(images).to(self.device)
        w.detach_()
        w.requires_grad = True

        optimizer = optim.Adam([w], lr=self.lr)
        prev = 1e10

        for step in range(self.steps):

            a = 1/2*(nn.Tanh()(w) + 1)

            loss1 = nn.MSELoss(reduction='sum')(a, images)
            loss2 = torch.sum(self.c*f(a))

            cost = loss1 + loss2

            optimizer.zero_grad()
            cost.backward(retain_graph=True)
            optimizer.step()

            # Early Stop when loss does not converge.
            if step % (self.steps//10) == 0:
                if cost > prev:
                    warnings.warn("Early stopped because the loss is not converged.")
                    return (1/2*(nn.Tanh()(w) + 1)).detach()
                prev = cost

            # print('- CW Attack Progress : %2.2f %%        ' %((step+1)/self.steps*100), end='\r')

        adv_images = (1/2*(nn.Tanh()(w) + 1)).detach()

        return adv_images

## CW Attack Ensemble Model, Testing Robust Accuracy

In [23]:
#Attack the ensemble model
initial_model.eval()
for i in range(num_of_models):
    models[i].eval()

correct = 0
total = 0

cw_attack = CW_Ensemble(model = initial_model,models = models,gamma = gamma_exp, c=1)

for data, target in test_loader:

        images = cw_attack(data, target).cuda()
        outputs = initial_model(images)
        for i in range(num_of_models):
            sub_model = models[i]
            outputs = outputs + gamma_exp[i] * sub_model(images)
        _, predicted = torch.max(outputs.data, 1)

        total += target.size(0)
        correct += (predicted == target.cuda()).sum()
    
print('Robust accuracy: %.2f %%' % (100 * float(correct) / total))

Robust accuracy: 43.80 %
