In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

In [2]:
# Global hyperparameters shared by the cells below.
batch_size, num_classes = 64, 10
learning_rate = 0.01  # same value as the former literal 10e-3
num_epochs = 10
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
#Loading the dataset and preprocessing
# One preprocessing pipeline is shared by both splits. The test images are
# normalised with the *training* statistics so that the network sees inputs
# on the same scale at train and evaluation time (the previous test-only
# mean/std introduced a small train/test shift).
mnist_transform = transforms.Compose([
    transforms.Resize((32, 32)),   # LeNet5 below expects 32x32 inputs (fc takes 16*5*5 = 400 features)
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.1307,), std = (0.3081,))])

train_dataset = torchvision.datasets.MNIST(root = './data',
                                           train = True,
                                           transform = mnist_transform,
                                           download = True)


test_dataset = torchvision.datasets.MNIST(root = './data',
                                          train = False,
                                          transform = mnist_transform,
                                          download = True)


train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)


test_batch_size = 64 # TEST BATCH SIZE
# Evaluation data is not shuffled: the order does not affect metrics, and a
# fixed order makes the images picked from this loader reproducible.
test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                          batch_size = test_batch_size,
                                          shuffle = False)

In [4]:
#Defining the convolutional neural network
class LeNet5(nn.Module):
    """LeNet-5 style classifier: two convolutional stages and three linear layers.

    Each convolutional stage is Conv2d(5x5, stride 1, no padding) ->
    BatchNorm2d -> ReLU -> MaxPool2d(2, stride 2). The first linear layer
    takes 400 features, which is what the two stages produce (16 x 5 x 5)
    for a 1 x 32 x 32 input.

    Args:
        num_classes: number of output logits of the last linear layer.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv -> BatchNorm -> ReLU -> MaxPool stage."""
        stage = [
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        return nn.Sequential(*stage)

    def __init__(self, num_classes):
        super().__init__()
        # Attribute names and creation order are kept as-is so that saved
        # checkpoints / state dicts keep matching this class.
        self.layer1 = self._conv_stage(1, 6)
        self.layer2 = self._conv_stage(6, 16)
        self.fc = nn.Linear(400, 120)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(120, 84)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax) for a batch of images."""
        features = self.layer2(self.layer1(x))
        # Flatten everything except the batch dimension.
        flat = features.reshape(features.size(0), -1)
        hidden = self.relu(self.fc(flat))
        hidden = self.relu1(self.fc1(hidden))
        return self.fc2(hidden)

In [5]:
# LOAD FROM SAVED FILE
# SECURITY NOTE: the file holds a fully pickled model, and unpickling can run
# arbitrary code -- only load checkpoints you created yourself or trust.
# (Recent PyTorch releases default torch.load to weights_only=True, which
# rejects whole-model pickles; there, pass weights_only=False for a trusted
# file or switch to saving/loading a state_dict.)
# map_location lets a checkpoint saved on a GPU be opened on a CPU-only
# machine and places the model on the device selected earlier.
LeNet = torch.load('LeNet.pth', map_location=device)
LeNet.eval()

LeNet5(
  (layer1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=400, out_features=120, bias=True)
  (relu): ReLU()
  (fc1): Linear(in_features=120, out_features=84, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=84, out_features=10, bias=True)
)

In [None]:
import torch
import numpy as np

def sfw(image_toattack, tgt_lab, model, epsilon=0.3, step=0.4, batch_size=400, T=400):
    """Stochastic Frank-Wolfe attack inside an L-infinity ball around an image.

    Every iteration samples `batch_size` coordinates (with replacement),
    keeps the gradient only on those coordinates, asks the linear
    minimisation oracle for the ball vertex ``x_ori - epsilon * sign(g)`` on
    them, and moves the iterate a fraction `step` towards that vertex.

    Args:
        image_toattack: image to perturb (array-like or tensor). Any shape is
            accepted; coordinates are sampled over the flattened image, which
            for a (1, 28, 28, 1) input is the same sampling as the former
            hard-coded 28x28 grid.
        tgt_lab: label forwarded unchanged to `run_classification` and
            `evaluate_grads`.
        model: model forwarded unchanged to the same two helpers.
        epsilon: radius of the L-infinity ball around the original image.
        step: Frank-Wolfe step size.
        batch_size: number of coordinates sampled per iteration.
        T: maximum number of iterations.

    Returns:
        Tuple ``(x_alpha, sfo_calls, distortion, target_reached, lmo_calls)``:
        `x_alpha` is one iterate drawn uniformly at random from all stored
        iterates (the original image included); `distortion` is the largest
        absolute change of the *final* iterate, which is not necessarily
        `x_alpha`; `target_reached` is the last value reported by
        `run_classification` (False when `T` is 0).

    Raises:
        ValueError: if `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    lmo_calls = 0
    sfo_calls = 0
    target_reached = False  # defined up front so that T == 0 still returns cleanly

    # Own copy of the input: the caller's array/tensor is never aliased.
    x_ori = torch.as_tensor(image_toattack, dtype=torch.float32).detach().clone()
    x = x_ori.clone()
    x_set = [x_ori]
    n_coords = x_ori.numel()

    for _ in range(T):
        # The counters are bumped before the stopping test, so the iteration
        # that detects success is counted as well.
        lmo_calls += 1
        sfo_calls += batch_size

        target_reached, _ = run_classification(x, tgt_lab, model)
        if target_reached:
            break

        # Flat indices in row-major order; duplicates are harmless because
        # every duplicate writes the same gradient value.
        coord_idx = torch.randint(0, n_coords, (batch_size,)).to(x.device)
        g = evaluate_grads(x, tgt_lab, model)
        g_flat = g.detach().to(dtype=x.dtype, device=x.device).reshape(-1)

        # Gradient restricted to the sampled coordinates, zero elsewhere.
        m_flat = torch.zeros(n_coords, dtype=x.dtype, device=x.device)
        m_flat[coord_idx] = g_flat[coord_idx]
        m = (m_flat * (-1 / batch_size)).view(x.shape)

        # LMO over the L-infinity ball; sign(0) == 0 leaves unsampled
        # coordinates at their original value.
        v = epsilon * torch.sign(m) + x_ori

        x = x + step * (v - x)
        x_set.append(x)

    # Calculate distortion
    distortion = torch.max(torch.abs(x - x_ori))

    ind = np.random.choice(len(x_set))
    x_alpha = x_set[ind]
    return x_alpha, sfo_calls, distortion, target_reached, lmo_calls

def run_classification(x, tgt_lab, model):
    """Check whether `model` classifies `x` as the target label.

    Replaces the former placeholder that always returned ``(False, None)``.

    Args:
        x: input batch, already in the layout and on the device `model`
            expects.
        tgt_lab: target class index, as an int or a tensor of indices
            (one-hot labels are not handled -- TODO confirm the label format
            used by callers).
        model: callable returning class scores with the classes on the last
            axis. It is used as given; put it in eval mode beforehand if it
            contains BatchNorm/Dropout layers.

    Returns:
        Tuple ``(target_reached, scores)``: a Python bool that is True when
        every prediction equals the target, and the raw model output.
    """
    with torch.no_grad():
        scores = model(x)
    predicted = scores.argmax(dim=-1).reshape(-1)
    target = torch.as_tensor(tgt_lab, device=predicted.device).reshape(-1)
    target_reached = bool((predicted == target).all())
    return target_reached, scores

def evaluate_grads(x, tgt_lab, model):
    """Gradient of the target-class cross-entropy loss with respect to `x`.

    Replaces the former placeholder that always returned zeros. Stepping
    against this gradient lowers the loss towards `tgt_lab`, i.e. pushes the
    prediction towards the target class.

    Args:
        x: input batch, already in the layout and on the device `model`
            expects. It is not modified; the gradient is taken on a detached
            copy.
        tgt_lab: target class index, as an int or a tensor of indices (a
            single index is applied to every row of the batch). One-hot
            labels are not handled -- TODO confirm the label format used by
            callers.
        model: callable returning class scores of shape (batch, classes).
            It is used as given; put it in eval mode beforehand if needed.

    Returns:
        Tensor with the same shape as `x`. Parameter ``.grad`` fields of
        `model` are left untouched.
    """
    # enable_grad makes this work even when the caller runs under no_grad().
    with torch.enable_grad():
        x_req = x.detach().clone().requires_grad_(True)
        scores = model(x_req)
        target = torch.as_tensor(tgt_lab, dtype=torch.long, device=scores.device)
        target = target.reshape(-1).expand(scores.size(0))
        loss = torch.nn.functional.cross_entropy(scores, target)
        (grad,) = torch.autograd.grad(loss, x_req)
    return grad.detach()



In [None]:
import tqdm
# Module-level state read by the zeroth-order routines defined below.
estimator, query_count = "UniGE", 0   # estimator name checked in est_grad; running query counter
d = 32 * 32                           # problem dimension (presumably one entry per pixel of a 32x32 image)
# not sure if this goes here or somewhere else
w = torch.zeros((d,))                 # current iterate, initialised at the origin
def compute_loss(self, x, y, w=None):
    """Bounded squared-error loss ``c^2 / 2 * (1 - exp(-(y - pred)^2 / c^2))``.

    Args:
        self: object providing ``sigma`` (the scale ``c``) and, when `w` is
            omitted, the weight vector ``w`` of shape [D].
        x (torch.Tensor) : batch data with shape [B x D].
        y (torch.Tensor) : targets; shape [B] when `w` is omitted, otherwise
            [B x K] with one target per weight vector.
        w (torch.Tensor) : optional batch of weight vectors with shape
            [B x K x D] (the gradient estimator passes K = 2Q). When omitted,
            ``self.w`` is used for every sample.

    Returns:
        torch.Tensor: per-sample loss of shape [B] when `w` is omitted,
        otherwise [B x K].
    """
    c = self.sigma
    scale = c * c
    if w is None:
        # Batch loss computing for loss
        pred = x @ self.w                                           # (B)
    else:
        # Batch loss computing for gradient estimating.
        # Only the trailing singleton axis is squeezed: a bare squeeze()
        # would also drop B or K when either equals 1 and then broadcast
        # against y into the wrong shape.
        pred = torch.bmm(w, x.unsqueeze(dim=2)).squeeze(dim=2)      # (B, K)
    squared_error = (y - pred) ** 2
    return scale / 2 * (1 - torch.exp(-squared_error / scale))

def UniGE(x, y, mu, noise=None, w=None, q=1, sigma=1.0):
    """Zeroth-order gradient estimate from forward differences of the loss.

    For each sample and each of Q directions ``u`` the loss is queried at
    ``w + mu * u`` and at ``w``; the estimate is the average of
    ``(loss(w + mu * u) - loss(w)) * u`` scaled by ``D / mu``.

    This function used to read ``self.q`` / ``self.d`` and to call
    `compute_loss` without its first argument, although no ``self`` exists
    at module level. Those values are now taken from explicit arguments and
    from `w` itself.

    Args:
        x: batch data with shape (B, D).
        y: batch targets with shape (B).
        mu: smoothing radius of the finite difference.
        noise: optional directions with shape (B, Q, D) to reuse (e.g. to
            evaluate a second iterate along the same directions). When
            omitted, directions are drawn uniformly from [-1, 1)^D and
            scaled to unit Euclidean norm.
        w: point of shape (D) at which the gradient is estimated. When
            omitted, the module-level ``w`` is used.
        q: number of directions per sample when `noise` is omitted.
            NOTE(review): the notebook defines no value for this; 1 is a
            placeholder default -- confirm the intended setting.
        sigma: loss scale handed to `compute_loss` as ``self.sigma``.
            NOTE(review): the notebook defines no value for this either;
            1.0 is a placeholder default -- confirm the intended setting.

    Returns:
        Tuple ``(grad, n_queries, noise)``: the estimate of shape (D), the
        number of loss evaluations ``2 * Q * B``, and the directions used.

    Raises:
        ValueError: if `w` is omitted and no module-level ``w`` exists.
    """
    from types import SimpleNamespace  # only needed to build the loss context below

    with torch.no_grad():
        if w is None:
            # The parameter shadows the module-level iterate, hence globals().
            w = globals().get("w")
            if w is None:
                raise ValueError("UniGE needs `w`: pass it explicitly or define a module-level `w`")

        B = x.size(0)
        D = w.numel()
        Q = q if noise is None else noise.size(1)

        w_left = w.reshape(1, 1, D).expand(B, Q, D)        # (B, Q, D)
        if noise is None:
            noise = torch.rand(B, Q, D, dtype=w.dtype, device=w.device) * 2 - 1     # Scaling uniform distribution from [0, 1) to [-1, 1)
            noise = noise / noise.norm(dim=-1, keepdim=True)        # (B, Q, D), unit-norm rows

        # First Q rows are the perturbed points, last Q rows the base point.
        w_all = torch.cat([w_left, w_left], dim=1)      # (B, 2Q, D)
        noise_all = torch.cat([noise, torch.zeros_like(noise)], dim=1)

        target = y.unsqueeze(dim=1).repeat(1, 2 * Q)      # (B, 2Q)

        # compute_loss is written as a method; hand it the attributes it reads.
        loss_ctx = SimpleNamespace(sigma=sigma, w=w)
        loss = compute_loss(loss_ctx, x, target, w_all + mu * noise_all)     # (B, 2Q)
        loss_left, loss_right = loss[:, :Q], loss[:, Q:]        # (B, Q), (B, Q)

        grad = (loss_left - loss_right).reshape(B, Q, 1) * noise   # (B, Q, D)
        grad = torch.mean(grad, dim=1)      # (B, D)
        grad = torch.mean(grad, dim=0)      # (D)
        grad = grad * (D / mu)
    return grad, 2 * Q * B, noise

def est_grad(x, y, mu, w_old=None):
    """Estimate the gradient with the configured zeroth-order estimator.

    Only 'UniGE' is available here; the GauGE and CooGE variants have not
    been ported to this notebook. Every loss query made by the estimator is
    added to the module-level ``query_count``.

    Args:
        x: batch data, forwarded to the estimator.
        y: batch targets, forwarded to the estimator.
        mu: smoothing radius, forwarded to the estimator.
        w_old: optional second point. When given, the gradient at `w_old` is
            estimated as well, reusing the random directions of the first
            estimate.

    Returns:
        ``grad`` when `w_old` is None, otherwise the tuple
        ``(grad, grad_old)``.

    Raises:
        ValueError: if the module-level ``estimator`` is not 'UniGE'.
    """
    # Without this declaration the `+=` below would make query_count a local
    # name and fail with UnboundLocalError on its first use.
    global query_count

    if estimator != 'UniGE':
        # Fail with a clear message instead of an unbound `grad` at return.
        raise ValueError("unsupported estimator {!r}; only 'UniGE' is available".format(estimator))

    grad, query_size, noise = UniGE(x, y, mu)
    query_count += query_size
    if w_old is None:
        return grad

    # Same directions as above so both estimates share their randomness.
    grad_old, query_size, _ = UniGE(x, y, mu, noise, w_old)
    query_count += query_size
    return grad, grad_old
    
def LMO_L1(grad, theta):
    """Linear minimisation oracle over the L1 ball of radius `theta`.

    Returns the ball vertex that minimises the inner product with `grad`:
    all zeros except ``-theta * sign(grad)`` at the entry of largest
    magnitude.

    Args:
        grad: gradient tensor of any shape.
        theta: radius of the L1 ball.

    Returns:
        Tensor with the shape, dtype and device of `grad`. An all-zero or
        empty `grad` yields an all-zero result.
    """
    flat_grad = grad.reshape(-1)
    vertex = torch.zeros(flat_grad.numel(), dtype=grad.dtype, device=grad.device)
    if flat_grad.numel() > 0:
        # argmax returns an index into the flattened tensor, so the write
        # must go through the flat view as well (indexing `grad` directly
        # with it is only correct for 1-D input).
        coord = torch.argmax(flat_grad.abs())
        vertex[coord] = -theta * torch.sign(flat_grad[coord])
    return vertex.view(grad.shape)
def attack(theta = 1, T = 1000, base_lr = 1, log_step = 10):
        """Momentum Frank-Wolfe loop over the L1 ball, driven by estimated gradients.

        Per iteration: fetch a batch, estimate the gradient, blend it into the
        running average `m` with weight ``beta(iteration)``, take the L1-ball
        vertex from `LMO_L1`, and move `w` towards it with step
        ``lr(iteration)``. Every `log_step` iterations the L1 norm of `w` is
        checked and train/test losses are logged.

        Args:
            theta: L1 radius; here it is only used in the out-of-range
                message (the check and the LMO read ``self.theta``).
            T: not read by the body (the loop runs over ``self.T``).
            base_lr: not read by the body.
            log_step: logging / sanity-check period in iterations.

        Returns:
            ``0`` when the norm check fails, otherwise the tuple
            ``(train_loss, test_loss)`` from the last logged iteration.

        NOTE(review): this is a method body pasted as a module-level function
        and it cannot run as written:
          * ``self`` is not a parameter, so every ``self.*`` access fails;
          * `DataFetcher`, `batch_compute_loss` and `model` are not defined in
            the visible part of the notebook -- confirm where they come from;
          * the cell does ``import tqdm``, which binds the module, while the
            loop calls ``tqdm(...)`` as if it were the progress-bar class;
          * `w` is assigned at the end of the loop body, which makes it a
            local name for the whole function, so the first ``w.abs()`` reads
            it before assignment (a ``global w`` declaration or an explicit
            argument is needed).
        """
        with torch.no_grad():
            batch_fetcher = DataFetcher(self.train_data, batch_size=self.batch_size)
            

            for iteration in tqdm(range(self.T)):
                
                if iteration % log_step == 0:
                    # Sanity check: the iterate must stay inside the L1 ball
                    # (small tolerance for floating-point error).
                    weight_norm = w.abs().sum()
                    is_valid = weight_norm <= (self.theta + 1e-6)
                    if not is_valid:
                        print('!!! ??? weight out of range ({} > {})'.format(weight_norm, theta))
                        return 0
                    
                    train_loss = batch_compute_loss(self.model.compute_loss, self.train_data)
                    test_loss = batch_compute_loss(self.model.compute_loss, self.test_data)

                    desc = 'Iter {} | TrainLoss {:.6f}  | TestLoss {:.6f} | Norm {:.4f} | lr {:.6f} | beta {:.6f} | delta {:.6f} |'.format(iteration, train_loss, test_loss, weight_norm.item(), self.lr(iteration), self.beta(iteration), self.delta(iteration))
                    tqdm.write(desc)
                    #if writer is not None:
                    #    writer.add_scalar(flag[0], train_loss, global_step=iteration)
                    #    writer.add_scalar(flag[1], test_loss, global_step=iteration)

                x, y = batch_fetcher.fetch()
                # delta(iteration) is passed as the smoothing radius `mu`.
                grad = model.est_grad(x, y, self.delta(iteration))

                # Running average of gradient estimates; seeded with the first
                # estimate, so the blend below leaves it unchanged at iteration 0.
                if iteration == 0:
                    m = grad
                m = (1 - self.beta(iteration)) * m + self.beta(iteration) * grad
                grad = m

                # Frank-Wolfe step towards the L1-ball vertex returned by the LMO.
                # `d` is local here and does not touch the module-level `d`.
                v = LMO_L1(grad, self.theta)
                d = v - w
                w = w + self.lr(iteration) * d                

        return train_loss, test_loss
