In [1]:
import torch
from torchvision import datasets, transforms
from torch import nn


(i) knowledge-oblivious – the attacker shall have no knowledge of the target
model's parameters/structures, nor of the original training datasets;

(ii) clean-label – the attacker shall not be able to control the labeling process; and

(iii) clean-test – test-time instances shall not be required to be modified using added
adversarial perturbations for attacking effectiveness.

https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123720137.pdf

Make sure that you do not reuse the same data for training, testing, and the attack/defence. – prof

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# with mean 0.5 and standard deviation 0.5 on the single channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Fashion-MNIST train and test splits (downloaded into 'MNIST_data/' if absent).
trainset, testset = (
    datasets.FashionMNIST('MNIST_data/', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Both loaders use mini-batches of 12 samples and reshuffle on every pass.
trainloader, testloader = (
    torch.utils.data.DataLoader(split, batch_size=12, shuffle=True)
    for split in (trainset, testset)
)

In [3]:
# Pull one batch from the training loader and display the shape of its image tensor.
sample_images = next(iter(trainloader))[0]
sample_images.shape

torch.Size([12, 1, 28, 28])

In [4]:
# Select the compute device.
# The notebook was written to run on the second GPU ("cuda:1"). Requesting that
# index on a machine with only one GPU is an invalid device ordinal, so fall
# back to GPU 0 in that case, and to the CPU when CUDA is not available at all.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

In [5]:
# Fully connected classifier on flattened images: 784 -> 256 -> 128 -> 64 -> 10.
# Every hidden layer is followed by ReLU and dropout (p = 0.2); the output layer
# is followed by LogSoftmax, which pairs with the NLLLoss criterion below.
layer_sizes = [784, 256, 128, 64]
layers = []
for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
    layers += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.2)]
layers += [nn.Linear(64, 10), nn.LogSoftmax(dim=1)]

model = nn.Sequential(*layers).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
criterion = nn.NLLLoss()

In [6]:
# Train the classifier for `epochs` passes over `trainloader`. After each pass
# the model is evaluated on `testloader`, the mean per-batch train/test losses
# are appended to `train_losses` / `test_losses`, and a summary line is printed.
#
# `running_loss`, `test_loss` and `accuracy` are kept as plain Python floats
# holding per-batch SUMS (divide by the number of batches to get the mean), so
# the history lists never hold device tensors.
train_losses, test_losses = [], []
epochs = 10

for e in range(epochs):
    # ---- training pass ----
    running_loss = 0.0
    for images, labels in trainloader:
        # Flatten Fashion-MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run.
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # ---- evaluation pass ----
    test_loss = 0.0
    accuracy = 0.0

    # Evaluation mode disables dropout; no_grad saves memory and computation.
    model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images = images.view(images.shape[0], -1).to(device)
            labels = labels.to(device)
            log_ps = model(images)
            # .item() converts the 0-dim loss tensor to a float before summing.
            test_loss += criterion(log_ps, labels).item()

            # The most likely class is the arg-max of the log-probabilities.
            top_class = log_ps.argmax(dim=1)
            # Per-batch accuracy; the value reported below is the mean of these
            # batch means, so a smaller final batch is weighted like a full one.
            accuracy += (top_class == labels).float().mean().item()

    # Back to training mode for the next epoch.
    model.train()
    train_losses.append(running_loss/len(trainloader))
    test_losses.append(test_loss/len(testloader))

    print("Epoch: {}/{}..".format(e+1, epochs),
          "Training loss: {:.3f}..".format(running_loss/len(trainloader)),
          "Test loss: {:.3f}..".format(test_loss/len(testloader)),
          "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))

Epoch: 1/10.. Training loss: 0.645.. Test loss: 0.505.. Test Accuracy: 0.817
Epoch: 2/10.. Training loss: 0.539.. Test loss: 0.470.. Test Accuracy: 0.836
Epoch: 3/10.. Training loss: 0.514.. Test loss: 0.434.. Test Accuracy: 0.840
Epoch: 4/10.. Training loss: 0.501.. Test loss: 0.451.. Test Accuracy: 0.840
Epoch: 5/10.. Training loss: 0.487.. Test loss: 0.437.. Test Accuracy: 0.838
Epoch: 6/10.. Training loss: 0.479.. Test loss: 0.430.. Test Accuracy: 0.853
Epoch: 7/10.. Training loss: 0.468.. Test loss: 0.481.. Test Accuracy: 0.836
Epoch: 8/10.. Training loss: 0.477.. Test loss: 0.483.. Test Accuracy: 0.834
Epoch: 9/10.. Training loss: 0.465.. Test loss: 0.461.. Test Accuracy: 0.850
Epoch: 10/10.. Training loss: 0.456.. Test loss: 0.415.. Test Accuracy: 0.857


In [7]:
# Re-print the summary of the most recent epoch from the values the training
# loop left behind (e, epochs, running_loss, test_loss, accuracy).
avg_train_loss = running_loss/len(trainloader)
avg_test_loss = test_loss/len(testloader)
avg_test_accuracy = accuracy/len(testloader)

summary_parts = [
    "Epoch: {}/{}..".format(e+1, epochs),
    "Training loss: {:.3f}..".format(avg_train_loss),
    "Test loss: {:.3f}..".format(avg_test_loss),
    "Test Accuracy: {:.3f}".format(avg_test_accuracy),
]
print(*summary_parts)

Epoch: 10/10.. Training loss: 0.456.. Test loss: 0.415.. Test Accuracy: 0.857


In [9]:
# Show which device was selected for the model and the training batches.
print(device)

cuda:1


# Clustering (poisoned) training data to check for wrong labels

In [68]:
# clustering code

from clustering_ae import Clustering

# Settings for the clustering model from the project-local `clustering_ae` module.
# NOTE(review): 10 clusters presumably mirrors the 10 Fashion-MNIST classes;
# the exact meaning of n_init / epochs / tol is defined in clustering_ae — confirm there.
clustering_kwargs = dict(
    num_clusters=10,
    n_init=10,
    epochs=3000,
    tol=1e-2,
    initialization="k-means++",
)
cae = Clustering(**clustering_kwargs)

In [69]:
# Walk through the whole training loader once and collect every image as a
# flat 784-long NumPy vector in `imgs`, with its label at the same index in `lbls`.
imgs, lbls = [], []
for images, labels in trainloader:
    flat_batch = images.reshape(len(images), -1)
    # extend() iterates over the first axis, adding one row / one label per sample.
    imgs.extend(flat_batch.numpy())
    lbls.extend(labels.numpy())

In [74]:
import numpy as np
# Turn the collected labels (a Python list of NumPy scalars) into one NumPy array.
lbls = np.asarray(lbls)

In [71]:
# Fit the clustering model on the flattened training images; only the images
# are passed in, the labels are not.
cae.train(imgs)

In [75]:
# Evaluate the fitted clustering against the true labels.
# NOTE(review): what `benchmark` computes and returns is defined in clustering_ae — confirm there.
benchmark = cae.benchmark('Fashion mnist', imgs, lbls)

In [None]:
# Display the benchmark result returned by the previous cell.
benchmark

In [72]:
# Predict the cluster of a single collected image (index 10); `predict` is
# given a one-element list.
p = cae.predict([imgs[10]])

In [73]:
# Print the predicted cluster next to the true label of the same image.
# NOTE(review): cluster ids are presumably arbitrary and need not match class labels — confirm.
print(p, lbls[10])

[7] 8


In [79]:
# Keep only the collected images whose label is 3, then ask the clustering
# model which cluster each of them is assigned to.
test_imgs = [img for img, lbl in zip(imgs, lbls) if lbl == 3]

preds = cae.predict(test_imgs)

In [84]:
from collections import Counter

# Build a histogram {cluster id: number of images assigned to it} for `preds`.
# The conversion is guarded so that re-running this cell still works once
# `preds` has already been turned into a plain list.
preds = preds.tolist() if hasattr(preds, "tolist") else list(preds)
# Counter tallies in a single pass (instead of calling preds.count() once per
# element) and keeps keys in first-occurrence order, so the resulting dict is
# identical to the one produced by the zip/count construction.
pred_dict = dict(Counter(preds))

In [85]:
# Display the histogram: predicted cluster id -> number of label-3 images in that cluster.
pred_dict

{6: 3117, 2: 1720, 4: 340, 1: 738, 3: 55, 8: 17, 5: 1, 9: 9, 7: 3}