In [133]:
from __future__ import print_function, division
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
import torch.nn as nn
import torch.optim as optim

from torch.optim import lr_scheduler
from torchvision import transforms

import tqdm
import time
import copy
import numpy as np

from torchvision import datasets, models
from torch.utils.data import DataLoader, ConcatDataset

In [134]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [135]:
# Fix the NumPy, PyTorch CPU and PyTorch CUDA random number generators to seed 0.
# The CUDA call comes last so the cell still ends on a call that returns None
# and therefore renders no output.
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)

In [136]:
# Training hyperparameters shared by the cells below.
LR = 1e-2         # initial learning rate handed to the optimizer
EPOCH = 25        # number of epochs requested from train_model
BATCH_SIZE = 128  # samples per mini-batch in every DataLoader

# Data Preparation

In [137]:
# Plain MNIST splits: each image is resized to 227x227 and converted to a tensor.
train_dataset = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
    ]),
)

test_dataset = datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
    ]),
)

# Augmentation pipeline for a second copy of the training split: padded random
# crop, random horizontal flip and random rotation, followed by the same
# resize + tensor conversion used for the plain splits.
# NOTE(review): horizontal flips mirror the digits - confirm this augmentation
# is intended for MNIST.
transformers = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
])

train_dataset_transformed = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=transformers,
)

In [138]:
# Stack the plain training images and their augmented copies into one dataset;
# the train/validation split below draws from this combined pool.
final_dataset = ConcatDataset(
    datasets=[train_dataset, train_dataset_transformed],
)

In [139]:
# Carve the combined dataset into 100,000 training and 20,000 validation samples.
# A freshly constructed torch.Generator is passed, so the split draws from that
# generator rather than from the globally seeded RNG.
train_subset, valid_subset = torch.utils.data.random_split(
    dataset=final_dataset,
    lengths=[100_000, 20_000],
    generator=torch.Generator(),
)

In [140]:
# Mini-batch loaders for the three splits. Only the training loader reshuffles;
# validation and test are served in a fixed order.
train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_subset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [141]:
# Pull a single (images, labels) batch from the training loader and show the
# size of the image tensor as a sanity check.
data = next(iter(train_loader))
data[0].size()

torch.Size([128, 1, 227, 227])

In [142]:
# Per-phase lookups consumed by train_model: the loader to iterate and the
# number of samples used to average the running loss and accuracy.
dataloaders_dict = dict(train=train_loader, val=valid_loader)
dataset_sizes_dict = dict(train=len(train_subset), val=len(valid_subset))

# Definitions

In [143]:
class AlexNet(nn.Module):
    """torchvision AlexNet adapted to 1-channel input and 10 outputs.

    The first convolution of the stock network is replaced by one that accepts
    a single input channel, and a ``Linear(1000 -> 10)`` layer is appended to
    the end of the stock classifier.
    """

    def __init__(self):
        super().__init__()

        # Layers are created in the same order as before (backbone, conv,
        # linear) so that seeded weight initialisation is unaffected.
        backbone = models.alexnet()
        backbone.features[0] = nn.Conv2d(
            in_channels=1, out_channels=64,
            kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
        backbone.classifier.append(
            nn.Linear(in_features=1000, out_features=10, bias=True))

        self.alex = backbone

    def forward(self, x):
        """Run ``x`` through the adapted backbone and return its output."""
        logits = self.alex(x)
        return logits

# Instantiate the network and move it to the selected device; the bare name on
# the last line renders the layer summary as the cell output.
net = AlexNet()
net = net.to(device)
net

AlexNet(
  (alex): AlexNet(
    (features): Sequential(
      (0): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
    (classifier): Sequential(
      (0): Dropout(p=0.5, inplace=False)
      (1):

In [144]:
# Sanity-check the output shape on the sample batch. The batch comes out of the
# DataLoader on the CPU while `net` was moved to `device`, so it is moved to the
# same device first; otherwise this cell fails whenever CUDA is selected.
net(data[0].to(device)).shape

torch.Size([128, 10])

In [145]:
def train_model(model, criterion, optimizer, scheduler, attack, dataloaders, dataset_sizes, num_epochs=25,
                save_path='./best_model_wts'):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a ``'train'`` phase followed by a ``'val'`` phase. After
    each validation phase whose accuracy strictly exceeds the best seen so
    far, a copy of the state dict is kept in memory and written to
    ``save_path``.

    Args:
        model: network to optimise; called as ``model(inputs)``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the updates.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            train phase.
        attack: optional callable. When truthy it is called as
            ``attack(inputs)`` in both phases and the second element of its
            return value replaces the batch.
        dataloaders: mapping with ``'train'`` and ``'val'`` iterables that
            yield ``(inputs, labels)`` batches.
        dataset_sizes: mapping with the ``'train'`` and ``'val'`` sample
            counts used to average the running loss and accuracy.
        num_epochs: number of train/val rounds to run.
        save_path: file the best state dict is written to via ``torch.save``.

    Returns:
        The same ``model`` object, with the best recorded weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        for phase in ['train', 'val']:
            is_train = phase == 'train'

            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            # Plain Python int: stays valid when the loader yields no batches
            # and keeps the accuracy a float rather than a device tensor.
            running_corrects = 0

            for inputs, labels in tqdm.tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                if attack:
                    _, inputs = attack(inputs)

                optimizer.zero_grad()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_train):

                    outputs = model(inputs)
                    _, predictions = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # The criterion result is scaled by the batch size so that the
                # division by the dataset size below yields a per-sample value.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (predictions == labels).sum().item()

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('epoch:{} phase:{}'.format(epoch + 1, phase))
            print(
                '{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(best_model_wts, save_path)

    time_elapsed = time.time() - since

    print(
        'Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # Four decimals, matching the per-epoch accuracy lines above.
    print(
        'Best val Acc: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)

    return model

In [146]:
# Loss, optimizer and learning-rate schedule handed to train_model.
criterion_md = nn.CrossEntropyLoss()
optimizer_md = optim.Adam(params=net.parameters(), lr=LR)
# StepLR scales the learning rate by gamma=0.1 every 10 scheduler steps.
lr_scheduler_md = lr_scheduler.StepLR(
    optimizer=optimizer_md, step_size=10, gamma=0.1)

In [147]:
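# TODO: choose and test eps for the adversarial attack sketched below (RSAttack and model_clf are not defined in the cells above).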
# adversary = RSAttack(model_clf, eps=10, verbose=True, n_queries=5_000, loss='ce')

In [None]:
# Fit the network without an adversarial attack (attack=None). train_model
# returns the model with the best recorded validation weights loaded.
model_fn = train_model(
    model=net,
    criterion=criterion_md,
    optimizer=optimizer_md,
    scheduler=lr_scheduler_md,
    attack=None,
    dataloaders=dataloaders_dict,
    dataset_sizes=dataset_sizes_dict,
    num_epochs=EPOCH,
)

# Prediction

In [None]:
def predict(classifier, dataloader):
    """Return the predicted class index for every sample in ``dataloader``.

    Args:
        classifier: model called as ``classifier(images)``; it is switched to
            evaluation mode before inference.
        dataloader: iterable of batches whose first element is the image
            tensor (any further elements, e.g. labels, are ignored).

    Returns:
        A CPU ``LongTensor`` of shape ``(num_samples, 1)`` holding the index of
        the largest output per sample, or an empty ``LongTensor`` when the
        dataloader yields no batches.
    """
    classifier.eval()

    # Run inference on the device that holds the classifier's weights, so the
    # inputs always match the model regardless of which device is available.
    parameters = getattr(classifier, 'parameters', None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    target_device = first_param.device if first_param is not None else None

    # Collect per-batch predictions and concatenate once at the end instead of
    # re-copying the growing result tensor on every iteration.
    batch_predictions = []

    with torch.no_grad():

        for batch in dataloader:

            images = batch[0]
            if target_device is not None:
                images = images.to(target_device)

            outputs = classifier(images)
            batch_predictions.append(outputs.argmax(dim=1, keepdim=True).cpu())

    if not batch_predictions:
        return torch.LongTensor()

    return torch.cat(batch_predictions, dim=0)

In [None]:
# Predicted class index for every test image, produced by the trained model.
results = predict(classifier=model_fn, dataloader=test_loader)