In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


# Import

In [2]:
import os
import numpy as np
import pandas as pd
import time
import argparse
# import apex.amp as amp
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as transforms

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import roc_auc_score

path = "/content/gdrive/My Drive/GameTheory/"


gpu = True
gpu = gpu and torch.cuda.is_available()
device = torch.device("cuda" if gpu else "cpu")

'''hparameters'''
# BATCH_SIZE = 200 
# NUM_WORKERS = 8 
# NUM_EPOCHS = 20 

# numEpochs = 20
num_feats = 3 # in_channels
num_classes = 10


learningRate = 0.01
weightDecay = 5e-4

best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

In [3]:
print(device)

cuda


# Data

In [4]:
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

==> Preparing data..
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


# Model

## MyResnet50

In [5]:
# kernel_size=3, padding=1
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                     stride=stride, padding=1, bias=False)
    
# kernel_size=1, padding=0
def conv1x1(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=1, 
                     stride=stride, bias=False)

num_classes = 10
class Bottleneck(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1, isDownSample=False):
        super(Bottleneck, self).__init__()
        
        self.inchannel = inchannel
        self.expansion = 4
        self.isDownSample = isDownSample
        
        self.conv1 = conv1x1(inchannel, outchannel)
        self.norm1 = nn.BatchNorm2d(outchannel)

        self.conv2 = conv3x3(outchannel, outchannel, stride)
        self.norm2 = nn.BatchNorm2d(outchannel)
        
        self.conv3 = conv1x1(outchannel, outchannel * self.expansion)
        self.norm3 = nn.BatchNorm2d(outchannel * self.expansion)
        
        self.relu = nn.ReLU(inplace=True)
        
        if isDownSample:
            self.downsample = nn.Sequential(
                conv1x1(inchannel, outchannel * self.expansion, stride),
                nn.BatchNorm2d(outchannel * self.expansion)
            )

    def forward(self, x):
        identity = x

        out = self.relu(self.norm1(self.conv1(x)))
        out = self.relu(self.norm2(self.conv2(out)))
        out = self.relu(self.norm3(self.conv3(out)))

        if self.isDownSample:
            out += self.downsample(identity)
        
        out = self.relu(out)
        return out


class ResNet50(nn.Module):
    def __init__(self, block, layers, num_classes=num_classes, outs=[64, 128, 256, 512]):
        super(ResNet50, self).__init__()
        """
        Arguments:
            block (class): BasicBlock(nn.Module)
            layers (list): A ResNet’s layer is composed of the same blocks stacked one after the other.
            num_classes (int): num_classes = 4000
            outs (list): dim before expension(*4)
        """
        self.expansion = 4
        self.inchannel = 64*self.expansion
        self.conv0 = conv3x3(3, 64*self.expansion, stride=1)
        
        self.layer1=self.make_layer(block,outs[0],layers[0],stride=1) # 3
        self.layer2=self.make_layer(block,outs[1],layers[1],stride=2) # 4
        self.layer3=self.make_layer(block,outs[2],layers[2],stride=2) # 6
        self.layer4=self.make_layer(block,outs[3],layers[3],stride=2) # 3

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*4, num_classes)
        
        # self.cfc = nn.Linear(512*4, outFeat)
        # self.crelu = nn.ReLU(inplace=True)

    def make_layer(self, block, out_channels, block_num, stride=1):
        """
            block (class): BottleneckBlock(nn.Module)
            out_channels (int)：output size of layer
            block_num (int)：total blocks
            stride (int)：Conv Block stride
        """

        if stride!=1 or self.inchannel!=(out_channels*self.expansion):
            isDownsample = True
        else: isDownsample = False
            
        layers = []
        #Conv Block: different size
        conv_block=block(self.inchannel, out_channels, stride, isDownsample)
        layers.append(conv_block)
        self.inchannel = out_channels*self.expansion
        
        #Identity Block: same size
        for i in range(1, block_num):
            layers.append(block(self.inchannel, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x, ver=False):
        out = x
        out = self.conv0(out)

        out=self.layer1(out)
        out=self.layer2(out)
        out=self.layer3(out)
        out=self.layer4(out)

        out = self.avgpool(out)
        # out = torch.squeeze(out)
        out = out.reshape(out.shape[0], out.shape[1])
        
        # embed = out
        out = self.fc(out)
        # cout = self.cfc(out)
        # cout = self.crelu(cout)
        return out

def init_weights(m):
    if type(m) == nn.Conv2d or type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight.data)

## BasicBlock

In [6]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

## BottleNeck

In [7]:
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


## ResNet

In [8]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [9]:
def init_weights(m):
    if type(m) == nn.Conv2d or type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight.data)
        
def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])

# Training

In [10]:
def train(net, epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        
        torch.cuda.empty_cache()
        del inputs
        del targets

        # progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
        #              % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
    acc = correct/total
    avg_loss = train_loss/total
    
    return avg_loss, acc


def test(net, epoch):
    # global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            
            torch.cuda.empty_cache()
            del inputs
            del targets
            
            # progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            #              % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
    acc = correct/total
    avg_loss = test_loss/total
    
    return avg_loss, acc
    # Save checkpoint.
    # acc = 100.*correct/total
    # if acc > best_acc:
    #     print('Saving..')
    #     state = {
    #         'net': net.state_dict(),
    #         'acc': acc,
    #         'epoch': epoch,
    #     }
    #     if not os.path.isdir('checkpoint'):
    #         os.mkdir('checkpoint')
    #     torch.save(state, './checkpoint/ckpt.pth')
    #     best_acc = acc

# Adversarial training

## FGSM

In [15]:
def fgsm_train(net, epoch, eps=0.01):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        inputs.requires_grad = True

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        data_grad = inputs.grad.data
        perturbed_data = fgsm_attack(inputs, eps, data_grad)
        new_outputs = net(perturbed_data)
        new_loss = criterion(new_outputs, targets)
        new_loss.backward()
        
        optimizer.step()

        train_loss += new_loss.item()
        _, new_predicted = new_outputs.max(1)
        total += targets.size(0)
        correct += new_predicted.eq(targets).sum().item()
        
        torch.cuda.empty_cache()
        del inputs
        del targets

        # progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
        #              % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
    acc = correct/total
    avg_loss = train_loss/total
    
    return avg_loss, acc

## PGD

In [None]:
def clamp(X, lower_limit, upper_limit):
    return torch.max(torch.min(X, upper_limit), lower_limit)

In [None]:
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2471, 0.2435, 0.2616)

mu = torch.tensor(cifar10_mean).view(3,1,1).cuda()
std = torch.tensor(cifar10_std).view(3,1,1).cuda()

upper_limit = ((1 - mu) / std)
lower_limit = ((0 - mu) / std)

epsilon = (8/255.) / std

step_size = 2

iters = 5

In [None]:
# Training
def pgd_train(model, epoch):
    # start_train_time = time.time()

    # logger.info('Epoch \t Seconds \t LR \t \t Train Loss \t Train Acc')

# for epoch in range(args.epochs):
# for epoch in range(start_epoch, start_epoch+20):
    start_epoch_time = time.time()
    
    train_loss = 0
    train_acc = 0
    train_n = 0

    for i, (X, y) in enumerate(trainloader):
        X, y = X.cuda(), y.cuda()
        delta = torch.zeros_like(X).cuda()

        # if args.delta_init == 'random':
        for i in range(len(epsilon)):
            delta[:, i, :, :].uniform_(-epsilon[i][0][0].item(), epsilon[i][0][0].item())
        delta.data = clamp(delta, lower_limit-X, upper_limit-X)

        delta.requires_grad = True
        for _ in range(iters):
            output = model(X + delta)
            loss = criterion(output, y)

            # with amp.scale_loss(loss, optimizer) as scaled_loss:
            loss.backward()
            
            grad = delta.grad.detach()
            delta.data = clamp(delta + step_size*torch.sign(grad), -epsilon, epsilon)
            delta.data = clamp(delta, lower_limit-X, upper_limit-X)
            delta.grad.zero_()
        
        delta = delta.detach()
        output = model(X + delta)
        loss = criterion(output, y)
        
        optimizer.zero_grad()
        # with amp.scale_loss(loss, optimizer) as scaled_loss:
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * y.size(0)
        train_acc += (output.max(1)[1] == y).sum().item()
        train_n += y.size(0)
        
        # scheduler.step()
    epoch_time = time.time()
    # lr = scheduler.get_lr()[0]
    # logger.info('%d \t %.1f \t \t %.4f \t %.4f \t %.4f',
    #             epoch, epoch_time-start_epoch_time, train_loss/train_n, train_acc/train_n)
    
    print(epoch_time-start_epoch_time)
    return train_loss/train_n, train_acc/train_n
# train_time = time.time()

# torch.save(model.state_dict(), os.path.join(args.out_dir, 'model.pth'))

# logger.info('Total train time: %.4f minutes', (train_time - start_train_time)/60)

In [None]:
# Train the model
# optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
def adv_train2(net, epoch, loader_train):

# for epoch in range(epoch):
    print('Starting epoch %d / %d' % (epoch + 1, epoch))

    for t, (x, y) in enumerate(loader_train):

        x_var, y_var = to_var(x), to_var(y.long())
        loss = criterion(net(x_var), y_var)

        # adversarial training
        if epoch+1 > param['delay']:
            # use predicted label to prevent label leaking
            y_pred = pred_batch(x, net)
            x_adv = adv_train(x, y_pred, net, criterion, adversary)
            x_adv_var = to_var(x_adv)
            loss_adv = criterion(net(x_adv_var), y_var)
            loss = (loss + loss_adv) / 2

        if (t + 1) % 100 == 0:
            print('t = %d, loss = %.8f' % (t + 1, loss.data[0]))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Adversarial Attack

## FGSM

In [None]:
def testattack(net, testloader, epsilon):
    # global best_acc
    net.eval()

    # test_loss = 0
    correct = 0
    # total = 0
    # with torch.no_grad():
    for b, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.to(device), targets.to(device)
        
        inputs.requires_grad = True
        outputs = net(inputs)
        _, predicted = outputs.max(1)
        # if predicted.item() != targets.item():
        #     continue
        loss = criterion(outputs, targets)
        net.zero_grad()

        # test_loss += loss.item()
        loss.backward()
        data_grad = inputs.grad.data
        perturbed_data = fgsm_attack(inputs, epsilon, data_grad)
        new_outputs = net(perturbed_data)
        _, new_predicted = new_outputs.max(1)

        # total += targets.size(0)
        correct += new_predicted.eq(targets).sum().item()

        torch.cuda.empty_cache()
        del inputs
        del targets
        
    acc = correct/float(len(testloader))
    # avg_loss = test_loss/total
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(testloader), acc))

    return acc

In [None]:
def attack(model, test_loader, epsilon):
    model.eval()

    correct = 0
    adv_examples = []

    # Loop over all examples in test set
    for data, target in test_loader:

        # Send the data and label to the device
        data, target = data.to(device), target.to(device)

        # Set requires_grad attribute of tensor. Important for Attack
        data.requires_grad = True

        # Forward pass the data through the model
        output = model(data)
        init_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        # _, predicted = output.max(1)

        # If the initial prediction is wrong, dont bother attacking, just move on
        if init_pred.item() != target.item():
            continue

        # Calculate the loss
        # loss = F.nll_loss(output, target)
        loss = criterion(output, target)

        # Zero all existing gradients
        model.zero_grad()

        # Calculate gradients of model in backward pass
        loss.backward()

        # Collect datagrad
        data_grad = data.grad.data

        # Call FGSM Attack
        perturbed_data = fgsm_attack(data, epsilon, data_grad)

        # Re-classify the perturbed image
        output = model(perturbed_data)

        # Check for success
        final_pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        if final_pred.item() == target.item():
            correct += 1
            # Special case for saving 0 epsilon examples
            if (epsilon == 0) and (len(adv_examples) < 5):
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )
        else:
            # Save some adv examples for visualization later
            if len(adv_examples) < 5:
                adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
                adv_examples.append( (init_pred.item(), final_pred.item(), adv_ex) )

        torch.cuda.empty_cache()
        del data
        del target

    # Calculate final accuracy for this epsilon
    final_acc = correct/float(len(test_loader))
    print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(test_loader), final_acc))

    # Return the accuracy and an adversarial example
    return final_acc, adv_examples

## PGD

In [None]:
def testattack2(model, testloader):
    print("Attack Image & Predicted Label")

    model.eval()

    correct = 0
    total = 0

    for images, labels in testloader:
        
        images = pgd_attack(model, images, labels)
        labels = labels.to(device)
        outputs = model(images)

        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        # imshow(torchvision.utils.make_grid(images.cpu().data, normalize=True), [normal_data.classes[i] for i in pre])
        
    final_acc = float(correct)/total
    print('Accuracy of test text: %f %%' % (100 * float(correct)/total))

    return final_acc

# Main

In [13]:
"""ResNet50 Model"""
# resmodel = ResNet50(Bottleneck, [3,4,6,3]).to(device)
resmodel = ResNet50().to(device)
resmodel.apply(init_weights)

if device == 'cuda':
    resmodel = torch.nn.DataParallel(resmodel)
    cudnn.benchmark = True

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resmodel.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)
# optimizer = torch.optim.Adam(resmodel.parameters(), lr=1e-3)
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

## free train

In [None]:
for epoch in range(start_epoch, start_epoch+50):
    train_loss, train_acc = train(resmodel, epoch)
    val_loss, val_acc = test(resmodel, epoch)
    
    print('[Epoch: {}]\nTrain Loss: {:.4f}\tTrain Accuracy: {:.4f}\tVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
          format(epoch, train_loss, train_acc, val_loss, val_acc))
    
    if (epoch+1)%10 == 0:
        torch.save({'model_state_dict': resmodel.state_dict(),},
                    path + "ResNet50_{}.pth".format(str(epoch)))


Epoch: 0
[Epoch: 0]
Train Loss: 0.0034	Train Accuracy: 0.8527	Val Loss: 0.0052	Val Accuracy: 0.8247

Epoch: 1
[Epoch: 1]
Train Loss: 0.0031	Train Accuracy: 0.8616	Val Loss: 0.0058	Val Accuracy: 0.8096

Epoch: 2
[Epoch: 2]
Train Loss: 0.0029	Train Accuracy: 0.8715	Val Loss: 0.0047	Val Accuracy: 0.8422

Epoch: 3
[Epoch: 3]
Train Loss: 0.0027	Train Accuracy: 0.8816	Val Loss: 0.0050	Val Accuracy: 0.8370

Epoch: 4
[Epoch: 4]
Train Loss: 0.0025	Train Accuracy: 0.8867	Val Loss: 0.0051	Val Accuracy: 0.8370

Epoch: 5
[Epoch: 5]
Train Loss: 0.0024	Train Accuracy: 0.8933	Val Loss: 0.0049	Val Accuracy: 0.8442

Epoch: 6
[Epoch: 6]
Train Loss: 0.0022	Train Accuracy: 0.9012	Val Loss: 0.0045	Val Accuracy: 0.8586

Epoch: 7
[Epoch: 7]
Train Loss: 0.0021	Train Accuracy: 0.9076	Val Loss: 0.0044	Val Accuracy: 0.8590

Epoch: 8
[Epoch: 8]
Train Loss: 0.0020	Train Accuracy: 0.9092	Val Loss: 0.0042	Val Accuracy: 0.8685

Epoch: 9
[Epoch: 9]
Train Loss: 0.0019	Train Accuracy: 0.9155	Val Loss: 0.0046	Val Accurac

## fgsm train

### eps=0.01

In [17]:
for epoch in range(start_epoch, start_epoch+30):
    train_loss, train_acc = train(resmodel, epoch)
    adv_train_loss, adv_train_acc = fgsm_train(resmodel, epoch)
    val_loss, val_acc = test(resmodel, epoch)
    
    print('[Epoch: {}]\nTrain Loss: {:.4f}\tTrain Accuracy: {:.4f}\tAdv_Train Loss: {:.4f}\tAdv_Train Accuracy: {:.4f}\nVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
          format(epoch, train_loss, train_acc, adv_train_loss, adv_train_acc, val_loss, val_acc))
    
    if (epoch+1)%10 == 0:
        torch.save({'model_state_dict': resmodel.state_dict(),},
                    path + "Adv_ResNet50_eps0.01_{}.pth".format(str(epoch)))


Epoch: 0

Epoch: 0
[Epoch: 0]
Train Loss: 0.0113	Train Accuracy: 0.4799	Adv_Train Loss: 0.0110	Adv_Train Accuracy: 0.4954
Val Loss: 0.0115	Val Accuracy: 0.6115

Epoch: 1

Epoch: 1
[Epoch: 1]
Train Loss: 0.0073	Train Accuracy: 0.6678	Adv_Train Loss: 0.0082	Adv_Train Accuracy: 0.6219
Val Loss: 0.0084	Val Accuracy: 0.7129

Epoch: 2

Epoch: 2
[Epoch: 2]
Train Loss: 0.0050	Train Accuracy: 0.7759	Adv_Train Loss: 0.0066	Adv_Train Accuracy: 0.6996
Val Loss: 0.0079	Val Accuracy: 0.7487

Epoch: 3

Epoch: 3
[Epoch: 3]
Train Loss: 0.0039	Train Accuracy: 0.8276	Adv_Train Loss: 0.0057	Adv_Train Accuracy: 0.7394
Val Loss: 0.0060	Val Accuracy: 0.7936

Epoch: 4

Epoch: 4
[Epoch: 4]
Train Loss: 0.0032	Train Accuracy: 0.8604	Adv_Train Loss: 0.0051	Adv_Train Accuracy: 0.7696
Val Loss: 0.0052	Val Accuracy: 0.8264

Epoch: 5

Epoch: 5
[Epoch: 5]
Train Loss: 0.0027	Train Accuracy: 0.8821	Adv_Train Loss: 0.0046	Adv_Train Accuracy: 0.7865
Val Loss: 0.0044	Val Accuracy: 0.8503

Epoch: 6

Epoch: 6
[Epoch: 6]
Tra

### eps=0.1

In [None]:
for epoch in range(start_epoch, start_epoch+50):
    train_loss, train_acc = train(resmodel, epoch)
    adv_train_loss, adv_train_acc = fgsm_train(resmodel, epoch)
    val_loss, val_acc = test(resmodel, epoch)
    
    print('[Epoch: {}]\nTrain Loss: {:.4f}\tTrain Accuracy: {:.4f}\tAdv_Train Loss: {:.4f}\tAdv_Train Accuracy: {:.4f}\nVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
          format(epoch, train_loss, train_acc, adv_train_loss, adv_train_acc, val_loss, val_acc))
    
    if (epoch+1)%10 == 0:
        torch.save({'model_state_dict': resmodel.state_dict(),},
                    path + "Adv_ResNet50_{}.pth".format(str(epoch)))


Epoch: 0

Epoch: 0
[Epoch: 0]
Train Loss: 0.0075	Train Accuracy: 0.6590	Adv_Train Loss: 0.0141	Adv_Train Accuracy: 0.3317
Val Loss: 0.0096	Val Accuracy: 0.6717

Epoch: 1

Epoch: 1
[Epoch: 1]
Train Loss: 0.0057	Train Accuracy: 0.7436	Adv_Train Loss: 0.0128	Adv_Train Accuracy: 0.3875
Val Loss: 0.0085	Val Accuracy: 0.7025

Epoch: 2

Epoch: 2
[Epoch: 2]
Train Loss: 0.0046	Train Accuracy: 0.7939	Adv_Train Loss: 0.0119	Adv_Train Accuracy: 0.4296
Val Loss: 0.0077	Val Accuracy: 0.7319

Epoch: 3

Epoch: 3
[Epoch: 3]
Train Loss: 0.0040	Train Accuracy: 0.8239	Adv_Train Loss: 0.0106	Adv_Train Accuracy: 0.4880
Val Loss: 0.0072	Val Accuracy: 0.7495

Epoch: 4

Epoch: 4
[Epoch: 4]
Train Loss: 0.0033	Train Accuracy: 0.8529	Adv_Train Loss: 0.0094	Adv_Train Accuracy: 0.5574
Val Loss: 0.0073	Val Accuracy: 0.7622

Epoch: 5

Epoch: 5
[Epoch: 5]
Train Loss: 0.0029	Train Accuracy: 0.8733	Adv_Train Loss: 0.0088	Adv_Train Accuracy: 0.5882
Val Loss: 0.0068	Val Accuracy: 0.7696

Epoch: 6

Epoch: 6
[Epoch: 6]
Tra

KeyboardInterrupt: ignored

## pgd train

In [None]:
for epoch in range(start_epoch, start_epoch+30):
    adv_train_loss, adv_train_acc = pgd_train(resmodel, epoch)
    val_loss, val_acc = test(resmodel, epoch)
    
    print('[Epoch: {}]\nAdv_Train Loss: {:.4f}\tAdv_Train Accuracy: {:.4f}\tVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
          format(epoch, adv_train_loss, adv_train_acc, val_loss, val_acc))
    
    if (epoch+1)%10 == 0:
        torch.save({'model_state_dict': resmodel.state_dict(),},
                    path + "PGD_ResNet50_{}.pth".format(str(epoch)))

278.2911500930786
[Epoch: 0]
Adv_Train Loss: 1.3248	Adv_Train Accuracy: 0.4844	Val Loss: 0.0062	Val Accuracy: 0.8229
277.60825300216675
[Epoch: 1]
Adv_Train Loss: 1.2920	Adv_Train Accuracy: 0.5032	Val Loss: 0.0062	Val Accuracy: 0.8067
277.5205228328705
[Epoch: 2]
Adv_Train Loss: 1.2489	Adv_Train Accuracy: 0.5185	Val Loss: 0.0071	Val Accuracy: 0.7766
277.53821444511414
[Epoch: 3]
Adv_Train Loss: 1.2150	Adv_Train Accuracy: 0.5302	Val Loss: 0.0067	Val Accuracy: 0.7932
277.5419387817383
[Epoch: 4]
Adv_Train Loss: 1.1983	Adv_Train Accuracy: 0.5368	Val Loss: 0.0065	Val Accuracy: 0.8036
277.5575520992279
[Epoch: 5]
Adv_Train Loss: 1.1725	Adv_Train Accuracy: 0.5442	Val Loss: 0.0063	Val Accuracy: 0.8073
277.56264328956604
[Epoch: 6]
Adv_Train Loss: 1.1576	Adv_Train Accuracy: 0.5548	Val Loss: 0.0067	Val Accuracy: 0.8017
277.5425045490265
[Epoch: 7]
Adv_Train Loss: 1.1449	Adv_Train Accuracy: 0.5585	Val Loss: 0.0071	Val Accuracy: 0.7785
277.55950021743774
[Epoch: 8]
Adv_Train Loss: 1.1357	Adv_Trai

## FGSM attack
------------------
batch size = 1

In [None]:
epsilons = [0, .05, .1, .15, .2, .25, .3]
# use_cuda=True

"""Load Model"""
# epoch=7, the highest AUC score
modelpath = path + "Adc_ResNet50_19.pth"
loadmodel = ResNet50().to(device)
loadmodel.load_state_dict(torch.load(modelpath)['model_state_dict'])

<All keys matched successfully>

In [None]:
attackloader = torch.utils.data.DataLoader(
    testset, batch_size=1, shuffle=False, num_workers=2)

In [16]:
# FGSM attack code
def fgsm_attack(image, epsilon, data_grad):
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
    perturbed_image = image + epsilon*sign_data_grad
    # Adding clipping to maintain [0,1] range
    # perturbed_image = torch.clamp(perturbed_image, 0, 1)
    # Return the perturbed image
    return perturbed_image

In [None]:
"""free train"""

accuracies = []
# examples = []

# Run test for each epsilon
for eps in epsilons:
    accuracy = testattack(loadmodel, attackloader, eps)
    accuracies.append(accuracy)
    print(accuracies)
    # examples.append(example)

Epsilon: 0	Test Accuracy = 9081 / 10000 = 0.9081
[0.9081]
Epsilon: 0.05	Test Accuracy = 2477 / 10000 = 0.2477
[0.9081, 0.2477]
Epsilon: 0.1	Test Accuracy = 1323 / 10000 = 0.1323
[0.9081, 0.2477, 0.1323]
Epsilon: 0.15	Test Accuracy = 1035 / 10000 = 0.1035
[0.9081, 0.2477, 0.1323, 0.1035]
Epsilon: 0.2	Test Accuracy = 933 / 10000 = 0.0933
[0.9081, 0.2477, 0.1323, 0.1035, 0.0933]
Epsilon: 0.25	Test Accuracy = 888 / 10000 = 0.0888
[0.9081, 0.2477, 0.1323, 0.1035, 0.0933, 0.0888]
Epsilon: 0.3	Test Accuracy = 854 / 10000 = 0.0854
[0.9081, 0.2477, 0.1323, 0.1035, 0.0933, 0.0888, 0.0854]


In [None]:
accuracies = []
examples = []

# Run test for each epsilon
for eps in epsilons:
    acc, ex = attack(loadmodel, attackloader, eps)
    accuracies.append(acc)
    examples.append(ex)

Epsilon: 0	Test Accuracy = 4932 / 10000 = 0.4932
Epsilon: 0.05	Test Accuracy = 3525 / 10000 = 0.3525
Epsilon: 0.1	Test Accuracy = 2746 / 10000 = 0.2746
Epsilon: 0.15	Test Accuracy = 2338 / 10000 = 0.2338
Epsilon: 0.2	Test Accuracy = 2089 / 10000 = 0.2089
Epsilon: 0.25	Test Accuracy = 1925 / 10000 = 0.1925
Epsilon: 0.3	Test Accuracy = 1820 / 10000 = 0.182


In [None]:
"""fgsm train"""
accuracies = []

for eps in epsilons:
    acc = testattack(loadmodel, attackloader, eps)
    accuracies.append(acc)

Epsilon: 0	Test Accuracy = 8404 / 10000 = 0.8404
Epsilon: 0.05	Test Accuracy = 6389 / 10000 = 0.6389
Epsilon: 0.1	Test Accuracy = 4542 / 10000 = 0.4542
Epsilon: 0.15	Test Accuracy = 3190 / 10000 = 0.319
Epsilon: 0.2	Test Accuracy = 2227 / 10000 = 0.2227
Epsilon: 0.25	Test Accuracy = 1606 / 10000 = 0.1606
Epsilon: 0.3	Test Accuracy = 1221 / 10000 = 0.1221


## PGD attack

In [None]:
def pgd_attack(model, images, labels, eps=0.3, alpha=2/255, iters=5) :
    images = images.to(device)
    labels = labels.to(device)
    loss = nn.CrossEntropyLoss()
        
    ori_images = images.data
        
    for i in range(iters) :    
        images.requires_grad = True
        outputs = model(images)

        model.zero_grad()
        cost = loss(outputs, labels).to(device)
        cost.backward()

        adv_images = images + alpha*images.grad.sign()
        eta = torch.clamp(adv_images - ori_images, min=-eps, max=eps)
        images = torch.clamp(ori_images + eta, min=0, max=1).detach_()
            
    return images

In [None]:
"""Load Model"""
# epoch=29, Adv_Train Accuracy: 0.9046	Val Accuracy: 0.8175
modelpath = path + "PGD_ResNet50_29.pth"
loadmodel = ResNet50().to(device)
loadmodel.load_state_dict(torch.load(modelpath)['model_state_dict'])

<All keys matched successfully>

In [None]:
test_acc = testattack2(loadmodel, attackloader)
test_acc

Attack Image & Predicted Label
Accuracy of test text: 5.870000 %


0.0587

# Draft

In [None]:
"""ResNet50 Model"""
resmodel = ResNet50(Bottleneck, [3,4,6,3]).to(device)
resmodel.apply(init_weights)

criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(resmodel.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)
optimizer = torch.optim.Adam(resmodel.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)


"""
classification train and dev
"""
def train(model, data_loader, class_test_loader, verify_test_loader, scheduler):
    model.train()

    scaler = torch.cuda.amp.GradScaler()

    for epoch in range(numEpochs):
        for batch_num, (feats, labels) in enumerate(data_loader):
            feats, labels = feats.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            with torch.cuda.amp.autocast():
                outputs = model(feats)[1]
                loss = criterion(outputs, labels.long())
            
            # loss.backward()
            scaler.scale(loss).backward()
            
            # optimizer.step()
            scaler.step(optimizer)
            scaler.update()

            torch.cuda.empty_cache()
            del feats
            del labels
            del loss

        print('Classification')
        train_loss, train_acc = test_classify(model, data_loader)
        val_loss, val_acc = test_classify(model, class_test_loader)
        print('[Epoch: {}]\nTrain Loss: {:.4f}\tTrain Accuracy: {:.4f}\tVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
              format(epoch, train_loss, train_acc, val_loss, val_acc))
            # task = 'Verification'
        scheduler.step(val_loss)

        torch.save({'model_state_dict': model.state_dict(),},
                    path + "ResNet50_{}.pth".format(str(epoch)))
            

        # else: task = 'Classification'
        if (epoch+1)%2 == 0:
            print('Verification')
            auc = test_verify(model, verify_test_loader)
            print('[Epoch: {}]\tAUC: {:.4f}'.format(epoch, auc))

def test_classify(model, test_loader):
    model.eval()

    test_loss = []
    accuracy = 0
    total = 0

    with torch.no_grad():
        for batch_num, (feats, labels) in enumerate(test_loader):
            feats, labels = feats.to(device), labels.to(device)
            
            outputs = model(feats)[1]
            loss = criterion(outputs, labels.long()).detach()
            test_loss.extend([loss.item()]*feats.size()[0])

            # transform the prediction to one-hot form
            _, pred_labels = torch.max(F.softmax(outputs, dim=1), 1)
            pred_labels = pred_labels.view(-1)
            
            accuracy += torch.sum(torch.eq(pred_labels, labels)).item()
            total += len(labels)
            del feats
            del labels

    acc = accuracy/total
    avg_loss = np.mean(test_loss)
    
    model.train()
    return avg_loss, acc


def test_verify(model, test_loader):
    model.eval()

    score_list = []
    truth_list = []

    for img1, img2, label1, label2, truth in test_loader:
        
        img1 = img1.to(device)
        img2 = img2.to(device)

        with torch.no_grad():
            feat1 = model(img1)[0].cpu().numpy()
            feat2 = model(img2)[0].cpu().numpy()

            value = cosine_similarity(feat1,feat2).diagonal()
            score_list.append(value)

            truth_list.append(truth.item())

            del img1
            del img2
            del label1
            del label2
            del truth

    similarity = np.array(score_list)
    true_label = np.array(truth_list)
    auc = roc_auc_score(true_label, similarity)

    model.train()
    return auc


resmodel.train()
train(resmodel, train_dataloader, dev_dataloader, verify_val_loader, scheduler)


"""Load Model"""
# epoch=7, the highest AUC score
modelpath = path + "ResNet50_7.pth"
loadmodel = ResNet50(Bottleneck, [3,4,6,3]).to(device)
loadmodel.load_state_dict(torch.load(modelpath)['model_state_dict'])


"""
Compute verify_val AUC and Write verify_test Prediction Results
"""
def verification_val(model, data_loader, device):
    model.eval()

    score_list = []
    truth_list = []

    for img1, img2, label1, label2, truth in data_loader:
        
        img1 = img1.to(device)
        img2 = img2.to(device)

        with torch.no_grad():
            feat1 = model(img1).cpu().numpy()
            feat2 = model(img2).cpu().numpy()

            value = cosine_similarity(feat1,feat2).diagonal()
            score_list.append(value)

            truth_list.append(truth.item())

    similarity = np.array(score_list)
    true_label = np.array(truth_list)
    auc = roc_auc_score(true_label, similarity)
    return auc

def verification_test(model, data_loader, device):
    model.eval()

    name_list1 = []
    name_list2 = []
    score_list = []

    for fig1, fig2, name1, name2 in data_loader:
        
        fig1 = fig1.to(device)
        fig2 = fig2.to(device)

        with torch.no_grad():
            feat1 = model(fig1).cpu().numpy()
            feat2 = model(fig2).cpu().numpy()

        value = cosine_similarity(feat1, feat2).diagonal()
        
        name_list1.append(name1)
        name_list2.append(name2)
        score_list.append(value)
    return name_list1, name_list2, score_list


verify_val_auc = verification_val(loadmodel, verify_val_loader, device)

names, scores = verification_test(loadmodel, verify_test_loader, device)
verify_results = pd.DataFrame({'Id':names, 'Category': scores})
verify_results.to_csv('results.csv', index=False)

# !pip install kaggle 
# !kaggle competitions submit -c 11-785-fall-20-homework-2-part-2 -f verify_results.csv -m "Message"