In [14]:
!pip install foolbox
!pip install -U tensorly
!pip install -U tensorly-torch

In [15]:
import tltorch
import torch
from torch import nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import CIFAR100
import torchvision.transforms as T
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from IPython.display import clear_output
import foolbox as fb

In [17]:
# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [18]:
def get_cifar100_transform(train=True):
    """Build the torchvision preprocessing pipeline applied to each image.

    Args:
        train: when truthy, a random 32x32 crop (after 4-pixel padding) and a
            random horizontal flip are applied before tensor conversion;
            otherwise the image is only converted and normalized.

    Returns:
        A ``T.Compose`` whose last step is per-channel normalization with the
        fixed mean / std constants defined below.
    """
    channel_mean = (0.5071, 0.4867, 0.4408)
    channel_std = (0.2675, 0.2565, 0.2761)

    if not train:
        return T.Compose([
            T.ToTensor(),
            T.Normalize(channel_mean, channel_std),
        ])

    return T.Compose([
        T.RandomCrop(32, padding=4),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        # The training pipeline normalizes the freshly created tensor in place.
        T.Normalize(channel_mean, channel_std, inplace=True),
    ])

In [19]:
batch_size = 128

# Datasets: the train split uses the augmenting transform, the test split the plain one.
train_set = CIFAR100(
    'CIFAR100',
    train=True,
    download=True,
    transform=get_cifar100_transform(train=True),
)
test_set = CIFAR100(
    'CIFAR100',
    train=False,
    download=True,
    transform=get_cifar100_transform(train=False),
)

# Loaders: only the training loader shuffles.
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
test_loader = DataLoader(
    test_set,
    batch_size=batch_size,
    num_workers=2,
    pin_memory=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [20]:
import tensorly as tl
tl.set_backend('pytorch')
from tensorly import tenalg
from tltorch.factorized_tensors import FactorizedTensor, TuckerTensor, TTTensor

def transform_core(core, lambdas):
    """Apply a per-mode dropout mask to a Tucker core.

    Args:
        core: Tucker core tensor.
        lambdas: list of 1-D mask vectors, one per core mode. Their outer
            product is expected to have the same shape as ``core``.

    Returns:
        ``core`` multiplied element-wise by the outer product of ``lambdas``.
        The result is an ordinary tensor that stays attached to the autograd
        graph of ``core``.
    """
    # Follow the core's own device/dtype instead of relying on a notebook-level global.
    mask = tenalg.outer(lambdas).to(device=core.device, dtype=core.dtype)
    # Deliberately NOT re-wrapped in nn.Parameter: doing so creates a brand-new leaf
    # tensor, so the real core parameter would never receive a gradient in training.
    return core * mask

def transform_tt(tt, gammas):
    """Build a copy of a TT-factorized tensor whose factors are masked.

    Args:
        tt: TT-factorized tensor exposing ``shape``, ``rank`` and ``factors``.
        gammas: one 1-D mask per factor except the last; ``gammas[i]`` scales
            the trailing (third) axis of ``tt.factors[i]``.

    Returns:
        A new TT object created with ``FactorizedTensor.new`` whose factors have
        been replaced by the masked factors; the last factor is carried over
        without masking.
    """
    # Allocate a TT container with matching shape/rank; every factor is overwritten below.
    new_tt = FactorizedTensor.new(shape=tt.shape, rank=tt.rank, factorization='TT')
    # 'ijk,k->ijk' multiplies slice [:, :, k] of the factor by gammas[i][k].
    # NOTE(review): wrapping the product in nn.Parameter presumably yields a new leaf
    # tensor, so gradients may not reach tt.factors[i] during training — confirm.
    # NOTE(review): `device` is the notebook-level global, not an argument.
    for i in range(len(tt.factors[:-1])):
        new_tt.factors[i] = (torch.nn.Parameter(torch.einsum('ijk,k->ijk', \
                                                             tt.factors[i].to(device), gammas[i].to(device))))
    # The last factor has no trailing internal rank to mask, so it is copied as is.
    new_tt.factors[-1] = torch.nn.Parameter(tt.factors[-1].to(device))
    return new_tt

def tttucker_rtrl(x, tucker_core, tucker_factors, training, p=0.9, q=0.9, bias=None):
    """Contract ``x`` with a Tucker weight whose factors may be stored in TT form.

    Args:
        x: input tensor; all of its modes except the first are contracted.
        tucker_core: core tensor of the Tucker weight.
        tucker_factors: sequence of factors; ``TTTensor`` entries are densified
            and flattened to a matrix, other entries are used as they are.
        training: when truthy, Bernoulli masks are sampled and passed to
            ``transform_core`` (core) and ``transform_tt`` (TT factors).
        p: probability of a 1 in the core masks.
        q: probability of a 1 in the TT-rank masks.
        bias: optional tensor added to the result.

    Returns:
        The inner product of ``x`` with the reconstructed weight, plus ``bias``
        when one is given.
    """
    contracted_modes = tl.ndim(x) - 1

    def as_matrix(tt_factor):
        # Densify a TT factor and flatten every mode but the last into rows.
        return tt_factor.to_tensor().reshape(-1, tt_factor.shape[-1])

    if training:
        core_masks = [torch.Tensor(np.random.binomial(1, p, size=dim)) for dim in tucker_core.shape]
        core = transform_core(tucker_core, core_masks)

        factor_matrices = []
        for factor in tucker_factors:
            if not isinstance(factor, TTTensor):
                factor_matrices.append(factor.to(device))
                continue
            # Only the internal TT ranks (not the boundary ranks) get a mask.
            rank_masks = [torch.Tensor(np.random.binomial(1, q, size=r)) for r in factor.rank[1:-1]]
            factor_matrices.append(as_matrix(transform_tt(factor, rank_masks)))
    else:
        core = tucker_core
        factor_matrices = [
            as_matrix(factor).to(device) if isinstance(factor, TTTensor) else factor.to(device)
            for factor in tucker_factors
        ]

    regression_weights = tl.tucker_tensor.tucker_to_tensor((core, factor_matrices))
    prediction = tenalg.inner(x, regression_weights, n_modes=contracted_modes)
    if bias is not None:
        prediction = prediction + bias
    return prediction

class TTTuckerRTRL(nn.Module):
    """Tensor regression layer with a Tucker weight whose factors may be TT-compressed.

    The weight of shape ``input_shape + output_shape`` is drawn from N(0, 0.02),
    Tucker-decomposed, and each Tucker factor matrix is optionally reshaped and
    stored as a tensor-train (TT) tensor. The forward pass delegates to
    ``tttucker_rtrl``.

    Args:
        input_shape: int or sequence of ints, shape of one input sample.
        output_shape: int or sequence of ints, shape of one output sample.
        bias: whether to add a learnable bias of shape ``output_shape``.
        verbose: stored on the instance; not otherwise used here.
        rank: rank specification passed to the Tucker factorization.
        tt_ranks: ``None`` or one TT rank specification per weight mode.
        tt_inputshapes: ``None`` or one entry per weight mode; an entry with more
            than one size makes that mode's factor matrix get reshaped to
            ``tuple(entry) + (rank,)`` and stored in TT form.
        n_layers: 1, an int, or a tuple prepended to the factorization shape.
        device, dtype: forwarded to the created tensors.
        p: probability of a 1 in the Tucker core masks sampled during training.
        q: probability of a 1 in the TT-rank masks sampled during training.

    Raises:
        ValueError: if ``tt_inputshapes`` / ``tt_ranks`` do not have one entry per
            weight mode, or ``n_layers`` is neither an int nor a tuple.
    """

    def __init__(self, input_shape, output_shape, bias=False, verbose=0,
                 rank='same', tt_ranks=None, tt_inputshapes=None, n_layers=1,
                device=None, dtype=None, p=0.9, q=0.9, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose

        if isinstance(input_shape, int):
            self.input_shape = (input_shape, )
        else:
            self.input_shape = tuple(input_shape)

        if isinstance(output_shape, int):
            self.output_shape = (output_shape, )
        else:
            self.output_shape = tuple(output_shape)

        self.n_input = len(self.input_shape)
        self.n_output = len(self.output_shape)
        self.weight_shape = self.input_shape + self.output_shape
        self.order = len(self.weight_shape)

        if bias:
            self.bias = nn.Parameter(torch.empty(self.output_shape, device=device, dtype=dtype))
            # torch.empty leaves arbitrary memory contents; initialise explicitly,
            # using the same range as TuckerRTRL.init_from_random.
            with torch.no_grad():
                self.bias.uniform_(-1, 1)
        else:
            self.bias = None

        if n_layers == 1:
            factorization_shape = self.weight_shape
        elif isinstance(n_layers, int):
            factorization_shape = (n_layers, ) + self.weight_shape
        elif isinstance(n_layers, tuple):
            factorization_shape = n_layers + self.weight_shape
        else:
            # Previously this fell through and failed later with an unbound-name error.
            raise ValueError(f'n_layers should be an int or a tuple, got {n_layers!r}')

        # initialize with decomposed normal distribution
        full_weight = torch.normal(0.0, 0.02, size=self.weight_shape)

        tucker_decomposition = FactorizedTensor.new(factorization_shape, rank=rank, factorization='tucker', dtype=dtype)
        tucker_decomposition.init_from_tensor(full_weight)
        self.tucker_core = torch.nn.Parameter(tucker_decomposition.core.to(device))

        if tt_inputshapes is None:
            # Move the data first and wrap afterwards: `.to()` on a Parameter may
            # return a plain (non-Parameter) tensor when a copy is made.
            factors = [torch.nn.Parameter(factor.detach().to(device))
                       for factor in tucker_decomposition.factors]
        elif len(tt_inputshapes) != len(self.weight_shape):
            raise ValueError('tt_inputshapes should be None or have len of weight shape')
        else:
            if tt_ranks is not None and len(tt_ranks) != len(self.weight_shape):
                raise ValueError('tt_ranks should be None or have len of weight shape')
            factors = []
            for i, factor in enumerate(tucker_decomposition.factors):
                if tt_inputshapes[i] is not None and len(tt_inputshapes[i]) > 1:
                    # Split the factor's row mode into the requested sizes, keep the
                    # Tucker-rank mode last, and compress the result with TT.
                    new_shape = tuple(tt_inputshapes[i]) + (factor.shape[-1], )
                    tt_rank = 'same' if tt_ranks is None else tt_ranks[i]
                    factors.append(FactorizedTensor.new(shape=new_shape,
                                                        rank=tt_rank, factorization='TT',
                                                        device=device, dtype=dtype).init_from_tensor(torch.reshape(factor, new_shape)))
                else:
                    factors.append(torch.nn.Parameter(factor.detach().to(device)))
        self.tucker_factors = torch.nn.ParameterList(factors)

        self.factorization = 'tttucker'
        self.p = p
        self.q = q

    def forward(self, x):
        """Return ``tttucker_rtrl`` applied to ``x`` with this layer's core, factors and bias."""
        return tttucker_rtrl(x, self.tucker_core, self.tucker_factors, self.training, bias=self.bias, p=self.p, q=self.q)

In [21]:
import tensorly as tl
tl.set_backend('pytorch')
from tensorly import tenalg
from tltorch.factorized_tensors import FactorizedTensor, TuckerTensor, TTTensor
from tensorly.decomposition import tensor_train

class TTRTRL(nn.Module):
    """Tensor regression layer whose weight is stored as a tensor train (TT).

    Mode ``k`` of the TT tensor has size ``input_shape[k] * output_shape[k]``.
    In the forward pass the dense tensor is rebuilt, split into interleaved
    (input, output) modes, permuted so that all input modes come first, and
    contracted with the input. While training, Bernoulli masks are applied to
    the internal TT ranks.

    Args:
        input_shape: int or sequence of ints; the input is reshaped to
            ``(batch,) + input_shape``.
        output_shape: int or sequence of ints with as many entries as
            ``input_shape``.
        bias: whether to add a learnable bias of shape ``output_shape``.
        verbose: when truthy, the interleaved weight shape is printed at construction.
        rank: rank specification passed to ``tensor_train``.
        p: probability of a 1 in the rank masks sampled during training.
        device, dtype: forwarded to the created tensors.

    Raises:
        ValueError: if ``input_shape`` and ``output_shape`` differ in length.
    """

    def __init__(self, input_shape, output_shape, bias=False, verbose=0,
                 rank='same', p=0.9, device=None, dtype=None, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose

        # Normalise before validating: calling len() on a plain int used to raise
        # TypeError, which made the int branches unreachable.
        if isinstance(input_shape, int):
            self.input_shape = (input_shape, )
        else:
            self.input_shape = tuple(input_shape)

        if isinstance(output_shape, int):
            self.output_shape = (output_shape, )
        else:
            self.output_shape = tuple(output_shape)

        if len(self.input_shape) != len(self.output_shape):
            raise ValueError(f'input and output shape lenghts must be the same. got {input_shape} input_shape and {output_shape} output_shape')

        self.n_input = len(self.input_shape)
        self.n_output = len(self.output_shape)
        # Interleaved layout [in_0, out_0, in_1, out_1, ...].
        self.weight_shape = []
        for in_dim, out_dim in zip(self.input_shape, self.output_shape):
            self.weight_shape.extend([in_dim, out_dim])
        if self.verbose:
            # Debug output is now opt-in instead of being printed on every construction.
            print(self.weight_shape)
        self.order = len(self.weight_shape)

        if bias:
            self.bias = nn.Parameter(torch.empty(self.output_shape, device=device, dtype=dtype))
            # torch.empty leaves arbitrary memory contents; initialise explicitly,
            # using the same range as TuckerRTRL.init_from_random.
            with torch.no_grad():
                self.bias.uniform_(-1, 1)
        else:
            self.register_parameter("bias", None)

        # initialize with decomposed normal distribution
        tt_shape = tuple(int(i) * int(o) for i, o in zip(self.input_shape, self.output_shape))
        full_weight = torch.normal(0.0, 0.02, size=tt_shape)

        factors = tensor_train(full_weight, rank=rank)
        self.rank = factors.rank
        # Move the data first and wrap afterwards: `.to()` on a Parameter may
        # return a plain (non-Parameter) tensor when a copy is made.
        self.factors = torch.nn.ParameterList([torch.nn.Parameter(factor.to(device))
                                               for factor in factors])
        self.factorization = 'tt'
        self.p = p

    def forward(self, x):
        """Apply the layer and flatten the output modes to ``(batch, prod(output_shape))``."""
        return self.tt_rtrl(x).reshape((x.shape[0], int(np.prod(self.output_shape))))

    def tt_rtrl(self, x, p=0.9):
        """Contract ``x`` with the (optionally rank-masked) TT weight.

        Args:
            x: input batch; reshaped to ``(batch,) + self.input_shape``.
            p: kept for backward compatibility only; the probability actually
                used for the masks is ``self.p``.

        Returns:
            Tensor of shape ``(batch,) + self.output_shape`` (plus bias if present).
        """
        x = x.reshape((x.shape[0], ) + self.input_shape)
        n_modes = tl.ndim(x) - 1

        # Work on the registered parameters directly. The previous implementation
        # re-wrapped every factor in a new nn.Parameter on each call, which detached
        # the computation from self.factors (no gradients) and also allocated a
        # randomly initialised TT object per forward pass.
        factors = list(self.factors)
        if self.training:
            # One mask per internal TT rank; it scales the trailing axis of factor i.
            for i, rank_size in enumerate(self.rank[1:-1]):
                gamma = torch.as_tensor(np.random.binomial(1, self.p, size=rank_size),
                                        dtype=factors[i].dtype, device=factors[i].device)
                factors[i] = torch.einsum('ijk,k->ijk', factors[i], gamma)

        regression_weights = tl.tt_to_tensor(factors).reshape(self.weight_shape)
        # Bring all input modes (even positions) in front of all output modes (odd positions).
        input_modes = list(range(0, self.order, 2))
        output_modes = list(range(1, self.order, 2))
        regression_weights = regression_weights.permute(input_modes + output_modes)

        out = tenalg.inner(x, regression_weights, n_modes=n_modes)
        if self.bias is None:
            return out
        return out + self.bias

In [22]:
import tensorly as tl
tl.set_backend('pytorch')
from tensorly import tenalg
from tltorch.factorized_tensors import FactorizedTensor, TuckerTensor

def transform_tensor(tensor, lambdas):
    """Return a Tucker tensor whose core is masked by the outer product of ``lambdas``.

    Args:
        tensor: Tucker-factorized tensor exposing ``core``, ``factors``,
            ``shape`` and ``rank``.
        lambdas: list of 1-D mask vectors, one per core mode.

    Returns:
        A new ``TuckerTensor`` that shares the factor matrices of ``tensor`` and
        whose core is ``tensor.core * outer(lambdas)``.

    The previous implementation built such a copy but then returned the
    untouched input, so the masks were never applied. The copy was also wrapped
    in ``nn.Parameter``, which would have detached it from the autograd graph.
    The masked core is therefore kept as a plain tensor here.
    """
    mask = tenalg.outer(lambdas).to(device=tensor.core.device, dtype=tensor.core.dtype)
    masked_core = tensor.core * mask
    factors = [tensor.factors[i] for i in range(len(lambdas))]
    return TuckerTensor(masked_core, factors, shape=tensor.shape, rank=tensor.rank)

def tucker_rtrl(x, weight, training, p=0.9, bias=None):
    """Contract ``x`` with a Tucker-factorized regression weight.

    Args:
        x: input tensor; all of its modes except the first are contracted.
        weight: factorized weight exposing ``rank`` and ``to_tensor()``.
        training: when truthy, one Bernoulli mask per rank is sampled and the
            weight is passed through ``transform_tensor`` before densifying.
        p: probability of a 1 in the sampled masks.
        bias: optional tensor added to the result.

    Returns:
        The inner product of ``x`` with the dense weight, plus ``bias`` if given.
    """
    contracted_modes = tl.ndim(x) - 1

    if not training:
        regression_weights = weight.to_tensor()
    else:
        keep_masks = [torch.Tensor(np.random.binomial(1, p, size=r)).to(device) for r in weight.rank]
        regression_weights = transform_tensor(weight, keep_masks).to_tensor()

    prediction = tenalg.inner(x, regression_weights, n_modes=contracted_modes)
    if bias is not None:
        prediction = prediction + bias
    return prediction

class TuckerRTRL(nn.Module):
    """Tensor regression layer with a Tucker-factorized weight.

    The weight has shape ``input_shape + output_shape`` (optionally prefixed by
    ``n_layers``) and is created with ``FactorizedTensor.new``. The forward pass
    delegates to ``tucker_rtrl``.

    Args:
        input_shape: int or sequence of ints, shape of one input sample.
        output_shape: int or sequence of ints, shape of one output sample.
        bias: whether to add a learnable bias of shape ``output_shape``.
        p: probability forwarded to ``tucker_rtrl`` for mask sampling.
        verbose: stored on the instance; not otherwise used here.
        rank: rank specification for the Tucker factorization.
        n_layers: 1, an int, or a tuple prepended to the factorization shape.
        device, dtype: forwarded to the created tensors.
    """

    def __init__(self, input_shape, output_shape, bias=False, p=0.9, verbose=0, rank='same', n_layers=1,
                device=None, dtype=None, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose

        # Accept a bare int as a one-mode shape.
        self.input_shape = (input_shape, ) if isinstance(input_shape, int) else tuple(input_shape)
        self.output_shape = (output_shape, ) if isinstance(output_shape, int) else tuple(output_shape)

        self.n_input = len(self.input_shape)
        self.n_output = len(self.output_shape)
        self.weight_shape = self.input_shape + self.output_shape
        self.order = len(self.weight_shape)
        self.p = p

        # Values are filled in by init_from_random() below.
        self.bias = (
            nn.Parameter(torch.empty(self.output_shape, device=device, dtype=dtype))
            if bias else None
        )

        if n_layers == 1:
            factorization_shape = self.weight_shape
        elif isinstance(n_layers, int):
            factorization_shape = (n_layers, ) + self.weight_shape
        elif isinstance(n_layers, tuple):
            factorization_shape = n_layers + self.weight_shape

        self.weight = FactorizedTensor.new(
            factorization_shape,
            rank=rank,
            factorization='tucker',
            device=device,
            dtype=dtype,
        )
        self.init_from_random()

        self.factorization = self.weight.name

    def forward(self, x):
        """Return ``tucker_rtrl`` applied to ``x`` with this layer's weight and bias."""
        return tucker_rtrl(x, self.weight, self.training, p=self.p, bias=self.bias)

    def init_from_random(self, decompose_full_weight=False):
        """(Re-)initialise the weight and, if present, the bias.

        Args:
            decompose_full_weight: when truthy, a dense N(0, 0.02) tensor of shape
                ``weight_shape`` is drawn and decomposed into the factorized
                weight; otherwise the weight's own ``normal_()`` is called.
        """
        with torch.no_grad():
            if not decompose_full_weight:
                self.weight.normal_()
            else:
                dense_init = torch.normal(0.0, 0.02, size=self.weight_shape)
                self.weight.init_from_tensor(dense_init)

            if self.bias is not None:
                self.bias.uniform_(-1, 1)

In [23]:
#@title
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

from torch.autograd import Variable

def _weights_init(m):
    classname = m.__class__.__name__
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class LambdaLayer(nn.Module):
    """Module that wraps an arbitrary callable so it can be used as a layer."""

    def __init__(self, lambd):
        super().__init__()
        # Callable applied to the input in forward().
        self.lambd = lambd

    def forward(self, x):
        """Return the wrapped callable applied to ``x``."""
        fn = self.lambd
        return fn(x)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm stages and a shortcut.

    Args:
        in_planes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution.
        option: shortcut used when the stride is not 1 or the channel count
            changes. ``'A'`` subsamples every second pixel and zero-pads
            ``planes // 4`` channels on each side; ``'B'`` uses a strided 1x1
            convolution followed by batch norm.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1, option='A'):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Empty Sequential acts as the identity when shapes already match.
        self.shortcut = nn.Sequential()
        shape_changes = not (stride == 1 and in_planes == planes)
        if shape_changes and option == 'A':
            # For CIFAR10 ResNet paper uses option A.
            channel_pad = planes // 4
            self.shortcut = LambdaLayer(
                lambda x: F.pad(x[:, :, ::2, ::2], (0, 0, 0, 0, channel_pad, channel_pad), "constant", 0)
            )
        elif shape_changes and option == 'B':
            out_planes = self.expansion * planes
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)


class ResNet(nn.Module):
    """CIFAR-style ResNet backbone with a selectable classification head.

    Args:
        block: residual block class; must expose ``expansion``.
        num_blocks: sequence with the number of blocks in each of the 3 stages.
        tp: head type, one of ``'TTTuckerRTRL'``, ``'TTRTRL'``, ``'TuckerRTRL'``
            or ``'linear'``.
        num_classes: number of output logits.
        p: probability forwarded to the tensor regression heads (unused by the
            linear head).
        rank: rank specification forwarded to the tensor regression heads
            (unused by the linear head).

    Raises:
        ValueError: if ``tp`` is unknown, or if ``tp == 'TTRTRL'`` and
            ``num_classes`` does not match the fixed output factorization.
    """

    def __init__(self, block, num_blocks, tp, num_classes=100, p=0.9, rank='same'):
        super().__init__()
        self.in_planes = 16

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # Applied before the head is created, so only the backbone is re-initialised.
        self.apply(_weights_init)

        if tp == 'TTTuckerRTRL':
            self.islinear = False
            # num_classes was previously hard-coded to 100 here.
            self.trl = TTTuckerRTRL((64, 8, 8), num_classes, rank=rank,
                                    tt_inputshapes=[[4, 4, 4], None, None, None], p=p, q=p, device=device)
        elif tp == 'TTRTRL':
            self.islinear = False
            # The output modes are a fixed factorization of the class count;
            # refuse other values instead of silently emitting the wrong number of logits.
            tt_output_shape = (2, 5, 2, 5, 1)
            if num_classes != int(np.prod(tt_output_shape)):
                raise ValueError(f'TTRTRL head supports num_classes={int(np.prod(tt_output_shape))} only, got {num_classes}')
            self.trl = TTRTRL((4, 4, 4, 8, 8), tt_output_shape, rank=rank, p=p, device=device)
        elif tp == 'TuckerRTRL':
            self.islinear = False
            self.trl = TuckerRTRL((64, 8, 8), num_classes, rank=rank, p=p)
            self.trl.init_from_random(decompose_full_weight=True)
        elif tp == 'linear':
            self.islinear = True
            self.linear = nn.Linear(64, num_classes)
        else:
            raise ValueError(f'tp should be different. now it is {tp}')

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produces.
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the backbone, then either global-average-pool + linear or the TRL head."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        if self.islinear:
            # Pool over the full spatial extent, then flatten to (batch, channels).
            out = F.avg_pool2d(out, out.size()[3])
            out = out.view(out.size(0), -1)
            out = self.linear(out)
        else:
            # Tensor regression heads consume the un-pooled feature map directly.
            out = self.trl(out)
        return out


def resnet20Linear(p=0.9):
    """Build the 3x3-block ResNet with the plain linear head.

    ``p`` is forwarded for consistency with the other factories; the linear
    head itself does not use it.
    """
    return ResNet(BasicBlock, [3, 3, 3], 'linear', p=p)
def resnet20TTTuckerRTRL(p=0.9):
    """Build the 3x3-block ResNet with the TTTuckerRTRL head.

    Args:
        p: probability forwarded to the head (previously ignored: 0.9 was
            hard-coded regardless of the argument).
    """
    return ResNet(BasicBlock, [3, 3, 3], 'TTTuckerRTRL', p=p)
def resnet20TTRTRL(p=0.9):
    """Build the 3x3-block ResNet with the TTRTRL head.

    Args:
        p: probability forwarded to the head (previously ignored: 0.9 was
            hard-coded regardless of the argument).
    """
    return ResNet(BasicBlock, [3, 3, 3], 'TTRTRL', p=p)
def resnet20TuckerRTRL(p=0.9):
    """Build the 3x3-block ResNet with the TuckerRTRL head.

    Args:
        p: probability forwarded to the head (previously ignored: 0.9 was
            hard-coded regardless of the argument).
    """
    return ResNet(BasicBlock, [3, 3, 3], 'TuckerRTRL', p=p)

In [24]:
def load_checkpoint(path, p, modelname):
    """Rebuild a model and restore its weights and logged metrics from a checkpoint.

    Args:
        path: checkpoint file readable by ``torch.load``.
        p: value passed as ``p=`` to the model factory.
        modelname: callable that builds the model, invoked as ``modelname(p=p)``.

    Returns:
        Tuple ``(epoch, model, train_loss, test_loss, train_accuracy,
        test_accuracy)`` read from the checkpoint dictionary.
    """
    net = modelname(p=p).to(device)

    state = torch.load(path, map_location=device)
    net.load_state_dict(state['model_state_dict'])

    metric_keys = ('train_loss', 'test_loss', 'train_accuracy', 'test_accuracy')
    return (state['epoch'], net, *(state[key] for key in metric_keys))

In [25]:
def eval_adv_acc(fmodel, val_dataloader, epsilons, attack, device="cuda:0", desc='Evaluating...'):
    """Measure accuracy under attack for every perturbation budget in ``epsilons``.

    Args:
        fmodel: model wrapper handed to ``attack``. The wrapped network must
            already be in eval mode and on ``device``; this function no longer
            touches a notebook-level ``model`` global.
        val_dataloader: iterable of ``(images, labels)`` batches.
        epsilons: sequence of perturbation budgets passed to ``attack``.
        attack: callable invoked as ``attack(fmodel, images, labels, epsilons=epsilons)``
            and returning ``(raw, clipped, is_adv)``.
        device: device the batches are moved to.
        desc: progress-bar label.

    Returns:
        NumPy array with one entry per epsilon: the fraction of evaluated
        samples for which the attack did not succeed.
    """
    robust_counts = np.zeros(len(epsilons))
    n_samples = 0

    for images, labels in tqdm(val_dataloader, desc=desc):
        images = images.to(device)
        labels = labels.to(device)
        raw, clipped, is_adv = attack(fmodel, images, labels, epsilons=epsilons)

        # is_adv is reduced over its last axis (assumed to be the batch axis, with one
        # leading entry per epsilon). Counting samples instead of averaging per-batch
        # means keeps a short final batch from being over-weighted.
        robust_counts += (1 - is_adv.double()).sum(axis=-1).cpu().numpy()
        n_samples += is_adv.shape[-1]

    return robust_counts / n_samples

In [28]:
# Perturbation budgets and the attack shared by every evaluation below.
epsilons = np.linspace(0.0, 0.1, 5)
attack = fb.attacks.PGD()

# Baseline: ResNet with the plain linear head.
(epoch, model, train_loss, test_loss,
 train_accuracy, test_accuracy) = load_checkpoint('linear-epoch180.pt', 0.9, resnet20Linear)
model.eval()
fmodel = fb.PyTorchModel(model, bounds=(-3, 3))
adv_acc_linear = eval_adv_acc(fmodel, test_loader, epsilons, attack, device=device)

Evaluating...:   0%|          | 0/79 [00:00<?, ?it/s]

In [None]:
model_names = ['TTTuckerRTRL', 'TTRTRL', 'TuckerRTRL']
model_funcs = [resnet20TTTuckerRTRL, resnet20TTRTRL, resnet20TuckerRTRL]
model_paths = ['RTTTRL-', 'RTT-', 'RTRL-']
ps = [1, 0.9, 0.8, 0.7, 0.6]

# adv_accs[i][j] holds the result for model i evaluated with ps[j].
adv_accs = []

for i, model_func in enumerate(model_funcs):
    model_name, model_path = model_names[i], model_paths[i]
    adv_accs.append([])

    for p in ps:
        # Checkpoints are named "<prefix>p-<p>.pt".
        path = model_path + f'p-{p}.pt'

        (epoch, model, train_loss, test_loss,
         train_accuracy, test_accuracy) = load_checkpoint(path, p, model_func)

        model = model.eval()
        fmodel = fb.PyTorchModel(model, bounds=(-3, 3))

        adv_acc = eval_adv_acc(fmodel, test_loader, epsilons, attack, device=device)
        adv_accs[i].append(adv_acc)