In [1]:
from torchvision.models import resnet50
from torch.utils.data import Dataset
import torch
import torch.nn.utils.parametrize as parametrize
from torch.utils.data import DataLoader
import torch.nn as nn

import lora


class CustomDataset(Dataset):
    """Map-style dataset that pairs each sample with the label at the same index.

    ``data`` and ``labels`` are stored by reference; no copying, conversion or
    length validation is performed.
    """

    def __init__(self, data, labels):
        self.data, self.labels = data, labels

    def __len__(self):
        """Return the number of samples, as reported by ``data`` alone."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` tuple found at position ``idx``."""
        return self.data[idx], self.labels[idx]

def add_lora(model, device="cuda", rank=10, lora_alpha=1):
    """Register a LoRA parametrization on the ``weight`` of selected layers.

    The targeted layers are ``model.conv1``, the ``conv1``/``conv2``/``conv3``
    attributes of the first block (index 0) of ``model.layer1`` .. ``model.layer4``,
    and ``model.fc`` -- 14 layers in total, in that order.

    Args:
        model: Module exposing the attributes listed above (ResNet-style layout).
        device: Forwarded unchanged to ``lora.layer_parametrization``.
            Defaults to ``"cuda"``, the value that used to be hard-coded.
        rank: Forwarded unchanged to ``lora.layer_parametrization``
            (presumably the rank of the low-rank update -- confirm in ``lora``).
        lora_alpha: Forwarded unchanged to ``lora.layer_parametrization``
            (presumably the LoRA scaling factor -- confirm in ``lora``).

    Returns:
        None. ``model`` is modified in place.
    """
    # Collect every layer that should receive LoRA matrices; extend this list
    # to parametrize more of the architecture.
    layers = [model.conv1]
    for stage in (model.layer1, model.layer2, model.layer3, model.layer4):
        first_block = stage[0]
        layers.extend([first_block.conv1, first_block.conv2, first_block.conv3])
    layers.append(model.fc)

    for layer in layers:
        lora_param = lora.layer_parametrization(
            layer, device=device, rank=rank, lora_alpha=lora_alpha
        )
        parametrize.register_parametrization(layer, 'weight', lora_param)

    # NOTE(review): presumably toggles requires_grad so that only the LoRA
    # matrices are trained -- verify against lora.set_lora_gradients.
    lora.set_lora_gradients(model, layers)

# Alternative kept for reference: the torchvision architecture instead of the local one.
# model = resnet50().to('cuda')
from models_local import resnet_imagenet

# Local ResNet built from Bottleneck blocks with a [3, 4, 6, 3] stage layout.
model = resnet_imagenet.ResNet(resnet_imagenet.Bottleneck, [3, 4, 6, 3], )

# Deserialize the checkpoint onto the CPU first. Without map_location, tensors
# saved from a GPU are restored onto that GPU, and `state_dict` would keep a
# second copy of the weights in device memory for the rest of the session.
state_dict = torch.load('./state_dicts/resnet50_imagenet1k.pt', map_location='cpu')
model.load_state_dict(state_dict)
model.to('cuda')

# Attach the LoRA parametrizations after the weights are loaded and moved.
add_lora(model)

# Synthetic stand-in data: 100 random 3 x size x size tensors with random
# integer labels in [0, 10), served in batches of 28.
size = 224
data = [torch.rand(3, size, size) for _ in range(100)]
labels = torch.randint(0, 10, (100,))
dataset = CustomDataset(data, labels)
loader = DataLoader(dataset, batch_size=28)


In [2]:
# List every registered parameter together with its dtype, one per line.
for name, param in model.named_parameters():
    print(f"{name} {param.dtype}")

conv1.parametrizations.weight.original torch.float32
conv1.parametrizations.weight.0.mat_A torch.float32
conv1.parametrizations.weight.0.mat_B torch.float32
bn1.weight torch.float32
bn1.bias torch.float32
layer1.0.conv1.parametrizations.weight.original torch.float32
layer1.0.conv1.parametrizations.weight.0.mat_A torch.float32
layer1.0.conv1.parametrizations.weight.0.mat_B torch.float32
layer1.0.bn1.weight torch.float32
layer1.0.bn1.bias torch.float32
layer1.0.conv2.parametrizations.weight.original torch.float32
layer1.0.conv2.parametrizations.weight.0.mat_A torch.float32
layer1.0.conv2.parametrizations.weight.0.mat_B torch.float32
layer1.0.bn2.weight torch.float32
layer1.0.bn2.bias torch.float32
layer1.0.conv3.parametrizations.weight.original torch.float32
layer1.0.conv3.parametrizations.weight.0.mat_A torch.float32
layer1.0.conv3.parametrizations.weight.0.mat_B torch.float32
layer1.0.bn3.weight torch.float32
layer1.0.bn3.bias torch.float32
layer1.0.downsample.0.weight torch.float32
la

In [4]:
# Show the first row of `mat_A` from the first parametrization registered on conv1.weight.
print(model.conv1.parametrizations.weight[0].mat_A[0])

tensor([-0.3666,  0.7376, -2.0797,  0.0657,  1.8145, -0.0556, -0.6642, -0.1099,
         1.6018, -0.8198, -0.9710,  0.6459, -0.1881, -1.1766, -1.0657,  0.2742,
        -0.6090,  1.9299, -0.6550,  1.0151, -2.0453, -0.3560,  0.3401,  1.1605,
         0.1847, -0.5019,  1.4328, -0.7946, -0.5052, -1.8266, -0.5565, -0.6208,
         0.4370,  0.9058, -0.7025, -0.1474,  0.6897, -0.5001,  0.7955, -0.3704,
        -1.3695,  1.0997,  0.2136, -0.7356, -1.3617,  1.6388, -0.5650,  0.8238,
        -0.7352,  1.4763,  2.7770,  0.4328,  0.3671, -0.2704, -0.7148,  1.1171,
         0.1192, -0.8432, -0.2024,  2.1278,  0.3821, -0.3201, -0.7054, -0.5404],
       device='cuda:0', grad_fn=<SelectBackward0>)


In [5]:
# Show the first row of `mat_B` from the first parametrization registered on conv1.weight.
print(model.conv1.parametrizations.weight[0].mat_B[0])

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SelectBackward0>)


In [6]:
# Show the [0][0] slice of the un-parametrized conv1 weight kept under `.original`
# (presumably the kernel for output channel 0 / input channel 0 -- confirm layout).
print(model.conv1.parametrizations.weight.original[0][0])

tensor([[ 0.0133,  0.0147, -0.0154, -0.0230, -0.0409, -0.0430, -0.0708],
        [ 0.0041,  0.0058,  0.0149,  0.0206,  0.0022, -0.0209, -0.0385],
        [ 0.0223,  0.0236,  0.0161,  0.0588,  0.1028,  0.0626,  0.0520],
        [ 0.0232,  0.0042, -0.0459, -0.0487, -0.0164,  0.0402,  0.0658],
        [-0.0009,  0.0278, -0.0101, -0.0554, -0.1272, -0.0766,  0.0078],
        [ 0.0036,  0.0480,  0.0621,  0.0844,  0.0243, -0.0337, -0.0157],
        [-0.0800, -0.0322, -0.0178,  0.0342,  0.0354,  0.0224,  0.0017]],
       device='cuda:0')


In [2]:
import torch
import trades
from torch.cuda.amp import autocast, GradScaler

# Mixed-precision pass over `loader` that optimizes the loss returned by
# trades.trades_loss. `model`, `loader` and `nn` come from earlier cells; the
# optimizer, scaler and criterion are created here.
# NOTE(review): the recorded run of this cell ended in a CUDA OutOfMemoryError on a
# GPU with 2.94 GiB total capacity -- a smaller batch size may be needed.
model.train()  # Ensure the model is in training mode
scaler = GradScaler()
# Not used by the active code below; only the disabled cross-entropy path refers to it.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for data, target in loader:
    data, target = data.to('cuda'), target.to('cuda')

    optimizer.zero_grad()
    # Disabled alternative: plain forward pass with cross-entropy.
    # logits = model(data)
    # loss = criterion(logits,target)
    with autocast():
        # The optimizer is handed to trades_loss as well.
        # NOTE(review): check whether trades_loss zeroes gradients or steps internally.
        logits, loss = trades.trades_loss(model=model, x_natural=data, y=target, optimizer=optimizer,)

    # Scale the loss before backward, then let the scaler drive the optimizer step.
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()


    # Disabled alternative: full-precision update without the scaler.
    # print('loss', loss)
    # loss.backward()
    # optimizer.step()

    # Disabled debugging aid: natural-loss backward pass followed by a dump of
    # per-parameter gradient sizes.
    # optimizer.zero_grad()  # Clear previous gradients
    # logits_nat = model(data)  # Forward pass

    # # Assuming you have a loss function and target labels to compare with
    # loss = criterion(logits_nat, target)  # Calculate loss
    # loss.backward()  # Backward pass to calculate gradients

    # Access gradients of a particular parameter, for example, the first layer's weights
    # for name, parameter in model.named_parameters():
    #     if parameter.grad is not None:
    #         print(f"Gradient size for {name}: {parameter.grad.size()}")

    # # Proceed with optimizer step if needed
    # optimizer.step()

init x_adv
infer
kl loss
gradient compute
other operations
infer


OutOfMemoryError: CUDA out of memory. Tried to allocate 44.00 MiB. GPU 0 has a total capacity of 2.94 GiB of which 4.12 MiB is free. Including non-PyTorch memory, this process has 2.93 GiB memory in use. Of the allocated memory 2.78 GiB is allocated by PyTorch, and 43.66 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [12]:

        
# Forward-only pass over `loader`: gradients are cleared and logits are computed,
# but no loss, backward pass or optimizer step is active. After the loop,
# `logits_nat` holds the output for the last batch only.
for data, target in loader:
    
    data, target = data.to('cuda'), target.to('cuda')
    optimizer.zero_grad()
    logits_nat = model(data)
    # Disabled training step. NOTE(review): it refers to `logits`, not `logits_nat`.
    # loss = criterion(logits,target)
    # print('loss', loss)
    # loss.backward()
    # optimizer.step()

In [13]:
# NOTE(review): `logits_adv` is not assigned in any visible cell; this relies on
# leftover kernel state and will fail after Restart & Run All.
logits_adv.shape

torch.Size([4, 1000])

In [13]:
# Re-inspect the first row of `mat_A` on conv1.weight's first parametrization.
print(model.conv1.parametrizations.weight[0].mat_A[0])

tensor([ 1.9575,  0.1928, -0.4407,  0.6611,  1.2258, -0.7848,  0.0152, -0.4324,
        -2.3184,  1.0024,  0.9970, -1.7276, -1.5179, -0.9680, -1.0733, -0.5053,
         0.0181, -0.5896,  0.4828,  0.1066, -0.6343,  0.0386, -0.3664, -0.9748,
         1.0746, -0.9033,  1.0556,  0.7733, -0.9709, -2.2500, -0.0554, -2.2023,
        -0.1340,  0.7774, -0.1766, -0.0161,  0.4462, -1.7782,  1.8442,  1.7463,
         0.6906,  1.0282, -0.8912,  1.0148, -1.5610, -1.0916, -1.3502,  0.4384,
         0.1075, -0.2373,  1.3168,  0.2664,  1.6963,  1.3352, -0.3247, -1.3422,
        -0.4286, -0.5899, -0.0792, -0.4842, -2.9525,  0.2606, -0.8432, -1.0796],
       device='cuda:0', grad_fn=<SelectBackward0>)


In [14]:
# Re-inspect the first row of `mat_B` on conv1.weight's first parametrization.
print(model.conv1.parametrizations.weight[0].mat_B[0])

tensor([ 0.0030,  0.0023,  0.0011,  0.0021, -0.0024,  0.0011,  0.0032,  0.0028,
        -0.0031,  0.0034], device='cuda:0', grad_fn=<SelectBackward0>)


In [19]:
# Re-inspect the [0][0] slice of the un-parametrized conv1 weight kept under `.original`.
print(model.conv1.parametrizations.weight.original[0][0])

tensor([[-0.0002,  0.0204,  0.0120, -0.0117,  0.0048,  0.0408,  0.0248],
        [-0.0418, -0.0147, -0.0225,  0.0543, -0.0298,  0.0335,  0.0003],
        [-0.0220, -0.0219,  0.0298, -0.0174,  0.0036,  0.0399, -0.0435],
        [-0.0197, -0.0251,  0.0499, -0.0140, -0.0510, -0.0500,  0.0119],
        [-0.0058, -0.0120,  0.0267,  0.0366, -0.0328,  0.0133, -0.0200],
        [ 0.0522,  0.0435, -0.0193,  0.0181,  0.0252,  0.0232,  0.0311],
        [ 0.0139,  0.0073, -0.0141, -0.0039,  0.0301, -0.0426,  0.0161]],
       device='cuda:0')
