In [8]:
from torchvision.models import resnet50
from torch.utils.data import Dataset
import torch
import torch.nn.utils.parametrize as parametrize
from torch.utils.data import DataLoader
import torch.nn as nn

import lora


class CustomDataset(Dataset):
    """Map-style dataset that pairs each sample with the label at the same index."""

    def __init__(self, data, labels):
        # Both containers are stored as given: no copy and no length validation.
        self.data, self.labels = data, labels

    def __len__(self):
        """Return the number of samples, as reported by ``len(self.data)``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` tuple located at position ``idx``."""
        return self.data[idx], self.labels[idx]

def add_lora(model, rank=10, lora_alpha=1, device="cuda"):
    """Register LoRA parametrizations on a fixed set of layers of ``model``.

    The model is modified in place; nothing is returned.

    Args:
        model: Module exposing ``conv1``, ``fc`` and ``layer1`` .. ``layer4``,
            where the first element of each ``layerN`` has ``conv1``,
            ``conv2`` and ``conv3`` attributes (e.g. torchvision's ResNet-50).
        rank: Forwarded to ``lora.layer_parametrization`` as ``rank``.
        lora_alpha: Forwarded to ``lora.layer_parametrization`` as ``lora_alpha``.
        device: Forwarded to ``lora.layer_parametrization`` as ``device``.

    The defaults reproduce the values that were previously hard-coded, so
    ``add_lora(model)`` behaves exactly as before.
    """
    # `layers` lists every element of the architecture that receives LoRA
    # matrices: the stem convolution, the three convolutions of the first
    # block of each stage, and the final fully connected layer.
    layers = [model.conv1]
    for stage in (model.layer1, model.layer2, model.layer3, model.layer4):
        first_block = stage[0]
        layers.extend([first_block.conv1, first_block.conv2, first_block.conv3])
    layers.append(model.fc)

    for layer in layers:
        lora_param = lora.layer_parametrization(
            layer, device=device, rank=rank, lora_alpha=lora_alpha
        )
        parametrize.register_parametrization(layer, 'weight', lora_param)

    # Gradient flags are configured by the project helper once every
    # parametrization is in place.
    lora.set_lora_gradients(model, layers)

# Build a torchvision ResNet-50 (no weights argument is passed), move it to the
# GPU, and attach the LoRA parametrizations to it in place.
model = resnet50().to('cuda')
add_lora(model)

# Synthetic dataset: 100 random 3 x 32 x 32 tensors, with integer labels drawn
# uniformly from [0, 10).
# NOTE(review): labels span 10 classes while the model head is left at its
# default size (presumably 1000 outputs for torchvision's resnet50) - confirm
# this mismatch is intended for the demo.
size = 32
data = [torch.rand(3, size, size) for _ in range(100)]
labels = torch.randint(0, 10, (100,))
dataset = CustomDataset(data, labels)
# Batches of 32 with every other DataLoader option left at its default;
# 100 samples therefore yield 4 batches, the last one holding 4 samples.
loader = DataLoader(dataset, batch_size=32)


In [9]:
# Row 0 of `mat_A` on the first parametrization registered for conv1's weight,
# inspected before the training cell runs (presumably the LoRA "A" factor -
# confirm against lora.py).
print(model.conv1.parametrizations.weight[0].mat_A[0])

tensor([ 1.9553,  0.1934, -0.4419,  0.6612,  1.2280, -0.7828,  0.0132, -0.4347,
        -2.3182,  1.0015,  0.9982, -1.7255, -1.5165, -0.9702, -1.0734, -0.5069,
         0.0177, -0.5905,  0.4840,  0.1061, -0.6364,  0.0383, -0.3670, -0.9739,
         1.0738, -0.9023,  1.0535,  0.7725, -0.9731, -2.2512, -0.0550, -2.2016,
        -0.1342,  0.7784, -0.1760, -0.0151,  0.4480, -1.7760,  1.8451,  1.7483,
         0.6928,  1.0304, -0.8913,  1.0149, -1.5610, -1.0897, -1.3492,  0.4384,
         0.1059, -0.2361,  1.3165,  0.2678,  1.6965,  1.3362, -0.3240, -1.3411,
        -0.4276, -0.5909, -0.0803, -0.4822, -2.9535,  0.2598, -0.8454, -1.0809],
       device='cuda:0', grad_fn=<SelectBackward0>)


In [10]:
# Row 0 of `mat_B` on the same parametrization, inspected before training;
# the recorded output is all zeros.
print(model.conv1.parametrizations.weight[0].mat_B[0])

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       grad_fn=<SelectBackward0>)


In [11]:
# The [0][0] slice of conv1's original (un-parametrized) weight, which
# torch.nn.utils.parametrize keeps under `.original`; a 7 x 7 kernel in the
# recorded output. Printed here as the pre-training reference.
print(model.conv1.parametrizations.weight.original[0][0])

tensor([[-0.0002,  0.0204,  0.0120, -0.0117,  0.0048,  0.0408,  0.0248],
        [-0.0418, -0.0147, -0.0225,  0.0543, -0.0298,  0.0335,  0.0003],
        [-0.0220, -0.0219,  0.0298, -0.0174,  0.0036,  0.0399, -0.0435],
        [-0.0197, -0.0251,  0.0499, -0.0140, -0.0510, -0.0500,  0.0119],
        [-0.0058, -0.0120,  0.0267,  0.0366, -0.0328,  0.0133, -0.0200],
        [ 0.0522,  0.0435, -0.0193,  0.0181,  0.0252,  0.0232,  0.0311],
        [ 0.0139,  0.0073, -0.0141, -0.0039,  0.0301, -0.0426,  0.0161]],
       device='cuda:0')


In [12]:
# One pass over `loader`, optimising the model with cross-entropy and Adam.
criterion = nn.CrossEntropyLoss()

# Give the optimizer only the parameters that can receive gradients. Frozen
# tensors never get a `.grad`, so Adam would skip them anyway; filtering makes
# the intent explicit and avoids tracking parameters that are never updated.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=0.001)

# Follow whatever device the model lives on instead of hard-coding it.
device = next(model.parameters()).device

# `inputs` / `targets` are used as loop names so the module-level `data` list
# that backs the dataset is not overwritten by a batch tensor.
for inputs, targets in loader:

    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    logits = model(inputs)
    loss = criterion(logits, targets)
    print('loss', loss)
    loss.backward()
    optimizer.step()

loss tensor(7.4598, device='cuda:0', grad_fn=<NllLossBackward0>)
loss tensor(6.9148, device='cuda:0', grad_fn=<NllLossBackward0>)
loss tensor(6.9586, device='cuda:0', grad_fn=<NllLossBackward0>)
loss tensor(7.0233, device='cuda:0', grad_fn=<NllLossBackward0>)


In [13]:
# Row 0 of conv1's `mat_A` again, now after the training cell; compare with the
# values printed before training to see the update.
print(model.conv1.parametrizations.weight[0].mat_A[0])

tensor([ 1.9575,  0.1928, -0.4407,  0.6611,  1.2258, -0.7848,  0.0152, -0.4324,
        -2.3184,  1.0024,  0.9970, -1.7276, -1.5179, -0.9680, -1.0733, -0.5053,
         0.0181, -0.5896,  0.4828,  0.1066, -0.6343,  0.0386, -0.3664, -0.9748,
         1.0746, -0.9033,  1.0556,  0.7733, -0.9709, -2.2500, -0.0554, -2.2023,
        -0.1340,  0.7774, -0.1766, -0.0161,  0.4462, -1.7782,  1.8442,  1.7463,
         0.6906,  1.0282, -0.8912,  1.0148, -1.5610, -1.0916, -1.3502,  0.4384,
         0.1075, -0.2373,  1.3168,  0.2664,  1.6963,  1.3352, -0.3247, -1.3422,
        -0.4286, -0.5899, -0.0792, -0.4842, -2.9525,  0.2606, -0.8432, -1.0796],
       device='cuda:0', grad_fn=<SelectBackward0>)


In [14]:
# Row 0 of conv1's `mat_B` after the training cell; the recorded output is no
# longer all zeros.
print(model.conv1.parametrizations.weight[0].mat_B[0])

tensor([ 0.0030,  0.0023,  0.0011,  0.0021, -0.0024,  0.0011,  0.0032,  0.0028,
        -0.0031,  0.0034], device='cuda:0', grad_fn=<SelectBackward0>)


In [19]:
# The [0][0] slice of conv1's original weight after the training cell; the
# recorded output matches the pre-training print, i.e. this tensor was not
# updated by the optimizer.
print(model.conv1.parametrizations.weight.original[0][0])

tensor([[-0.0002,  0.0204,  0.0120, -0.0117,  0.0048,  0.0408,  0.0248],
        [-0.0418, -0.0147, -0.0225,  0.0543, -0.0298,  0.0335,  0.0003],
        [-0.0220, -0.0219,  0.0298, -0.0174,  0.0036,  0.0399, -0.0435],
        [-0.0197, -0.0251,  0.0499, -0.0140, -0.0510, -0.0500,  0.0119],
        [-0.0058, -0.0120,  0.0267,  0.0366, -0.0328,  0.0133, -0.0200],
        [ 0.0522,  0.0435, -0.0193,  0.0181,  0.0252,  0.0232,  0.0311],
        [ 0.0139,  0.0073, -0.0141, -0.0039,  0.0301, -0.0426,  0.0161]],
       device='cuda:0')
