In [1]:
import torch
from torch import nn
from models import MultiLayeredPerceptron as mlp, ConvolutionalNetwork as cnn
from lora_models import LoRAModel

# Testing LoRAModel (MLP)

In [2]:
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Seed PyTorch's global RNG before drawing, so that the random batch (and any
# random initialisation that follows in a top-to-bottom run) is identical on
# every Restart & Run All. The seed value itself is arbitrary.
torch.manual_seed(0)

# Random batch of 4 flattened inputs with 784 features each, moved to the
# selected device.
x = torch.randn(size=(4, 784))
x = x.to(device)

In [4]:
# Build the baseline MLP, place it on the selected device and show its layout.
model = mlp()
model.to(device)
print(model)

# Count parameter elements, split by whether gradients are enabled for them.
num_trainable_parameters_in_model = sum(
    weight.numel() for weight in model.parameters() if weight.requires_grad
)
num_non_trainable_parameters_in_model = sum(
    weight.numel() for weight in model.parameters() if not weight.requires_grad
)
print(f'Num trainable/non-trainable parameters in model: {num_trainable_parameters_in_model}/{num_non_trainable_parameters_in_model}')

MultiLayeredPerceptron(
  (linear_0): Linear(in_features=784, out_features=512, bias=True)
  (dropout_0): Dropout(p=0.2, inplace=False)
  (relu_0): ReLU(inplace=True)
  (linear_1): Linear(in_features=512, out_features=512, bias=True)
  (dropout_1): Dropout(p=0.2, inplace=False)
  (relu_1): ReLU(inplace=True)
  (linear_2): Linear(in_features=512, out_features=512, bias=True)
  (dropout_2): Dropout(p=0.2, inplace=False)
  (relu_2): ReLU(inplace=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
)
Num trainable/non-trainable parameters in model: 932362/0


In [5]:
# Put the baseline model in evaluation mode (its printed architecture contains
# Dropout layers), then run a forward pass on x. The value of the last
# expression is what the notebook displays as this cell's output.
model.eval()
model(x)

tensor([[-0.0544,  0.0270, -0.0016,  0.0003, -0.0600,  0.1292, -0.0314, -0.0365,
          0.0223, -0.0423],
        [-0.0544,  0.0515,  0.0384, -0.0698, -0.0652,  0.0702, -0.0182,  0.0059,
         -0.0304,  0.0099],
        [-0.0273, -0.0057,  0.0567, -0.0642, -0.0040,  0.1113,  0.0086, -0.0040,
         -0.0086,  0.0204],
        [-0.0375,  0.0358, -0.0106, -0.0492, -0.0339,  0.0610,  0.0005, -0.0034,
         -0.0138,  0.0236]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [6]:
# Adapter settings for the MLP: the same rank / alpha / delta_bias values are
# used for both layer-name keys. Each key gets its own independent settings dict.
lora_config = {
    layer_pattern: {'rank': 4, 'alpha': 2, 'delta_bias': True}
    for layer_pattern in ('linear_*', 'output')
}

In [7]:
# Wrap the model with LoRA adapters as described by lora_config and show the result.
lora_model = LoRAModel(model, lora_config)
print(lora_model)

# Count parameter elements, split by whether gradients are enabled for them.
num_trainable_parameters_in_lora_model = sum(
    weight.numel() for weight in lora_model.parameters() if weight.requires_grad
)
num_non_trainable_parameters_in_lora_model = sum(
    weight.numel() for weight in lora_model.parameters() if not weight.requires_grad
)
print(f'Num trainable/non-trainable parameters in LoRA Model: {num_trainable_parameters_in_lora_model}/{num_non_trainable_parameters_in_lora_model}')

LoRAModel(
  (base_model): MultiLayeredPerceptron(
    (linear_0): LoRALinear(Linear(in_features=784, out_features=512, bias=True) + ((α=2.0/r=4) × Adapter(in_features=784, rank=4, out_features=512, delta_bias=True)))
    (dropout_0): Dropout(p=0.2, inplace=False)
    (relu_0): ReLU(inplace=True)
    (linear_1): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2.0/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=True)))
    (dropout_1): Dropout(p=0.2, inplace=False)
    (relu_1): ReLU(inplace=True)
    (linear_2): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2.0/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=True)))
    (dropout_2): Dropout(p=0.2, inplace=False)
    (relu_2): ReLU(inplace=True)
    (output): LoRALinear(Linear(in_features=512, out_features=10, bias=True) + ((α=2.0/r=4) × Adapter(in_features=512, rank=4, out_features=10, delta_bias=True)))
  )
)
Num trainable/non-trainable parameters 

In [8]:
# Turn the adapters off, switch to evaluation mode and run the same input.
# The recorded output of this cell matches the baseline model's recorded
# output value for value.
lora_model.disable_adapter()
lora_model.eval()
lora_model(x)

tensor([[-0.0544,  0.0270, -0.0016,  0.0003, -0.0600,  0.1292, -0.0314, -0.0365,
          0.0223, -0.0423],
        [-0.0544,  0.0515,  0.0384, -0.0698, -0.0652,  0.0702, -0.0182,  0.0059,
         -0.0304,  0.0099],
        [-0.0273, -0.0057,  0.0567, -0.0642, -0.0040,  0.1113,  0.0086, -0.0040,
         -0.0086,  0.0204],
        [-0.0375,  0.0358, -0.0106, -0.0492, -0.0339,  0.0610,  0.0005, -0.0034,
         -0.0138,  0.0236]], device='cuda:0')

In [9]:
# Turn the adapters back on and run the same input again. The recorded output
# now differs from the adapter-disabled output recorded just before it.
lora_model.enable_adapter()
lora_model(x)

tensor([[ 0.0037,  0.0947, -0.0024,  0.0040, -0.1061,  0.1092, -0.0352, -0.0432,
          0.0057, -0.0423],
        [-0.0278,  0.0786,  0.0784, -0.0598, -0.0853,  0.0683, -0.0071,  0.0340,
         -0.0380, -0.0046],
        [-0.0026,  0.0545,  0.0501, -0.0565, -0.0271,  0.0867,  0.0021, -0.0303,
         -0.0201,  0.0183],
        [ 0.0089,  0.1021, -0.0008, -0.0393, -0.0519,  0.0457, -0.0124,  0.0288,
         -0.0451,  0.0193]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [10]:
# Ask the LoRA wrapper for a merged model, place it on the device, put it in
# evaluation mode and run it on the same input. The recorded output equals the
# adapter-enabled output recorded in the previous cell.
# NOTE(review): presumably get_merged_model() folds the adapter weights into
# the base layers — confirm in lora_models.
merged_model = lora_model.get_merged_model()
merged_model.to(device)
merged_model.eval()
merged_model(x)

tensor([[ 0.0037,  0.0947, -0.0024,  0.0040, -0.1061,  0.1092, -0.0352, -0.0432,
          0.0057, -0.0423],
        [-0.0278,  0.0786,  0.0784, -0.0598, -0.0853,  0.0683, -0.0071,  0.0340,
         -0.0380, -0.0046],
        [-0.0026,  0.0545,  0.0501, -0.0565, -0.0271,  0.0867,  0.0021, -0.0303,
         -0.0201,  0.0183],
        [ 0.0089,  0.1021, -0.0008, -0.0393, -0.0519,  0.0457, -0.0124,  0.0288,
         -0.0451,  0.0193]], device='cuda:0', grad_fn=<AddmmBackward0>)

# Testing LoRAModel (CNN)

In [11]:
# Pick the device again so the CNN section can be run on its own:
# GPU when CUDA is usable, CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [12]:
# Seed PyTorch's global RNG before drawing, so that the random batch (and any
# random initialisation that follows in a top-to-bottom run) is identical on
# every Restart & Run All. The seed value itself is arbitrary.
torch.manual_seed(0)

# Random batch of 4 single-channel 28x28 inputs, moved to the selected device.
x = torch.randn(size=(4, 1, 28, 28))
x = x.to(device)

In [13]:
# Build the baseline CNN, place it on the selected device and show its layout.
model = cnn()
model.to(device)
print(model)

# Count parameter elements, split by whether gradients are enabled for them.
num_trainable_parameters_in_model = sum(
    weight.numel() for weight in model.parameters() if weight.requires_grad
)
num_non_trainable_parameters_in_model = sum(
    weight.numel() for weight in model.parameters() if not weight.requires_grad
)
print(f'Num trainable/non-trainable parameters in model: {num_trainable_parameters_in_model}/{num_non_trainable_parameters_in_model}')

ConvolutionalNetwork(
  (conv2d_0): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (dropout_0): Dropout(p=0.2, inplace=False)
  (relu_0): ReLU(inplace=True)
  (conv2d_1): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (dropout_1): Dropout(p=0.2, inplace=False)
  (relu_1): ReLU(inplace=True)
  (conv2d_2): Conv2d(64, 1, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (dropout_2): Dropout(p=0.2, inplace=False)
  (relu_2): ReLU(inplace=True)
  (output): Linear(in_features=784, out_features=10, bias=True)
)
Num trainable/non-trainable parameters in model: 113579/0


In [14]:
# Put the baseline CNN in evaluation mode (its printed architecture contains
# Dropout layers), then run a forward pass on x. The value of the last
# expression is what the notebook displays as this cell's output.
model.eval()
model(x)

tensor([[ 0.0241, -0.0310, -0.0251, -0.0225, -0.0193, -0.0497,  0.0420,  0.0044,
         -0.0239, -0.0384],
        [ 0.0162, -0.0485, -0.0169, -0.0236, -0.0067, -0.0422,  0.0184,  0.0161,
          0.0030, -0.0555],
        [-0.0250, -0.0069, -0.0354, -0.0179,  0.0139, -0.0380,  0.0015,  0.0284,
          0.0081, -0.0346],
        [ 0.0536, -0.0803, -0.0117, -0.0010, -0.0234, -0.0283,  0.0063,  0.0015,
          0.0073, -0.0364]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [15]:
# Adapter settings for the CNN: only the 'conv2d_*' key is configured here.
lora_config = {}
lora_config['conv2d_*'] = {'alpha': 8, 'rank': 4, 'rank_for': 'channels', 'delta_bias': True}

In [16]:
# Wrap the CNN with LoRA adapters as described by lora_config and show the result.
lora_model = LoRAModel(model, lora_config)
print(lora_model)

# Count parameter elements, split by whether gradients are enabled for them.
num_trainable_parameters_in_lora_model = sum(
    weight.numel() for weight in lora_model.parameters() if weight.requires_grad
)
num_non_trainable_parameters_in_lora_model = sum(
    weight.numel() for weight in lora_model.parameters() if not weight.requires_grad
)
print(f'Num trainable/non-trainable parameters in LoRA Model: {num_trainable_parameters_in_lora_model}/{num_non_trainable_parameters_in_lora_model}')

LoRAModel(
  (base_model): ConvolutionalNetwork(
    (conv2d_0): LoRAConv2d(Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) + ((α=8.0/r=4) × Adapter(in_channels=1, rank=4, out_features=64, delta_bias=True)))
    (dropout_0): Dropout(p=0.2, inplace=False)
    (relu_0): ReLU(inplace=True)
    (conv2d_1): LoRAConv2d(Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) + ((α=8.0/r=4) × Adapter(in_channels=64, rank=4, out_features=64, delta_bias=True)))
    (dropout_1): Dropout(p=0.2, inplace=False)
    (relu_1): ReLU(inplace=True)
    (conv2d_2): LoRAConv2d(Conv2d(64, 1, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) + ((α=8.0/r=4) × Adapter(in_channels=64, rank=4, out_features=1, delta_bias=True)))
    (dropout_2): Dropout(p=0.2, inplace=False)
    (relu_2): ReLU(inplace=True)
    (output): Linear(in_features=784, out_features=10, bias=True)
  )
)
Num trainable/non-trainable parameters in LoRA Model: 25929/113585


In [17]:
# Turn the adapters off, switch to evaluation mode and run the same input.
# The recorded output of this cell matches the baseline CNN's recorded output
# value for value.
lora_model.disable_adapter()
lora_model.eval()
lora_model(x)

tensor([[ 0.0241, -0.0310, -0.0251, -0.0225, -0.0193, -0.0497,  0.0420,  0.0044,
         -0.0239, -0.0384],
        [ 0.0162, -0.0485, -0.0169, -0.0236, -0.0067, -0.0422,  0.0184,  0.0161,
          0.0030, -0.0555],
        [-0.0250, -0.0069, -0.0354, -0.0179,  0.0139, -0.0380,  0.0015,  0.0284,
          0.0081, -0.0346],
        [ 0.0536, -0.0803, -0.0117, -0.0010, -0.0234, -0.0283,  0.0063,  0.0015,
          0.0073, -0.0364]], device='cuda:0')

In [18]:
# Turn the adapters back on and run the same input again. The recorded output
# now differs from the adapter-disabled output recorded just before it.
lora_model.enable_adapter()
lora_model(x)

tensor([[ 0.0162, -0.0355, -0.0097, -0.0323, -0.0102, -0.0644,  0.0630, -0.0073,
          0.0091, -0.0367],
        [ 0.0133, -0.0720, -0.0157, -0.0315, -0.0305, -0.0206,  0.0352,  0.0101,
          0.0234, -0.0886],
        [-0.0321,  0.0075, -0.0406, -0.0119,  0.0126, -0.0357, -0.0085,  0.0455,
          0.0117, -0.0522],
        [ 0.0394, -0.0989, -0.0014, -0.0230, -0.0404, -0.0065,  0.0279, -0.0050,
          0.0265, -0.0495]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [19]:
# Ask the LoRA wrapper for a merged model, place it on the device, put it in
# evaluation mode and run it on the same input. The recorded output equals the
# adapter-enabled output recorded in the previous cell.
# NOTE(review): presumably get_merged_model() folds the adapter weights into
# the base layers — confirm in lora_models.
merged_model = lora_model.get_merged_model()
merged_model.to(device)
merged_model.eval()
merged_model(x)

tensor([[ 0.0162, -0.0355, -0.0097, -0.0323, -0.0102, -0.0644,  0.0630, -0.0073,
          0.0091, -0.0367],
        [ 0.0133, -0.0720, -0.0157, -0.0315, -0.0305, -0.0206,  0.0352,  0.0101,
          0.0234, -0.0886],
        [-0.0321,  0.0075, -0.0406, -0.0119,  0.0126, -0.0357, -0.0085,  0.0455,
          0.0117, -0.0522],
        [ 0.0394, -0.0989, -0.0014, -0.0230, -0.0404, -0.0065,  0.0279, -0.0050,
          0.0265, -0.0495]], device='cuda:0', grad_fn=<AddmmBackward0>)