In [1]:
import torch
from torch import nn
from models import MultiLayeredPerceptron as mlp
from lora_models import LoRAModel

# Testing LoRAModel

In [2]:
# Seed PyTorch's global RNG before anything random runs (the randn input and any
# randomly initialised weights), so "Restart Kernel -> Run All" repeats the same
# numbers on the same machine / PyTorch build.
SEED = 0
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
# Kept as the last statement so the cell produces no output.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Random input batch of shape (4, 784), sampled on the CPU and then moved to `device`.
x = torch.randn(4, 784).to(device)

In [4]:
# Build the base MLP on the target device and show its layer structure.
model = mlp().to(device)
print(model)

# Tally parameter elements by whether autograd will update them.
num_trainable_parameters_in_model = sum(
    p.numel() for p in model.parameters() if p.requires_grad
)
num_non_trainable_parameters_in_model = sum(
    p.numel() for p in model.parameters() if not p.requires_grad
)
print(f'Num trainable/non-trainable parameters in model: {num_trainable_parameters_in_model}/{num_non_trainable_parameters_in_model}')

MultiLayeredPerceptron(
  (linear_0): Linear(in_features=784, out_features=512, bias=True)
  (dropout_0): Dropout(p=0.2, inplace=False)
  (relu_0): ReLU(inplace=True)
  (linear_1): Linear(in_features=512, out_features=512, bias=True)
  (dropout_1): Dropout(p=0.2, inplace=False)
  (relu_1): ReLU(inplace=True)
  (linear_2): Linear(in_features=512, out_features=512, bias=True)
  (dropout_2): Dropout(p=0.2, inplace=False)
  (relu_2): ReLU(inplace=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
)
Num trainable/non-trainable parameters in model: 932362/0


In [5]:
# Switch to eval mode so the Dropout layers are inactive and repeated forward
# passes on the same input give the same result.
model.eval()
# Debugging aids: uncomment one line to inspect the raw output of the chained
# Linear layers (ReLU/Dropout are skipped in these expressions).
# model.linear_0(x)
# model.linear_1(model.linear_0(x))
# model.linear_2(model.linear_1(model.linear_0(x)))
# Output of the plain base model on `x`; this is the reference the LoRA-wrapped
# model is compared against further down.
model(x)

tensor([[ 0.0837, -0.0374, -0.0703,  0.0148, -0.0619,  0.0325,  0.0116,  0.0557,
         -0.0203,  0.0228],
        [-0.0030, -0.0545, -0.0350,  0.0060, -0.0148,  0.0459,  0.0160,  0.0443,
          0.0676, -0.0091],
        [ 0.0846, -0.0083, -0.0556,  0.0164, -0.0621,  0.0111,  0.0044,  0.0324,
          0.0208,  0.0181],
        [ 0.0436, -0.0069, -0.0604,  0.0010, -0.0319,  0.0533,  0.0149,  0.0526,
         -0.0134,  0.0044]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [6]:
# LoRA settings passed to LoRAModel, keyed by module name.
# NOTE(review): 'linear_*' looks like a wildcard covering linear_0..linear_2 - confirm
# against LoRAModel's matching rules.
# Each key gets its own dict, so changing one entry never affects the other.
lora_config = {
    name_pattern: dict(rank=4, alpha=2, delta_bias=True)
    for name_pattern in ('linear_*', 'output')
}

In [7]:
# Wrap the base model according to `lora_config` and show the resulting structure.
lora_model = LoRAModel(model, lora_config)
print(lora_model)

# Tally parameter elements of the wrapped model by whether autograd will update them.
num_trainable_parameters_in_lora_model = sum(
    p.numel() for p in lora_model.parameters() if p.requires_grad
)
num_non_trainable_parameters_in_lora_model = sum(
    p.numel() for p in lora_model.parameters() if not p.requires_grad
)
print(f'Num trainable/non-trainable parameters in LoRA Model: {num_trainable_parameters_in_lora_model}/{num_non_trainable_parameters_in_lora_model}')

LoRAModel(
  (base_model): MultiLayeredPerceptron(
    (linear_0): LoRALinear(Linear(in_features=784, out_features=512, bias=True) + ((α=2.0/r=4) × Adapter(in_features=784, rank=4, out_features=512, delta_bias=True)))
    (dropout_0): Dropout(p=0.2, inplace=False)
    (relu_0): ReLU(inplace=True)
    (linear_1): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2.0/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=True)))
    (dropout_1): Dropout(p=0.2, inplace=False)
    (relu_1): ReLU(inplace=True)
    (linear_2): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2.0/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=True)))
    (dropout_2): Dropout(p=0.2, inplace=False)
    (relu_2): ReLU(inplace=True)
    (output): LoRALinear(Linear(in_features=512, out_features=10, bias=True) + ((α=2.0/r=4) × Adapter(in_features=512, rank=4, out_features=10, delta_bias=True)))
  )
)
Num trainable/non-trainable parameters 

In [8]:
# Sanity check: with the adapter switched off, the wrapped model is expected to
# give the same output as the plain base model did on the same `x`.
lora_model.disable_adapter()
# Eval mode keeps the Dropout layers inactive; later cells rely on this staying set.
lora_model.eval()
# Debugging aids: uncomment one line to inspect the raw output of the chained
# (wrapped) Linear layers; ReLU/Dropout are skipped in these expressions.
# lora_model.base_model.linear_0(x)
# lora_model.base_model.linear_1(lora_model.base_model.linear_0(x))
# lora_model.base_model.linear_2(lora_model.base_model.linear_1(lora_model.base_model.linear_0(x)))
lora_model(x)

tensor([[ 0.0837, -0.0374, -0.0703,  0.0148, -0.0619,  0.0325,  0.0116,  0.0557,
         -0.0203,  0.0228],
        [-0.0030, -0.0545, -0.0350,  0.0060, -0.0148,  0.0459,  0.0160,  0.0443,
          0.0676, -0.0091],
        [ 0.0846, -0.0083, -0.0556,  0.0164, -0.0621,  0.0111,  0.0044,  0.0324,
          0.0208,  0.0181],
        [ 0.0436, -0.0069, -0.0604,  0.0010, -0.0319,  0.0533,  0.0149,  0.0526,
         -0.0134,  0.0044]], device='cuda:0')

In [9]:
# Turn the adapter back on after the previous cell disabled it. The model is
# still in eval mode from that cell, so Dropout stays inactive here.
lora_model.enable_adapter()
# Output with the adapter contribution included; compare with the
# adapter-disabled output above to see the adapter's effect.
lora_model(x)

tensor([[ 0.0770, -0.0724, -0.0822, -0.0045, -0.1111, -0.0148,  0.0157,  0.0833,
         -0.0413,  0.0405],
        [ 0.0415, -0.0679, -0.0668,  0.0092, -0.0389,  0.0161, -0.0070,  0.0340,
          0.0932,  0.0049],
        [ 0.0981, -0.0483, -0.0365, -0.0075, -0.1120, -0.0235,  0.0196,  0.0278,
          0.0033,  0.0180],
        [ 0.0289, -0.0766, -0.1286, -0.0011, -0.0875,  0.0370,  0.0016,  0.0805,
         -0.0231,  0.0235]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [10]:
# Snapshot the adapter-enabled output *before* merging, so the check below does
# not depend on whether get_merged_model() leaves `lora_model` untouched.
# `lora_model` was put in eval mode by an earlier cell, so this forward pass is
# deterministic; no_grad avoids building an autograd graph for a reference value.
with torch.no_grad():
    lora_output = lora_model(x)

merged_model = lora_model.get_merged_model()
merged_model.eval()
merged_output = merged_model(x)

# The merged model should compute the same function as base + adapter. Assert it
# instead of comparing printed tensors by eye; the tolerances leave room for
# float32 rounding differences between the merged and the two-path computation.
torch.testing.assert_close(merged_output.detach(), lora_output, rtol=1e-4, atol=1e-5)

# Displayed as the cell's output, exactly as before.
merged_output

tensor([[ 0.0770, -0.0724, -0.0822, -0.0045, -0.1111, -0.0148,  0.0157,  0.0833,
         -0.0413,  0.0405],
        [ 0.0415, -0.0679, -0.0668,  0.0092, -0.0389,  0.0161, -0.0070,  0.0340,
          0.0932,  0.0049],
        [ 0.0981, -0.0483, -0.0365, -0.0075, -0.1120, -0.0235,  0.0196,  0.0278,
          0.0033,  0.0180],
        [ 0.0289, -0.0766, -0.1286, -0.0011, -0.0875,  0.0370,  0.0016,  0.0805,
         -0.0231,  0.0235]], device='cuda:0', grad_fn=<AddmmBackward0>)