In [1]:
import torch
from torch import nn
from models import MultiLayeredPerceptron as mlp
from lora_models import LoRAModel

# Testing LoRAModel

In [2]:
# Seed the global RNG before drawing anything random so that a fresh
# "Restart Kernel -> Run All" reproduces the same input batch (and therefore
# comparable outputs in the cells below).
SEED = 0
torch.manual_seed(SEED)

# Dummy input batch: 4 samples with 784 features each, matching the
# in_features of the model's first Linear layer.
x = torch.randn(size=(4, 784))

In [3]:
# Build the baseline MLP and show its layer layout.
model = mlp()
print(model)

# Count parameter elements, split by whether they are marked as requiring
# gradients.
num_trainable_parameters_in_model = sum(
    p.numel() for p in model.parameters() if p.requires_grad
)
num_non_trainable_parameters_in_model = sum(
    p.numel() for p in model.parameters() if not p.requires_grad
)
print(f'Num trainable/non-trainable parameters in model: {num_trainable_parameters_in_model}/{num_non_trainable_parameters_in_model}')

MultiLayeredPerceptron(
  (linear_0): Linear(in_features=784, out_features=512, bias=True)
  (dropout_0): Dropout(p=0.2, inplace=False)
  (relu_0): ReLU(inplace=True)
  (linear_1): Linear(in_features=512, out_features=512, bias=True)
  (dropout_1): Dropout(p=0.2, inplace=False)
  (relu_1): ReLU(inplace=True)
  (linear_2): Linear(in_features=512, out_features=512, bias=True)
  (dropout_2): Dropout(p=0.2, inplace=False)
  (relu_2): ReLU(inplace=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
)
Num trainable/non-trainable parameters in model: 932362/0


In [5]:
# Wrap the baseline MLP in a LoRAModel and attach one adapter to every module
# whose name matches a target entry.
lora_model = LoRAModel()
lora_model.add_base_model(base_model=model)

lora_target_module_names = ['linear_*', 'output']
lora_config = dict(rank=4, alpha=2, delta_bias=False)
lora_model.build_new_adapter(
    lora_target_module_names=lora_target_module_names,
    lora_config=lora_config,
)
print(lora_model)

# Count parameter elements of the wrapped model, split by whether they are
# marked as requiring gradients.
num_trainable_parameters_in_lora_model = sum(
    p.numel() for p in lora_model.parameters() if p.requires_grad
)
num_non_trainable_parameters_in_lora_model = sum(
    p.numel() for p in lora_model.parameters() if not p.requires_grad
)
print(f'Num trainable/non-trainable parameters in LoRA Model: {num_trainable_parameters_in_lora_model}/{num_non_trainable_parameters_in_lora_model}')

LoRAModel(
  (base_model): MultiLayeredPerceptron(
    (linear_0): LoRALinear(Linear(in_features=784, out_features=512, bias=True) + ((α=2/r=4) × Adapter(in_features=784, rank=4, out_features=512, delta_bias=False)))
    (dropout_0): Dropout(p=0.2, inplace=False)
    (relu_0): ReLU(inplace=True)
    (linear_1): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=False)))
    (dropout_1): Dropout(p=0.2, inplace=False)
    (relu_1): ReLU(inplace=True)
    (linear_2): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=False)))
    (dropout_2): Dropout(p=0.2, inplace=False)
    (relu_2): ReLU(inplace=True)
    (output): LoRALinear(Linear(in_features=512, out_features=10, bias=True) + ((α=2/r=4) × Adapter(in_features=512, rank=4, out_features=10, delta_bias=False)))
  )
)
Num trainable/non-trainable parameters in L

In [6]:
# Reference forward pass: switch `model` to eval mode (the printed model
# contains Dropout layers) and run the input batch `x` through it.
# NOTE(review): `model` was handed to `lora_model.add_base_model` earlier;
# presumably it is the same object as `lora_model.base_model`, so this output
# may depend on the adapter state at the time the cell is run -- confirm.
model.eval()
# Layer-by-layer probes kept for manual debugging:
# model.linear_0(x)
# model.linear_1(model.linear_0(x))
# model.linear_2(model.linear_1(model.linear_0(x)))
model(x)

tensor([[-0.0750,  0.0100,  0.0068,  0.0678, -0.0509,  0.0307,  0.0163,  0.0675,
         -0.0730, -0.0218],
        [-0.0088, -0.0137,  0.0012,  0.0447, -0.0641,  0.0054, -0.0130,  0.0240,
         -0.0884,  0.0288],
        [-0.0220,  0.0503, -0.0186,  0.1189, -0.0717,  0.0893,  0.0617,  0.0566,
         -0.0444,  0.0046],
        [-0.0362,  0.0226, -0.0505,  0.0802, -0.0975,  0.0138, -0.0094,  0.0921,
         -0.0454,  0.0067]])

In [7]:
# Forward pass through the LoRA wrapper with the adapter disabled, in eval
# mode. The intent is to compare this against the plain `model(x)` output.
lora_model.disable_adapter()
lora_model.eval()
# Layer-by-layer probes kept for manual debugging:
# lora_model.base_model.linear_0(x)
# lora_model.base_model.linear_1(lora_model.base_model.linear_0(x))
# lora_model.base_model.linear_2(lora_model.base_model.linear_1(lora_model.base_model.linear_0(x)))
lora_model(x)

tensor([[-0.0750,  0.0100,  0.0068,  0.0678, -0.0509,  0.0307,  0.0163,  0.0675,
         -0.0730, -0.0218],
        [-0.0088, -0.0137,  0.0012,  0.0447, -0.0641,  0.0054, -0.0130,  0.0240,
         -0.0884,  0.0288],
        [-0.0220,  0.0503, -0.0186,  0.1189, -0.0717,  0.0893,  0.0617,  0.0566,
         -0.0444,  0.0046],
        [-0.0362,  0.0226, -0.0505,  0.0802, -0.0975,  0.0138, -0.0094,  0.0921,
         -0.0454,  0.0067]])

In [8]:
# Re-enable the adapter and run the same batch through the LoRA wrapper.
# NOTE(review): the recorded output is on the order of 1e5, versus roughly
# 1e-1 with the adapter disabled. A freshly built LoRA adapter is commonly
# initialised so that its update is zero (output equal to the base model);
# verify the adapter weight initialisation / alpha-over-rank scaling in
# `lora_models` -- TODO confirm whether this magnitude is intended.
lora_model.enable_adapter()
lora_model(x)

tensor([[-217355.9375, -293397.1875,   38750.7031,  140768.0156,  -61815.7070,
          -49068.4023,  135163.4062,   23110.6602,  216566.7344, -532693.8750],
        [ -41830.3438,  -56958.0977,   18479.0781,    2725.8491,   21184.8359,
            9369.0840,   22974.5195,   12114.3467,   32646.7109,  -37039.6445],
        [-202302.9531, -282877.9375,   43790.0547,   22540.9668,   68376.0859,
           27912.7383,  126027.2109,   64679.0977,  176971.3125, -226896.2500],
        [-132472.2344, -170983.7812,   20667.1797,   55350.4727,   14791.7998,
           -1081.7465,   81299.0078,   26734.1914,  118614.3828, -230755.5156]],
       grad_fn=<AddBackward0>)

In [9]:
# Obtain the merged model from the LoRA wrapper, switch it to eval mode and
# run the same batch. The result is meant to be compared with the
# adapter-enabled `lora_model(x)` output; the two should agree up to
# floating-point rounding.
merged_model = lora_model.get_merged_model()
merged_model.eval()
merged_model(x)

tensor([[-217355.9375, -293397.4062,   38750.6953,  140768.0156,  -61815.7188,
          -49068.4375,  135163.4219,   23110.6562,  216566.7188, -532693.8750],
        [ -41830.3477,  -56958.1094,   18479.0703,    2725.8516,   21184.8555,
            9369.0898,   22974.5176,   12114.3535,   32646.7031,  -37039.6641],
        [-202302.9375, -282877.9062,   43790.0547,   22540.8750,   68376.2031,
           27912.7734,  126027.1953,   64679.1836,  176971.3281, -226896.0312],
        [-132472.1562, -170983.8281,   20667.1250,   55350.4297,   14791.7969,
           -1081.7734,   81299.0312,   26734.2031,  118614.3906, -230755.4062]],
       grad_fn=<AddmmBackward0>)

In [15]:
# Inspect which module names the wrapper recorded for the adapter; presumably
# these are the names matched by `lora_target_module_names` -- verify against
# `lora_models`.
lora_model.lora_module_names

['linear_0', 'linear_1', 'linear_2', 'output']