In [1]:
import torch
from torch import nn
from models import MultiLayeredPerceptron as mlp
from lora_models import LoRAModel

# Testing LoRAModel

In [2]:
# Seed the global RNG so the random input batch (and any module initialised
# afterwards) is identical on every Restart & Run All.
torch.manual_seed(0)
# Batch of 4 random inputs with 784 features each, matching linear_0's in_features.
x = torch.randn(size=(4, 784))

In [3]:
# Build the baseline MLP and print its layer layout.
model = mlp()
print(model)

# Tally parameter element counts, keyed by each tensor's requires_grad flag
# (True -> trainable, False -> frozen).
model_param_tally = {True: 0, False: 0}
for parameter in model.parameters():
    model_param_tally[bool(parameter.requires_grad)] += parameter.numel()
num_trainable_parameters_in_model = model_param_tally[True]
num_non_trainable_parameters_in_model = model_param_tally[False]
print(f'Num trainable/non-trainable parameters in model: {num_trainable_parameters_in_model}/{num_non_trainable_parameters_in_model}')

MultiLayeredPerceptron(
  (linear_0): Linear(in_features=784, out_features=512, bias=True)
  (dropout_0): Dropout(p=0.2, inplace=False)
  (relu_0): ReLU(inplace=True)
  (linear_1): Linear(in_features=512, out_features=512, bias=True)
  (dropout_1): Dropout(p=0.2, inplace=False)
  (relu_1): ReLU(inplace=True)
  (linear_2): Linear(in_features=512, out_features=512, bias=True)
  (dropout_2): Dropout(p=0.2, inplace=False)
  (relu_2): ReLU(inplace=True)
  (output): Linear(in_features=512, out_features=10, bias=True)
)
Num trainable/non-trainable parameters in model: 932362/0


In [4]:
# Switch the base model to evaluation mode before taking the reference output.
model.eval()
# Debugging probes, left commented out. They chain only the Linear layers and
# skip the Dropout/ReLU modules listed in the printed architecture.
# model.linear_0(x)
# model.linear_1(model.linear_0(x))
# model.linear_2(model.linear_1(model.linear_0(x)))
# Reference forward pass of the un-adapted model; displayed as the cell result.
model(x)

tensor([[-0.0385, -0.0739, -0.0023, -0.0079,  0.0436, -0.0159,  0.0154,  0.0247,
         -0.0538,  0.0112],
        [-0.0350, -0.0507,  0.0483, -0.0785, -0.0365, -0.0622,  0.0421,  0.0132,
         -0.0086, -0.0257],
        [-0.0182, -0.0469,  0.0529, -0.0721,  0.0079, -0.0915,  0.0192, -0.0106,
         -0.0646,  0.0350],
        [-0.0076, -0.0508, -0.0328, -0.0568,  0.0235, -0.0257,  0.0242, -0.0563,
         -0.0564,  0.0637]], grad_fn=<AddmmBackward0>)

In [5]:
# Adapter settings and the module-name patterns that should receive an adapter.
lora_config = {
    'rank': 4,
    'alpha': 2,
    'delta_bias': True
}
lora_target_module_names = ['linear_*', 'output']

# Wrap the base model first, then attach a new adapter to the targeted modules.
lora_model = LoRAModel()
lora_model.add_base_model(base_model=model)
lora_model.build_new_adapter(
    lora_target_module_names=lora_target_module_names,
    lora_config=lora_config,
)
print(lora_model)

# Tally parameter element counts, keyed by each tensor's requires_grad flag
# (True -> trainable, False -> frozen).
lora_param_tally = {True: 0, False: 0}
for parameter in lora_model.parameters():
    lora_param_tally[bool(parameter.requires_grad)] += parameter.numel()
num_trainable_parameters_in_lora_model = lora_param_tally[True]
num_non_trainable_parameters_in_lora_model = lora_param_tally[False]
print(f'Num trainable/non-trainable parameters in LoRA Model: {num_trainable_parameters_in_lora_model}/{num_non_trainable_parameters_in_lora_model}')

MultiLayeredPerceptron(
  (linear_0): LoRALinear(Linear(in_features=784, out_features=512, bias=True) + ((α=2/r=4) × Adapter(in_features=784, rank=4, out_features=512, delta_bias=True)))
  (dropout_0): Dropout(p=0.2, inplace=False)
  (relu_0): ReLU(inplace=True)
  (linear_1): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=True)))
  (dropout_1): Dropout(p=0.2, inplace=False)
  (relu_1): ReLU(inplace=True)
  (linear_2): LoRALinear(Linear(in_features=512, out_features=512, bias=True) + ((α=2/r=4) × Adapter(in_features=512, rank=4, out_features=512, delta_bias=True)))
  (dropout_2): Dropout(p=0.2, inplace=False)
  (relu_2): ReLU(inplace=True)
  (output): LoRALinear(Linear(in_features=512, out_features=10, bias=True) + ((α=2/r=4) × Adapter(in_features=512, rank=4, out_features=10, delta_bias=True)))
)
Num trainable/non-trainable parameters in LoRA Model: 17010/932370


In [6]:
# Show which module names the LoRA model reports. In the recorded output these are
# linear_0, linear_1, linear_2 and output — presumably the modules matched by the
# 'linear_*' and 'output' target patterns (confirm against lora_models).
lora_model.lora_module_names

['linear_0', 'linear_1', 'linear_2', 'output']

In [7]:
# Turn the adapter off and switch to evaluation mode. The recorded output below
# matches the plain base-model output shown earlier in the notebook.
lora_model.disable_adapter()
lora_model.eval()
# Debugging probes, left commented out. They chain only the Linear layers.
# lora_model.base_model.linear_0(x)
# lora_model.base_model.linear_1(lora_model.base_model.linear_0(x))
# lora_model.base_model.linear_2(lora_model.base_model.linear_1(lora_model.base_model.linear_0(x)))
# Forward pass with the adapter disabled; displayed as the cell result.
lora_model(x)

tensor([[-0.0385, -0.0739, -0.0023, -0.0079,  0.0436, -0.0159,  0.0154,  0.0247,
         -0.0538,  0.0112],
        [-0.0350, -0.0507,  0.0483, -0.0785, -0.0365, -0.0622,  0.0421,  0.0132,
         -0.0086, -0.0257],
        [-0.0182, -0.0469,  0.0529, -0.0721,  0.0079, -0.0915,  0.0192, -0.0106,
         -0.0646,  0.0350],
        [-0.0076, -0.0508, -0.0328, -0.0568,  0.0235, -0.0257,  0.0242, -0.0563,
         -0.0564,  0.0637]])

In [8]:
# Re-enable the adapter and run the same batch through the LoRA model.
# NOTE(review): the recorded output has magnitudes around 1e5, versus about 1e-2
# with the adapter disabled — presumably the freshly built adapter is not
# initialised to a zero update; confirm the initialisation in lora_models.
lora_model.enable_adapter()
lora_model(x)

tensor([[ -87049.5547, -106407.2656,  -21747.7207, -179911.1250,  -81629.9609,
          234928.7500,   25671.4902,   54547.1289, -157710.0938,  190898.9062],
        [  -7636.5898,  -26221.3164,  -26860.4238,   -5352.1958,  -25560.0000,
           57498.2383,   -2451.0200,  -42766.2734,  -26559.9395,   29143.9199],
        [ -94373.1641, -124760.3281,  -61997.8594, -174638.4688,  -93002.6484,
          293159.3438,    9188.9229,   17679.0898, -181154.5625,  203632.7344],
        [ -23100.7070,  -16771.1582,   10287.8262,  -41455.0312,  -15927.0791,
           38074.0742,    8886.5898,   21427.4922,  -29487.0918,   41675.3633]],
       grad_fn=<AddBackward0>)

In [9]:
# Obtain the merged model (presumably the base weights with the adapter update
# folded in — confirm in lora_models) and evaluate it on the same batch.
# The recorded output agrees with the adapter-enabled LoRA output above up to
# small float32 rounding differences.
merged_model = lora_model.get_merged_model()
merged_model.eval()
merged_model(x)

tensor([[ -87049.5625, -106407.2500,  -21747.7402, -179911.0938,  -81629.9688,
          234928.7188,   25671.5000,   54547.1172, -157710.1250,  190898.9062],
        [  -7636.5918,  -26221.3125,  -26860.3906,   -5352.2051,  -25560.0176,
           57498.2148,   -2450.9971,  -42766.2617,  -26559.9336,   29143.9238],
        [ -94373.1953, -124760.3359,  -61997.8320, -174638.5625,  -93002.6094,
          293159.3750,    9188.8838,   17679.1406, -181154.5938,  203632.7500],
        [ -23100.6914,  -16771.1562,   10287.8164,  -41455.0469,  -15927.0859,
           38074.0859,    8886.5957,   21427.4844,  -29487.0820,   41675.3672]],
       grad_fn=<AddmmBackward0>)