In [60]:
from lora import LoraLinear, SharedLoraLinear
from peft import PeftConfig, PeftModel
import torch.nn as nn
import torch
import torch.nn.functional as F
import json
from transformers import AutoTokenizer, AutoModelForCausalLM

In [61]:
opt_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
# opt_model

In [62]:
model_state_dict = opt_model.state_dict()
# model_state_dict

In [63]:
opt_config = json.load(open('/data/chris/adapters/peft-opt125m-dummylora/adapter_config.json'))
opt_config

{'base_model_name_or_path': 'facebook/opt-125m',
 'bias': 'none',
 'fan_in_fan_out': False,
 'inference_mode': True,
 'init_lora_weights': True,
 'layers_pattern': None,
 'layers_to_transform': None,
 'lora_alpha': 16,
 'lora_dropout': 0.05,
 'modules_to_save': None,
 'peft_type': 'LORA',
 'r': 8,
 'revision': None,
 'target_modules': ['q_proj', 'v_proj'],
 'task_type': 'CAUSAL_LM'}

In [64]:
# get the tensor for the 
weight = model_state_dict['model.decoder.layers.0.self_attn.v_proj.weight']
bias = model_state_dict['model.decoder.layers.0.self_attn.v_proj.bias']

In [65]:
adapter_weights_one = torch.load('/data/chris/adapters/peft-opt125m-dummylora/adapter_model.bin')
adapter_weights_one.keys()

dict_keys(['base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_A.weight', 'base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_B.weight', 'base_model.model.model.decoder.layers.0.self_attn.q_proj.lora_A.weight', 'base_model.model.model.decoder.layers.0.self_attn.q_proj.lora_B.weight', 'base_model.model.model.decoder.layers.1.self_attn.v_proj.lora_A.weight', 'base_model.model.model.decoder.layers.1.self_attn.v_proj.lora_B.weight', 'base_model.model.model.decoder.layers.1.self_attn.q_proj.lora_A.weight', 'base_model.model.model.decoder.layers.1.self_attn.q_proj.lora_B.weight', 'base_model.model.model.decoder.layers.2.self_attn.v_proj.lora_A.weight', 'base_model.model.model.decoder.layers.2.self_attn.v_proj.lora_B.weight', 'base_model.model.model.decoder.layers.2.self_attn.q_proj.lora_A.weight', 'base_model.model.model.decoder.layers.2.self_attn.q_proj.lora_B.weight', 'base_model.model.model.decoder.layers.3.self_attn.v_proj.lora_A.weight', 'base_model.model.model.dec

In [66]:
lora_A = adapter_weights_one["base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_A.weight"]
lora_B = adapter_weights_one["base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_B.weight"]

In [67]:
adapter_weights_two = torch.load('/data/chris/adapters/peft-opt125m-dummylora2/adapter_model.bin')
# adapter_weights_two.keys()

In [68]:
lora_A_two = adapter_weights_two["base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_A.weight"]
lora_B_two= adapter_weights_two["base_model.model.model.decoder.layers.0.self_attn.v_proj.lora_B.weight"]

In [69]:
in_features = 768
out_features = 8
linear = SharedLoraLinear(in_features, out_features)

In [70]:
linear.update_layer("dummy-lora-one", 8, 16, 0.0)
linear.update_layer("dummy-lora-two", 8, 16, 0.0)

In [71]:
linear.weight = nn.Parameter(weight)
linear.bias = nn.Parameter(bias)

In [72]:
linear.loras

ModuleDict(
  (dummy-lora-one): LoraLinear(
    in_features=768, out_features=8, bias=True
    (lora_dropout): Dropout(p=0.0, inplace=False)
    (lora_A): Linear(in_features=768, out_features=8, bias=False)
    (lora_B): Linear(in_features=8, out_features=768, bias=False)
  )
  (dummy-lora-two): LoraLinear(
    in_features=768, out_features=8, bias=True
    (lora_dropout): Dropout(p=0.0, inplace=False)
    (lora_A): Linear(in_features=768, out_features=8, bias=False)
    (lora_B): Linear(in_features=8, out_features=768, bias=False)
  )
)

In [73]:
linear.loras["dummy-lora-one"].lora_A.weight = nn.Parameter(lora_A.float())
linear.loras["dummy-lora-one"].lora_B.weight = nn.Parameter(lora_B.float())
linear.loras["dummy-lora-two"].lora_A.weight = nn.Parameter(lora_A_two.float())
linear.loras["dummy-lora-two"].lora_B.weight = nn.Parameter(lora_B_two.float())

In [77]:
model_input_one = torch.rand(1, 768).float()
model_input_two = torch.rand(1, 768).float()
# concat queries into one tensor

model_input = torch.cat((model_input_one, model_input_two), dim=0)
model_input.shape

torch.Size([2, 768])

In [84]:
%%time
result = linear.forward(model_input, ["dummy-lora-one", "dummy-lora-two"])
result.shape

CPU times: user 74.7 ms, sys: 0 ns, total: 74.7 ms
Wall time: 5.03 ms


torch.Size([2, 768])

In [85]:
%%time
scaling = 8/ 16
manual_result = F.linear(model_input_one, linear.weight, linear.bias)
manual_result += (linear.loras["dummy-lora-one"].lora_B((linear.loras["dummy-lora-one"].lora_A(model_input_one)))) * scaling

manual_result_two = F.linear(model_input_two, linear.weight, linear.bias)
manual_result_two += (linear.loras["dummy-lora-two"].lora_B((linear.loras["dummy-lora-two"].lora_A(model_input_two)))) * scaling

# concat manual result and manual result two
manual_final_result = torch.cat((manual_result, manual_result_two), dim=0)
manual_final_result

CPU times: user 236 ms, sys: 12.7 ms, total: 249 ms
Wall time: 21.5 ms


tensor([[-1.4346e-01,  1.4589e-04,  1.4613e-01,  ..., -1.1955e-01,
         -1.6227e-01,  8.0859e-02],
        [-2.9381e-03, -3.8881e-02, -9.4747e-02,  ...,  8.7724e-03,
         -1.6797e-01,  1.8147e-01]], grad_fn=<CatBackward0>)

In [86]:
torch.allclose(result, manual_final_result, atol=1e-06)

True

In [118]:
%%time 
model_inputs = torch.rand(100, 1, 768).float()
adapter_names = ["dummy-lora-one", "dummy-lora-two"] * 50
result = linear.forward(model_inputs, adapter_names)

CPU times: user 173 ms, sys: 5.05 ms, total: 178 ms
Wall time: 24.2 ms


In [119]:
%%time
manual_final_results = torch.zeros(100, 1, 768)
for i in range(100):
    result_i = F.linear(model_inputs[i], linear.weight, linear.bias) + (linear.loras[adapter_names[i]].lora_B((linear.loras[adapter_names[i]].lora_A(model_inputs[i])))) * scaling       
    manual_final_results[i] = result_i

torch.allclose(result, manual_final_results, atol=1e-06)

CPU times: user 8.47 s, sys: 159 ms, total: 8.63 s
Wall time: 609 ms


True