In [1]:
# Switch the working directory to the parent folder (the output shows the project
# checkout); presumably needed so `mylora` / `mylib` can be imported from there.
# NOTE(review): re-running this cell moves up one more level, so it is only
# correct on a fresh kernel.
%cd ..

/home/akkirr/annotated-diffusion


In [2]:
import mylora
import torch
import mylib
from torch import nn

In [3]:
class Attention(nn.Module):
    """Minimal stand-in for an attention block, used to exercise LoRA injection.

    Computes ``C(LeakyReLU(QKV(x)))`` where both ``QKV`` and ``C`` are
    ``Linear(1, 1)`` layers. The class name ``"Attention"`` is the string
    passed as the injection target later in this notebook.
    """

    def __init__(self):
        super().__init__()
        # Creation order is kept as QKV -> C -> lrelu: it determines both the
        # order in which the RNG is consumed and the order modules are listed.
        self.QKV = nn.Linear(in_features=1, out_features=1)
        self.C = nn.Linear(in_features=1, out_features=1)
        self.lrelu = nn.LeakyReLU()

    def forward(self, x):
        """Apply QKV, the LeakyReLU non-linearity, then C to ``x``."""
        projected = self.QKV(x)
        activated = self.lrelu(projected)
        return self.C(activated)


class TimeEmbedding(nn.Module):
    """Minimal stand-in for a time-embedding branch.

    Computes ``LeakyReLU(time_proj(x))`` with a single ``Linear(1, 1)``
    projection. It contains a Linear layer that lives outside ``Attention``.
    """

    def __init__(self):
        super().__init__()
        self.time_proj = nn.Linear(in_features=1, out_features=1)
        self.lrelu = nn.LeakyReLU()

    def forward(self, x):
        """Project ``x`` and pass the result through the LeakyReLU."""
        projected = self.time_proj(x)
        return self.lrelu(projected)


class A(nn.Module):
    """Toy model combining a plain Linear, an Attention block and a TimeEmbedding.

    ``forward`` feeds ``just_linear(x) + time_embedder(x)`` into ``attn``.
    The model mixes Linear layers inside ``Attention`` with Linear layers
    outside it (``just_linear`` and the one in ``time_embedder``).
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept as in the original: it fixes parameter
        # ordering and the order in which initialisation consumes the RNG.
        self.just_linear = nn.Linear(in_features=1, out_features=1)
        self.attn = Attention()
        self.time_embedder = TimeEmbedding()

    def forward(self, x):
        """Sum the linear and time-embedding branches, then apply attention."""
        linear_branch = self.just_linear(x)
        time_branch = self.time_embedder(x)
        return self.attn(linear_branch + time_branch)

In [4]:
# Build a fresh toy model and inject LoRA into it. With "Attention" as the target
# and nn.Linear as the layer type to replace, the verbose log below reports
# adapters only for attn.QKV and attn.C.
model = A()
# NOTE(review): the positional 2 and 0 look like the LoRA rank and dropout
# probability (the log shows 1x2x1 adapters and the module repr shows
# Dropout1d(p=0)) — confirm against the signature of mylora.inject_lora.
mylora.inject_lora(
    model, 2, 0, ["Attention"], [nn.Linear], [mylora.LoraInjectedLinear], verbose=True
)
# Presumably sets requires_grad=False on every parameter of the model — confirm
# in mylora.freeze_module.
mylora.freeze_module(model)

Injected lora (1x2x1) in attn.QKV
Injected lora (1x2x1) in attn.C


In [5]:
# Materialise whatever get_lora_modules yields so the notebook displays it; the
# output shows (qualified name, LoraInjectedLinear) pairs for the injected layers.
list(mylora.get_lora_modules(model))

[('attn.QKV',
  LoraInjectedLinear(
    (src_linear): Linear(in_features=1, out_features=1, bias=True)
    (lora_down): Linear(in_features=1, out_features=2, bias=False)
    (lora_up): Linear(in_features=2, out_features=1, bias=False)
    (dropout_layer): Dropout1d(p=0, inplace=False)
  )),
 ('attn.C',
  LoraInjectedLinear(
    (src_linear): Linear(in_features=1, out_features=1, bias=True)
    (lora_down): Linear(in_features=1, out_features=2, bias=False)
    (lora_up): Linear(in_features=2, out_features=1, bias=False)
    (dropout_layer): Dropout1d(p=0, inplace=False)
  ))]

In [6]:
# Display the (qualified name, Parameter) pairs of the LoRA weights. In the
# output, each lora_up weight is all zeros while lora_down has non-zero values.
list(mylora.get_lora_parameters(model))

[('attn.QKV.lora_down.weight',
  Parameter containing:
  tensor([[-0.6649],
          [ 0.8149]])),
 ('attn.QKV.lora_up.weight',
  Parameter containing:
  tensor([[0., 0.]])),
 ('attn.C.lora_down.weight',
  Parameter containing:
  tensor([[ 0.5499],
          [-0.2450]])),
 ('attn.C.lora_up.weight',
  Parameter containing:
  tensor([[0., 0.]]))]

In [7]:
# Sanity check: toggling `requires_grad` through the handle `p` is visible when
# the same parameter is looked up again by name, i.e. `get_parameter` returns
# the live Parameter rather than a copy.
p = model.get_parameter('attn.QKV.lora_up.weight')
for trainable in (True, False):
    p.requires_grad = trainable
    print(model.get_parameter('attn.QKV.lora_up.weight').requires_grad)

True
False


In [8]:
# Rebuild the model after seeding with 0 (presumably seeds the torch RNG, among
# others — confirm in mylib.set_all_seeds). The lora_down weight displayed at the
# end is the reference value for the save/load round trip in the following cells.
mylib.set_all_seeds(0)
model = A()
mylora.inject_lora(
    model, 2, 0, ["Attention"], [nn.Linear], [mylora.LoraInjectedLinear], verbose=True
)
mylora.freeze_module(model)
# Last expression of the cell: shown as the cell output.
model.get_parameter('attn.QKV.lora_down.weight')

Injected lora (1x2x1) in attn.QKV
Injected lora (1x2x1) in attn.C


Parameter containing:
tensor([[0.2017],
        [0.4190]])

In [9]:
# Save the seed-0 model's LoRA state via mylora.save_lora.
# NOTE(review): the path is relative to the working directory set by `%cd ..`,
# and this presumably requires the `tmp/` directory to exist already — confirm.
mylora.save_lora(model, 'tmp/lora1.pt')

In [10]:
# Build a second model under a different seed (1). Its lora_down weight, shown in
# the output, differs from the seed-0 value, which makes the effect of loading
# the saved file in the next cell observable.
mylib.set_all_seeds(1)
model = A()
mylora.inject_lora(
    model, 2, 0, ["Attention"], [nn.Linear], [mylora.LoraInjectedLinear], verbose=True
)
mylora.freeze_module(model)
# Last expression of the cell: shown as the cell output.
model.get_parameter('attn.QKV.lora_down.weight')

Injected lora (1x2x1) in attn.QKV
Injected lora (1x2x1) in attn.C


Parameter containing:
tensor([[-0.7614],
        [ 0.1908]])

In [11]:
# Load the file written from the seed-0 model into the seed-1 model. The
# displayed lora_down weight matches the seed-0 value again, so the round trip
# restored it.
mylora.load_lora(model, 'tmp/lora1.pt')
model.get_parameter('attn.QKV.lora_down.weight')

Parameter containing:
tensor([[0.2017],
        [0.4190]])

In [12]:
# Print layer/parameter counts. The model was frozen when it was built and the
# output shows that loading the LoRA file left everything frozen (0 trainable).
mylora.model_summary(model)

total layers:                12
trainable layers:             0
frozen layers:               12

total params:                16
trainable params:             0
frozen params:               16


In [13]:
# Re-enable training for the LoRA weights only, then print the counts again. The
# output reports 4 trainable layers / 8 trainable params, which matches the four
# lora_down / lora_up weights of two elements each listed earlier.
mylora.unfreeze_lora(model)
mylora.model_summary(model)

total layers:                12
trainable layers:             4
frozen layers:                8

total params:                16
trainable params:             8
frozen params:                8


In [14]:
# Set the LoRA scale on the model; 0.23 is an arbitrary, easy-to-recognise value
# that the next cell reads back from an injected module.
mylora.set_scale(model, 0.23)

In [18]:
# Read the `scale` attribute back from the injected attn.QKV module; the output
# shows the 0.23 set in the previous cell.
# NOTE(review): the execution count jumps from 14 to 18, so this cell was run out
# of sequence or re-run — re-verify with Restart Kernel -> Run All.
model.get_submodule('attn.QKV').scale

0.23