In [1]:
import os

os.chdir("/root/dev/hf/diffusers/examples/triplane_diffusion")
os.getcwd()

'/root/dev/hf/diffusers/examples/triplane_diffusion'

In [2]:
!gpustat

[1m[37m6f37a742720d              [m  Fri Apr 12 15:43:31 2024  [1m[30m525.89.02[m
[36m[0][m [34mNVIDIA GeForce RTX 4090[m |[31m 42°C[m, [32m 20 %[m | [36m[1m[33m 1033[m / [33m24564[m MB |
[36m[1][m [34mNVIDIA GeForce RTX 4090[m |[31m 38°C[m, [32m  0 %[m | [36m[1m[33m   10[m / [33m24564[m MB |


In [3]:
import torch

torch.cuda.set_device(1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Huggingface PEFT tutorial

In [4]:
import peft
import accelerate
import datasets
import transformers
import diffusers

print("peft version: ", peft.__version__)
print("accelerate version: ", accelerate.__version__)
print("datasets version: ", datasets.__version__)
print("transformers version: ", transformers.__version__)
print("diffusers version: ", diffusers.__version__)

peft version:  0.10.0
accelerate version:  0.29.1
datasets version:  2.18.0
transformers version:  4.39.3
diffusers version:  0.27.2


In [5]:
from diffusers import UNet2DConditionModel

# unet = UNet2DConditionModel.from_pretrained(
#     "stabilityai/stable-diffusion-xl-base-1.0", subfolder="unet"
# )
unet = UNet2DConditionModel.from_pretrained(
    "stabilityai/stable-diffusion-2-base", subfolder="unet"
)

In [6]:
# unet.train()

In [7]:
# unet.to(device)
# x = torch.rand((8, 4, 64, 64)).to(device)
# time_step = torch.randn((8,)).to(device)
# enc_h = torch.rand((8, 77, 1024)).to(device)
# output = unet(x, timestep=time_step, encoder_hidden_states=enc_h).sample

In [8]:
model_size = 0
for param in unet.parameters():
    model_size += param.data.nelement()
print("trainable params: ", model_size)

trainable params:  865910724


In [9]:
# find modules
def find_modules(module, module_types):
    matching_modules = []
    for name, mod in module.named_modules():
        if isinstance(mod, module_types):
            module_name = name.split(".")[-1]
            if len(module_name) == 1:
                # print(name)
                module_name = name
            matching_modules.append(module_name)
    return matching_modules

In [10]:
module_types = (torch.nn.Linear, torch.nn.Embedding, torch.nn.Conv2d)
target_modules = find_modules(unet, module_types)
target_modules = list(set(target_modules))
print(target_modules)

['up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0', 'conv_out', 'up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2', 'down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0', 'up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0', 'up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0', 'conv_shortcut', 'proj_in', 'up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0', 'up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0', 'up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0', 'down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0', 'down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2', 'to_v', 'down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0', 'down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2', 'time_emb_proj', 'conv1', 'up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2', 'linear_2', 'up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2', 'mid_block.attentions.0.transformer_b

## LoRA

In [6]:
from peft import LoraConfig, get_peft_model

unet_lora = unet.to(device)

# freeze params of models to save more memory
unet_lora.requires_grad_(False)

config = LoraConfig(
    r=4,
    lora_alpha=4,
    lora_dropout=0.1,
    target_modules=["to_q", "to_k", "to_v", "to_out.0"],
    init_lora_weights= "gaussian",
    bias="none"
) # scale = alpha / r

unet_lora.add_adapter(config)
lora_layers = filter(lambda p: p.requires_grad, unet.parameters())

trainable_params = 0
all_params = 0
for _, param in unet.named_parameters():
    num_params = param.numel()
    all_params += num_params
    if param.requires_grad:
        trainable_params += num_params
print(f"trainable params: {trainable_params:,d} || all params: {all_params:,d} || trainable%: {100 * trainable_params / all_params}")
unet_lora.train();

trainable params: 829,952 || all params: 866,740,676 || trainable%: 0.09575551522864031


In [7]:
x = torch.rand((8, 4, 64, 64)).to(device)
time_step = torch.randn((8,)).to(device)
enc_h = torch.rand((8, 77, 1024)).to(device)
output = unet_lora(x, timestep=time_step, encoder_hidden_states=enc_h).sample

## LoKr (slightly different to KAdaptation)

In [6]:
from peft import LoKrConfig, get_peft_model

unet_lokr = unet.to(device)

# freeze params of models to save more memory
unet_lokr.requires_grad_(False)

config = LoKrConfig(
    r=4,
    alpha=4,
    # rank_dropout=0.1,
    module_dropout=0.1,
    use_effective_conv2d=True,
    target_modules=["to_q", "to_k", "to_v", "to_out.0"]
)

unet_lokr.add_adapter(config)
lokr_layers = filter(lambda p: p.requires_grad, unet.parameters())

trainable_params = 0
all_params = 0
for _, param in unet.named_parameters():
    num_params = param.numel()
    all_params += num_params
    if param.requires_grad:
        trainable_params += num_params
print(f"trainable params: {trainable_params:,d} || all params: {all_params:,d} || trainable%: {100 * trainable_params / all_params}")
unet_lokr.train();
# unet = get_peft_model(unet, config).to(device)
# unet.print_trainable_parameters()
# unet.train();

trainable params: 112,448 || all params: 866,023,172 || trainable%: 0.012984410075346113


In [7]:
x = torch.rand((8, 4, 64, 64)).to(device)
time_step = torch.randn((8,)).to(device)
enc_h = torch.rand((8, 77, 1024)).to(device)
output = unet(x, timestep=time_step, encoder_hidden_states=enc_h).sample

# Layer replication

In [14]:
torch.tensor([0,1,0,2,0,3,1,2,1,3,2,3], dtype=torch.long, device='cuda').shape

torch.Size([12])