In [1]:
# Reload edited project modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Move to the parent directory so the relative paths used below resolve from there.
# NOTE(review): `%cd ..` is relative, so re-running this cell climbs one more level each time.
%cd ..

/home/akkirr/annotated-diffusion


In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from pathlib import Path

import torch
from torch import nan_to_num
from torchvision import transforms as T
from torch.utils.data import DataLoader

import numpy as np
from PIL import Image
import requests

from datasets import load_dataset
from torchvision.utils import save_image
from torch.optim import Adam

from copy import deepcopy

In [3]:
from mylib import *
import mylora

In [4]:
# Build the diffusion sampler from the linear beta schedule.
# NOTE(review): 300 is presumably the number of diffusion timesteps (the sampling
# progress bars below count 0/300) — confirm against Sampler's signature in mylib.
sampler = Sampler(linear_beta_schedule, 300)

In [5]:
# Shared run configuration; `results_folder` is reassigned before each later experiment.
settings = Settings(
    results_folder=Path("./4-results-lora-exps-2"),
    image_size=28,
    channels=1,
    batch_size=128,
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device="cuda" if torch.cuda.is_available() else "cpu",
    # Weights restored into the Unet before LoRA is injected.
    checkpoint='checkpoints/2-trained-on-1:last.pt',
)
settings

{
    "results_folder": "PosixPath('4-results-lora-exps-2')",
    "image_size": 28,
    "channels": 1,
    "batch_size": 128,
    "device": "cuda",
    "checkpoint": "checkpoints/2-trained-on-1:last.pt"
}

In [42]:
# Create the output folder for the first run if it is not there yet.
settings.results_folder.mkdir(exist_ok=True)

In [6]:
# Fashion-MNIST from the Hugging Face hub; the result is indexed by split name below.
dataset = load_dataset("fashion_mnist")

# Per-image preprocessing: random horizontal flip, conversion to a tensor, then
# rescaling from ToTensor's [0, 1] range to [-1, 1].
# `T.Compose` is spelled out so the cell does not rely on a bare `Compose` that is
# only in scope through `from mylib import *`.
transform = T.Compose([
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Lambda(lambda t: (t * 2) - 1),
])


def transforms(examples):
    """Replace the "image" column of a batch with preprocessed "pixel_values".

    Args:
        examples: batch mapping whose "image" entry holds PIL images.

    Returns:
        The same mapping, with "pixel_values" added and "image" removed.
    """
    examples["pixel_values"] = [transform(image.convert("L")) for image in examples["image"]]
    del examples["image"]
    return examples


# Keep only label 0, drop the label column, and only then attach the on-the-fly
# transform. Filtering on the raw "label" column first avoids running the image
# pipeline over the entire dataset just to read labels.
transformed_dataset = (
    dataset
    .filter(lambda label: label == 0, input_columns="label")
    .remove_columns("label")
    .with_transform(transforms)
)

# create dataloader
dataloader = DataLoader(transformed_dataset["train"], batch_size=settings.batch_size, shuffle=True)

Found cached dataset fashion_mnist (/home/akkirr/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/0a671f063342996f19779d38c0ab4abef9c64f757b35af8134b331c294d7ba48)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at /home/akkirr/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/0a671f063342996f19779d38c0ab4abef9c64f757b35af8134b331c294d7ba48/cache-e1de293369d54b3d.arrow
Loading cached processed dataset at /home/akkirr/.cache/huggingface/datasets/fashion_mnist/fashion_mnist/1.0.0/0a671f063342996f19779d38c0ab4abef9c64f757b35af8134b331c294d7ba48/cache-99238c540bf086fb.arrow


# Train Lora

### Mylora 1

In [15]:
set_all_seeds()

# Rebuild the base Unet and restore the pretrained weights.
model = Unet(
    dim=settings.image_size,
    channels=settings.channels,
    dim_mults=(1, 2, 4,)
)
# map_location="cpu": the freshly built model is still on the CPU here, and this also
# lets a checkpoint saved from GPU tensors load on a machine without CUDA.
model.load_state_dict(torch.load(settings.checkpoint, map_location="cpu"))

# Inject LoRA into the Conv2d layers of every LinearAttention module.
# The positional arguments 1 and 0.1 appear to be rank and dropout (the injection log
# prints the 1 as the middle dimension) — confirm against mylora.inject_lora.
# `torch.nn.Conv2d` is used so the cell does not depend on `nn` leaking in via
# `from mylib import *`.
mylora.inject_lora(
    model, 1, 0.1,
    ['LinearAttention'],
    [torch.nn.Conv2d]
)
model.to(settings.device)

# Freeze weights via mylora, then report how many layers remain trainable.
mylora.freeze_lora(model)
print()
mylora.model_summary(model)

Injected lora    28 x 1 x 384   in downs.0.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in downs.0.2.fn.fn.0
Injected lora    28 x 1 x 384   in downs.1.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in downs.1.2.fn.fn.0
Injected lora    56 x 1 x 384   in downs.2.2.fn.fn.to_qkv
Injected lora   128 x 1 x 56    in downs.2.2.fn.fn.0
Injected lora   112 x 1 x 384   in ups.0.2.fn.fn.to_qkv
Injected lora   128 x 1 x 112   in ups.0.2.fn.fn.0
Injected lora    56 x 1 x 384   in ups.1.2.fn.fn.to_qkv
Injected lora   128 x 1 x 56    in ups.1.2.fn.fn.0
Injected lora    28 x 1 x 384   in ups.2.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in ups.2.2.fn.fn.0

trainable layers:            24
frozen layers:              231
total params:           2023945


In [16]:
# Hand Adam only the parameters that are still trainable (the injected LoRA weights).
# Assumes mylora.freeze_lora marks frozen weights with requires_grad=False; if it does
# not, this is equivalent to passing every parameter, as before.
optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=1e-3)
train(model, optimizer, dataloader, sampler, settings, epochs=10)

sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03347987309098244


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.032416168600320816


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03221919387578964


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03209693729877472


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.032007846981287


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03193720802664757


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03187677264213562


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.031828027218580246


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03178706765174866


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03175035119056702


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

In [20]:
%%bash
DIR=4-results-lora-exps-2
# Stitch the numbered sample PNGs into a GIF at 7 fps; -loop 0 makes it loop forever.
# -y overwrites an existing GIF so the cell can be re-run without ffmpeg refusing to write.
ffmpeg -y -f image2 -framerate 7 -i "$DIR/sample-%d.png" -loop 0 "$DIR/sample.gif"

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/opt/conda/conda-bld/ffmpeg_1597178665428/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh --cc=/opt/conda/conda-bld/ffmpeg_1597178665428/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libavresample   4.  0.  0 /  4.  0.  0
  libsw

### Official lora

In [47]:
import lora_diffusion

set_all_seeds()

# Rebuild the base Unet and restore the pretrained weights.
model = Unet(
    dim=settings.image_size,
    channels=settings.channels,
    dim_mults=(1, 2, 4,)
)
# map_location="cpu": the freshly built model is still on the CPU here, and this also
# lets a checkpoint saved from GPU tensors load on a machine without CUDA.
model.load_state_dict(torch.load(settings.checkpoint, map_location="cpu"))

# Freeze every base weight first; the injected LoRA layers are the only trainable part
# afterwards (the summary below reports 24 trainable layers).
model.requires_grad_(False)
unet_lora_params, train_names = lora_diffusion.inject_trainable_lora_extended(
    model,
    target_replace_module=['LinearAttention'],
    r=1,
)
model.to(settings.device)

print()
mylora.model_summary(model)


trainable layers:            24
frozen layers:              231
total params:           2023945


In [50]:
# Send the outputs of the next run to their own folder, creating it if needed.
results_dir = Path("./5-results-lora-official")
results_dir.mkdir(exist_ok=True)
settings.results_folder = results_dir
settings

{
    "results_folder": "PosixPath('5-results-lora-official')",
    "image_size": 28,
    "channels": 1,
    "batch_size": 128,
    "device": "cuda",
    "checkpoint": "checkpoints/2-trained-on-1:last.pt"
}

In [51]:
# The base model was frozen with requires_grad_(False) before injection, so only the
# LoRA weights are trainable; give Adam just those instead of every parameter.
optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=1e-3)
train(model, optimizer, dataloader, sampler, settings, epochs=10)

sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03206548094749451


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03116554580628872


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.031047526746988297


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030967460945248604


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03090790845453739


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03086230717599392


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030824465677142143


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.0307917483150959


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030763352289795876


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03073890693485737


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

In [52]:
%%bash
DIR=5-results-lora-official
# Stitch the numbered sample PNGs into a GIF at 7 fps; -loop 0 makes it loop forever.
# -y overwrites an existing GIF so the cell can be re-run without ffmpeg refusing to write.
ffmpeg -y -f image2 -framerate 7 -i "$DIR/sample-%d.png" -loop 0 "$DIR/sample.gif"

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/opt/conda/conda-bld/ffmpeg_1597178665428/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh --cc=/opt/conda/conda-bld/ffmpeg_1597178665428/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libavresample   4.  0.  0 /  4.  0.  0
  libsw

### Dropout instead of Dropout2d

In [53]:
# NOTE(review): this cell is identical to the "Mylora 1" setup; the Dropout vs Dropout2d
# change presumably lives inside the mylora module and is picked up via %autoreload —
# confirm, since the notebook alone does not record which variant was active.
set_all_seeds()

# Rebuild the base Unet and restore the pretrained weights.
model = Unet(
    dim=settings.image_size,
    channels=settings.channels,
    dim_mults=(1, 2, 4,)
)
# map_location="cpu": the freshly built model is still on the CPU here, and this also
# lets a checkpoint saved from GPU tensors load on a machine without CUDA.
model.load_state_dict(torch.load(settings.checkpoint, map_location="cpu"))

# Inject LoRA into the Conv2d layers of every LinearAttention module.
# The positional arguments 1 and 0.1 appear to be rank and dropout — confirm against
# mylora.inject_lora. `torch.nn.Conv2d` avoids depending on a star-imported `nn`.
mylora.inject_lora(
    model, 1, 0.1,
    ['LinearAttention'],
    [torch.nn.Conv2d]
)
model.to(settings.device)

# Freeze weights via mylora, then report how many layers remain trainable.
mylora.freeze_lora(model)
print()
mylora.model_summary(model)

Injected lora    28 x 1 x 384   in downs.0.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in downs.0.2.fn.fn.0
Injected lora    28 x 1 x 384   in downs.1.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in downs.1.2.fn.fn.0
Injected lora    56 x 1 x 384   in downs.2.2.fn.fn.to_qkv
Injected lora   128 x 1 x 56    in downs.2.2.fn.fn.0
Injected lora   112 x 1 x 384   in ups.0.2.fn.fn.to_qkv
Injected lora   128 x 1 x 112   in ups.0.2.fn.fn.0
Injected lora    56 x 1 x 384   in ups.1.2.fn.fn.to_qkv
Injected lora   128 x 1 x 56    in ups.1.2.fn.fn.0
Injected lora    28 x 1 x 384   in ups.2.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in ups.2.2.fn.fn.0

trainable layers:            24
frozen layers:              231
total params:           2023945


In [55]:
# Send the outputs of the next run to their own folder, creating it if needed.
results_dir = Path("./6-results-lora-dropout0d")
results_dir.mkdir(exist_ok=True)
settings.results_folder = results_dir
settings

{
    "results_folder": "PosixPath('6-results-lora-dropout0d')",
    "image_size": 28,
    "channels": 1,
    "batch_size": 128,
    "device": "cuda",
    "checkpoint": "checkpoints/2-trained-on-1:last.pt"
}

In [56]:
# Hand Adam only the parameters that are still trainable (the injected LoRA weights).
# Assumes mylora.freeze_lora marks frozen weights with requires_grad=False; if it does
# not, this is equivalent to passing every parameter, as before.
optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=1e-3)
train(model, optimizer, dataloader, sampler, settings, epochs=10)

sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03206548094749451


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.031228838488459587


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.031087977811694145


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.031006233766674995


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030953308567404747


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030914517119526863


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03088298626244068


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03085593320429325


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030830703675746918


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03080744668841362


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Лосс стал больше похож на лосс официальной LoRA: он стабилизировался на значении 0.0307–0.0308, а не 0.0317.

In [57]:
%%bash
DIR=6-results-lora-dropout0d
# Stitch the numbered sample PNGs into a GIF at 7 fps; -loop 0 makes it loop forever.
# -y overwrites an existing GIF so the cell can be re-run without ffmpeg refusing to write.
ffmpeg -y -f image2 -framerate 7 -i "$DIR/sample-%d.png" -loop 0 "$DIR/sample.gif"

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/opt/conda/conda-bld/ffmpeg_1597178665428/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh --cc=/opt/conda/conda-bld/ffmpeg_1597178665428/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libavresample   4.  0.  0 /  4.  0.  0
  libsw

Еще генерации на 10 эпохе модели с Dropout0d и официальной лоры практически идентичны

Мне генерации Dropout0d нравятся больше: картинки более ровные, без артефактов.

### rank=4

In [58]:
set_all_seeds()

# Rebuild the base Unet and restore the pretrained weights.
model = Unet(
    dim=settings.image_size,
    channels=settings.channels,
    dim_mults=(1, 2, 4,)
)
# map_location="cpu": the freshly built model is still on the CPU here, and this also
# lets a checkpoint saved from GPU tensors load on a machine without CUDA.
model.load_state_dict(torch.load(settings.checkpoint, map_location="cpu"))

# Inject LoRA into the Conv2d layers of every LinearAttention module.
# This run passes 4 as the second argument, which the injection log prints as the middle
# (rank) dimension; 0.1 appears to be the dropout — confirm against mylora.inject_lora.
# `torch.nn.Conv2d` avoids depending on a star-imported `nn`.
mylora.inject_lora(
    model, 4, 0.1,
    ['LinearAttention'],
    [torch.nn.Conv2d]
)
model.to(settings.device)

# Freeze weights via mylora, then report how many layers remain trainable.
mylora.freeze_lora(model)
print()
mylora.model_summary(model)

Injected lora    28 x 4 x 384   in downs.0.2.fn.fn.to_qkv
Injected lora   128 x 4 x 28    in downs.0.2.fn.fn.0
Injected lora    28 x 4 x 384   in downs.1.2.fn.fn.to_qkv
Injected lora   128 x 4 x 28    in downs.1.2.fn.fn.0
Injected lora    56 x 4 x 384   in downs.2.2.fn.fn.to_qkv
Injected lora   128 x 4 x 56    in downs.2.2.fn.fn.0
Injected lora   112 x 4 x 384   in ups.0.2.fn.fn.to_qkv
Injected lora   128 x 4 x 112   in ups.0.2.fn.fn.0
Injected lora    56 x 4 x 384   in ups.1.2.fn.fn.to_qkv
Injected lora   128 x 4 x 56    in ups.1.2.fn.fn.0
Injected lora    28 x 4 x 384   in ups.2.2.fn.fn.to_qkv
Injected lora   128 x 4 x 28    in ups.2.2.fn.fn.0

trainable layers:            24
frozen layers:              231
total params:           2035009


In [59]:
# The model for this run is built with rank 4, and the ffmpeg cell that follows reads
# from "7-results-rank=4", so the samples must be written to that folder.
settings.results_folder = Path("./7-results-rank=4")
settings.results_folder.mkdir(exist_ok=True)

# Hand Adam only the parameters that are still trainable (the injected LoRA weights).
# Assumes mylora.freeze_lora marks frozen weights with requires_grad=False; if it does
# not, this is equivalent to passing every parameter, as before.
optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=1e-3)
train(model, optimizer, dataloader, sampler, settings, epochs=10)

sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03206548094749451


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.031112419441342354


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030909627676010132


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030770281329751015


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030660543590784073


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03056148625910282


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03047545999288559


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03040124475955963


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030336471274495125


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03027975745499134


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

In [60]:
%%bash
# Must be the same folder the preceding training cell wrote its samples to.
DIR=7-results-rank=4
# Stitch the numbered sample PNGs into a GIF at 7 fps; -loop 0 makes it loop forever.
# -y overwrites an existing GIF so the cell can be re-run without ffmpeg refusing to write.
ffmpeg -y -f image2 -framerate 7 -i "$DIR/sample-%d.png" -loop 0 "$DIR/sample.gif"

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/opt/conda/conda-bld/ffmpeg_1597178665428/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh --cc=/opt/conda/conda-bld/ffmpeg_1597178665428/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libavresample   4.  0.  0 /  4.  0.  0
  libsw

Как будто бы нет разницы с dropout0d

### Dropout0d, rank=1, dropout=0.4

In [7]:
set_all_seeds()

# Rebuild the base Unet and restore the pretrained weights.
model = Unet(
    dim=settings.image_size,
    channels=settings.channels,
    dim_mults=(1, 2, 4,)
)
# map_location="cpu": the freshly built model is still on the CPU here, and this also
# lets a checkpoint saved from GPU tensors load on a machine without CUDA.
model.load_state_dict(torch.load(settings.checkpoint, map_location="cpu"))

# Inject LoRA into the Conv2d layers of every LinearAttention module.
# Per the section heading this run uses rank=1 and dropout=0.4 (the positional
# arguments 1 and 0.4) — confirm the argument order against mylora.inject_lora.
# `torch.nn.Conv2d` avoids depending on a star-imported `nn`.
mylora.inject_lora(
    model, 1, 0.4,
    ['LinearAttention'],
    [torch.nn.Conv2d]
)
model.to(settings.device)

# Freeze weights via mylora, then report how many layers remain trainable.
mylora.freeze_lora(model)
print()
mylora.model_summary(model)

Injected lora    28 x 1 x 384   in downs.0.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in downs.0.2.fn.fn.0
Injected lora    28 x 1 x 384   in downs.1.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in downs.1.2.fn.fn.0
Injected lora    56 x 1 x 384   in downs.2.2.fn.fn.to_qkv
Injected lora   128 x 1 x 56    in downs.2.2.fn.fn.0
Injected lora   112 x 1 x 384   in ups.0.2.fn.fn.to_qkv
Injected lora   128 x 1 x 112   in ups.0.2.fn.fn.0
Injected lora    56 x 1 x 384   in ups.1.2.fn.fn.to_qkv
Injected lora   128 x 1 x 56    in ups.1.2.fn.fn.0
Injected lora    28 x 1 x 384   in ups.2.2.fn.fn.to_qkv
Injected lora   128 x 1 x 28    in ups.2.2.fn.fn.0

trainable layers:            24
frozen layers:              231
total params:           2023945


In [8]:
# NOTE(review): the folder name says dropout=0.1, but the model in this section is built
# with dropout=0.4. The name is kept because the ffmpeg cell for this run reads from
# this exact path; rename both together.
settings.results_folder = Path("./8-results_rank=1_dropout=0.1")
settings.results_folder.mkdir(exist_ok=True)

# Hand Adam only the parameters that are still trainable (the injected LoRA weights).
# Assumes mylora.freeze_lora marks frozen weights with requires_grad=False; if it does
# not, this is equivalent to passing every parameter, as before.
optimizer = Adam([p for p in model.parameters() if p.requires_grad], lr=1e-3)
train(model, optimizer, dataloader, sampler, settings, epochs=10)

sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03206548094749451


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03126571699976921


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.031140146777033806


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03107609413564205


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03103799745440483


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03100646659731865


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030982419848442078


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.03095947578549385


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030934074893593788


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

Loss: 0.030912069603800774


sampling loop time step:   0%|          | 0/300 [00:00<?, ?it/s]

In [10]:
%%bash
# The folder name says dropout=0.1 although the run in this section used dropout=0.4;
# it is kept as-is to match the results_folder set in the training cell.
DIR=8-results_rank=1_dropout=0.1
# Stitch the numbered sample PNGs into a GIF at 7 fps; -loop 0 makes it loop forever.
# -y overwrites an existing GIF so the cell can be re-run without ffmpeg refusing to write.
ffmpeg -y -f image2 -framerate 7 -i "$DIR/sample-%d.png" -loop 0 "$DIR/sample.gif"

ffmpeg version 4.3 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)
  configuration: --prefix=/opt/conda/conda-bld/ffmpeg_1597178665428/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh --cc=/opt/conda/conda-bld/ffmpeg_1597178665428/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --disable-openssl --enable-avresample --enable-gnutls --enable-hardcoded-tables --enable-libfreetype --enable-libopenh264 --enable-pic --enable-pthreads --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libmp3lame
  libavutil      56. 51.100 / 56. 51.100
  libavcodec     58. 91.100 / 58. 91.100
  libavformat    58. 45.100 / 58. 45.100
  libavdevice    58. 10.100 / 58. 10.100
  libavfilter     7. 85.100 /  7. 85.100
  libavresample   4.  0.  0 /  4.  0.  0
  libsw

По качеству почти неразличимы, но стало чуть меньше артефактов