In [1]:
# NOTE(review): `!` hands this command to a temporary subshell, so the
# activation presumably does not carry over to later cells or to the running
# kernel. To actually use the `cloudspace` environment, launch Jupyter from it
# or select it as the notebook kernel — confirm which one was intended.
!conda activate cloudspace

In [2]:
import json
import os

import cv2
import numpy as np
from torch.utils.data import Dataset


class MyDataset(Dataset):
    """Image/prompt dataset backed by a JSON-Lines index file.

    Each non-blank line of the index file is one JSON object with the keys
    ``source`` (hint image path), ``target`` (ground-truth image path) and
    ``prompt`` (caption text). Image paths are resolved relative to
    ``data_root``.

    Args:
        data_root: Directory holding the index file and the images. Defaults
            to ``'./training/fill50k/'``, the location the original code used.
        prompt_file: Name of the JSON-Lines index file inside ``data_root``.

    Each sample is a dict with:
        ``jpg``: target image, RGB, float32, scaled to [-1, 1].
        ``txt``: the prompt string.
        ``hint``: source image, RGB, float32, scaled to [0, 1].
    """

    def __init__(self, data_root='./training/fill50k/', prompt_file='prompt.json'):
        self.data_root = data_root
        self.data = []
        index_path = os.path.join(data_root, prompt_file)
        # Explicit UTF-8 so prompts decode the same way regardless of locale.
        with open(index_path, 'rt', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                # Skip blank lines (e.g. an extra empty line at the end of the
                # file); json.loads would reject them.
                if line:
                    self.data.append(json.loads(line))

    def __len__(self):
        """Return the number of records read from the index file."""
        return len(self.data)

    def _read_rgb(self, filename):
        """Load ``filename`` (relative to ``data_root``) as an RGB array.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file. ``cv2.imread``
                signals failure by returning ``None`` instead of raising, so
                the check is made here to name the offending path.
        """
        path = os.path.join(self.data_root, filename)
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(
                'Could not read image (missing or unreadable): ' + path
            )
        # Do not forget that OpenCV reads images in BGR order.
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as ``dict(jpg=..., txt=..., hint=...)``."""
        item = self.data[idx]

        source = self._read_rgb(item['source'])
        target = self._read_rgb(item['target'])
        prompt = item['prompt']

        # Normalize source images to [0, 1].
        source = source.astype(np.float32) / 255.0

        # Normalize target images to [-1, 1].
        target = (target.astype(np.float32) / 127.5) - 1.0

        return dict(jpg=target, txt=prompt, hint=source)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from tutorial_dataset import MyDataset

# Build the dataset and report how many samples it holds.
dataset = MyDataset()
print(len(dataset))

# Fetch a single sample and unpack its three fields in one step.
item = dataset[999]
jpg, txt, hint = (item[key] for key in ('jpg', 'txt', 'hint'))

# Show the prompt first, then the shapes of the target and hint arrays.
print(txt, jpg.shape, hint.shape, sep='\n')


1000
brown circle with salmon background
(512, 512, 3)
(512, 512, 3)


In [4]:
# Print the version of the `python` executable found by the shell.
# NOTE(review): this is the shell's interpreter, which may differ from the
# kernel's — verify if the distinction matters.
!python --version

Python 3.8.20


In [6]:
# Run tool_add_control_sd21.py with the SD 2.1 checkpoint as its first argument
# and ./models/control_sd21_ini.ckpt as its second; the training cell later
# loads that second path as `resume_path`.
# NOTE(review): presumably the script writes the ControlNet-initialised
# checkpoint to the second path — confirm against the script itself.
!python tool_add_control_sd21.py ./models/v2-1_512-ema-pruned.ckpt ./models/control_sd21_ini.ckpt

logging improved.
No module 'xformers'. Proceeding without it.
ControlLDM: Running in eps-prediction mode
DiffusionWrapper has 865.91 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels
Loaded model config from [./models/cldm_v21.yaml]
These weights are newly added: logvar
These weights are newly added: control_model.zero_convs.0.0.weight
These weights are newly added: control_model.zero_convs.0.0.bias
These weights are newly added: control_model.zero_convs.1.0.weight
These weights are newly added: control_model.zero_convs.1.0.bias
These weights are newly added: control_model.zero_convs.2.0.weight
These weights are newly added: control_model.zero_convs.2.0.bias
These weights are newly added: control_model.zero_convs.3.0.weight
These weights are newly added: control_model.zero_convs.3.0.bias
These weights are newly added: control_model.zero_convs.4.0.weight
The

In [7]:
from share import *

import pytorch_lightning as pl
from torch.utils.data import DataLoader
from tutorial_dataset import MyDataset
from cldm.logger import ImageLogger
from cldm.model import create_model, load_state_dict


# Configs — every value a reader might want to tune lives in this section.
resume_path = './models/control_sd21_ini.ckpt'   # checkpoint to start training from
model_config_path = './models/cldm_v21.yaml'     # model definition passed to create_model
batch_size = 4
num_workers = 0                                  # DataLoader worker processes
logger_freq = 300                                # passed to ImageLogger as batch_frequency
learning_rate = 1e-5
max_epochs = 10
sd_locked = True
only_mid_control = False
seed = 42


# Seed the Python, NumPy and PyTorch RNGs up front so that data shuffling and
# noise sampling repeat across "Restart Kernel -> Run All".
pl.seed_everything(seed)


# First use cpu to load models. Pytorch Lightning will automatically move it to GPUs.
model = create_model(model_config_path).cpu()
model.load_state_dict(load_state_dict(resume_path, location='cpu'))
model.learning_rate = learning_rate
model.sd_locked = sd_locked
model.only_mid_control = only_mid_control


# Misc
dataset = MyDataset()
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, shuffle=True)
logger = ImageLogger(batch_frequency=logger_freq)
trainer = pl.Trainer(gpus=1, precision=16, callbacks=[logger], max_epochs=max_epochs)


# Train!
trainer.fit(model, dataloader)


logging improved.
No module 'xformers'. Proceeding without it.
ControlLDM: Running in eps-prediction mode
DiffusionWrapper has 865.91 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels
Loaded model config from [./models/cldm_v21.yaml]
Loaded state_dict from [./models/control_sd21_ini.ckpt]


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn("You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.")
  rank_zero_deprecation(
  rank_zero_deprecation(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                   | Params
-------------------------------------------------------------
0 | model             | DiffusionWrapper       | 865 M 
1 | first_stage_model | AutoencoderKL          | 83.7 M
2 | cond_stage_model  | FrozenOpenCLIPEmbedder | 354 M 
3 | control_model     | ControlNet             | 364 M 
-------------------------------------------------------------
1.2 B     Trainable params
437 M     Non-trainable params
1.7 B     Total params
3,335.651 Total estimated model params size (MB)
  rank_zero_warn(


Epoch 0:   0%|          | 0/250 [00:00<?, ?it/s] 



Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  2.03it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:23,  2.03it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:23,  2.03it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:22,  2.03it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:22,  2.03it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:21,  2.03it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:21,  2.03it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:20,  2.03it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:20,  2.03it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:04<00:19,  2.03it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:19,  2.03it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:05<00:18,  2.03it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:18,  2.03it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:06<00:17,  2.03it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 1:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00427, v_num=3, train/loss_simple_step=0.00558, train/loss_vlb_step=2.19e-5, train/loss_step=0.00558, global_step=249.0]          Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  1.99it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.98it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:23,  1.98it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.98it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:22,  1.98it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.98it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:21,  1.98it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.98it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:20,  1.98it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.98it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:19,  1.97it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.97it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:18,  1.97it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.97it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 2:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00588, v_num=3, train/loss_simple_step=0.0198, train/loss_vlb_step=0.00155, train/loss_step=0.0198, global_step=499.0, train/loss_simple_epoch=0.00597, train/loss_vlb_epoch=0.000204, train/loss_epoch=0.00597]             Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  1.96it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.96it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:24,  1.95it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.95it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:23,  1.95it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.94it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:22,  1.94it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.94it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:21,  1.94it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.94it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:20,  1.94it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.94it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:19,  1.93it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.93it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 3:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00664, v_num=3, train/loss_simple_step=0.00738, train/loss_vlb_step=4.9e-5, train/loss_step=0.00738, global_step=749.0, train/loss_simple_epoch=0.00599, train/loss_vlb_epoch=0.000123, train/loss_epoch=0.00599]            Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  1.98it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.97it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:23,  1.97it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.97it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:22,  1.98it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.98it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:21,  1.98it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.97it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:20,  1.97it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.97it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:19,  1.97it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.97it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:18,  1.97it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.97it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 4:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00509, v_num=3, train/loss_simple_step=0.00691, train/loss_vlb_step=5.02e-5, train/loss_step=0.00691, global_step=999.0, train/loss_simple_epoch=0.00621, train/loss_vlb_epoch=0.000327, train/loss_epoch=0.00621]           Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:25,  1.95it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.94it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:24,  1.94it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.93it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:23,  1.93it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.93it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:22,  1.93it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.92it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:21,  1.92it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.92it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:20,  1.92it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.92it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:19,  1.91it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.91it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 5:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00499, v_num=3, train/loss_simple_step=0.00376, train/loss_vlb_step=1.49e-5, train/loss_step=0.00376, global_step=1249.0, train/loss_simple_epoch=0.00561, train/loss_vlb_epoch=0.000154, train/loss_epoch=0.00561]            Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:25,  1.93it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.93it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:24,  1.93it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.92it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:23,  1.92it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.92it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:22,  1.92it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.92it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:21,  1.92it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.92it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:20,  1.92it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.92it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:19,  1.92it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.91it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 6:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00558, v_num=3, train/loss_simple_step=0.00316, train/loss_vlb_step=1.12e-5, train/loss_step=0.00316, global_step=1499.0, train/loss_simple_epoch=0.0058, train/loss_vlb_epoch=0.000267, train/loss_epoch=0.0058]            Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  1.97it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.96it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:23,  1.96it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.96it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:23,  1.96it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.96it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:22,  1.95it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.95it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:20,  1.95it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.95it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:19,  1.95it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.95it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:18,  1.95it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.95it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 7:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00583, v_num=3, train/loss_simple_step=0.00413, train/loss_vlb_step=1.55e-5, train/loss_step=0.00413, global_step=1749.0, train/loss_simple_epoch=0.00591, train/loss_vlb_epoch=0.000226, train/loss_epoch=0.00591]           Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  1.97it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.96it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:23,  1.96it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.96it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:23,  1.96it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.95it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:22,  1.95it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.95it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:21,  1.95it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.95it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:20,  1.95it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.95it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:18,  1.95it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.95it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 8:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00586, v_num=3, train/loss_simple_step=0.0041, train/loss_vlb_step=2.23e-5, train/loss_step=0.0041, global_step=2e+3, train/loss_simple_epoch=0.00588, train/loss_vlb_epoch=0.000309, train/loss_epoch=0.00588]                Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  1.99it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.99it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:23,  1.99it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.98it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:22,  1.98it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.98it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:21,  1.98it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.98it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:20,  1.98it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.98it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:19,  1.98it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.97it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:18,  1.98it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.97it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 9:   0%|          | 0/250 [00:00<?, ?it/s, loss=0.00481, v_num=3, train/loss_simple_step=0.00232, train/loss_vlb_step=8e-6, train/loss_step=0.00232, global_step=2249.0, train/loss_simple_epoch=0.00587, train/loss_vlb_epoch=0.000217, train/loss_epoch=0.00587]               Data shape for DDIM sampling is (4, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:24,  1.97it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:01<00:24,  1.96it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:23,  1.96it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:02<00:23,  1.96it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:23,  1.96it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:03<00:22,  1.96it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:03<00:21,  1.95it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:04<00:21,  1.95it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:04<00:21,  1.95it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:05<00:20,  1.95it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:05<00:19,  1.95it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:06<00:19,  1.95it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:06<00:18,  1.95it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:07<00:18,  1.95it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 9: 100%|██████████| 250/250 [05:33<00:00,  1.33s/it, loss=0.00526, v_num=3, train/loss_simple_step=0.00671, train/loss_vlb_step=5.24e-5, train/loss_step=0.00671, global_step=2499.0, train/loss_simple_epoch=0.00544, train/loss_vlb_epoch=0.000157, train/loss_epoch=0.00544] 
