In [1]:
# --- Notebook configuration -------------------------------------------------
# Every value a reader is likely to tune lives here; the job definition in the
# next cell only references these names.

# Paths (relative to the notebook's working directory)
INPUT_FOLDER = './images'    # dataset folder handed to the trainer
OUTPUT_FOLDER = './output'   # training_folder for the run

# Token substituted for "[trigger]" in the sample prompts
TRIGGER_WORD = 'teleski'

# LoRA / optimisation settings
LORA_RANK = 16               # used for both `linear` and `linear_alpha`
BATCHSIZE = 1
LEARNING_RATE = 1e-4

# Schedule, all expressed in optimiser steps
STEPS_TRAIN = 3_000          # total training steps
STEPS_SAVE = 250             # checkpoint interval
STEPS_SAMPLE = 250           # sample-image interval

In [2]:
from collections import OrderedDict

# Full job description handed to the toolkit's run_job().
# Built with keyword-style OrderedDicts; keyword order is preserved, so the
# resulting mapping has the same key order as a list-of-tuples construction.
job_to_run = OrderedDict(
    job='extension',
    config=OrderedDict(
        # The run name is reused as the output folder name and the file name.
        name='my_first_flux_lora_v1',
        process=[
            OrderedDict(
                type='sd_trainer',
                training_folder=OUTPUT_FOLDER,
                performance_log_every=100,
                device='cuda:0',
                trigger_word=TRIGGER_WORD,
                # LoRA adapter: rank and alpha are deliberately the same value.
                network=OrderedDict(
                    type='lora',
                    linear=LORA_RANK,
                    linear_alpha=LORA_RANK,
                ),
                # Checkpointing.
                save=OrderedDict(
                    dtype='float16',              # precision of saved weights
                    save_every=STEPS_SAVE,        # checkpoint interval in steps
                    max_step_saves_to_keep=10,    # number of intermediate saves retained
                ),
                # Each dataset is a folder of images. A caption is a .txt file
                # with the same stem as its image (image2.jpg <-> image2.txt).
                # Only jpg, jpeg and png are supported currently. Images are
                # resized and bucketed automatically to the listed resolutions.
                datasets=[
                    OrderedDict(
                        folder_path=INPUT_FOLDER,
                        caption_ext='txt',
                        caption_dropout_rate=0.05,    # caption is dropped 5% of the time
                        shuffle_tokens=False,         # shuffle comma-separated caption parts
                        cache_latents_to_disk=True,   # leave on unless you know what you're doing
                        resolution=[512, 768, 1024],  # flux enjoys multiple resolutions
                    ),
                ],
                train=OrderedDict(
                    batch_size=BATCHSIZE,
                    steps=STEPS_TRAIN,               # total steps; 500 - 4000 is a good range
                    gradient_accumulation_steps=1,
                    train_unet=True,
                    train_text_encoder=False,        # probably won't work with flux
                    content_or_style='balanced',     # one of: content, style, balanced
                    gradient_checkpointing=True,     # needed unless you have a ton of VRAM
                    noise_scheduler='flowmatch',     # for training only
                    optimizer='adamw8bit',
                    lr=LEARNING_RATE,
                    # EMA smooths out learning but could slow it down.
                    # Recommended to leave on.
                    ema_config=OrderedDict(
                        use_ema=True,
                        ema_decay=0.99,
                    ),
                    # Probably required for flux if the GPU supports it;
                    # other dtypes may not work correctly.
                    dtype='bf16',
                ),
                model=OrderedDict(
                    # Hugging Face model name or local path.
                    name_or_path='black-forest-labs/FLUX.1-dev',
                    is_flux=True,
                    quantize=True,    # run 8bit mixed precision
                    # Enabled here. Uses less VRAM while quantizing but is slower;
                    # intended for a GPU that is also connected to your monitors.
                    low_vram=True,
                ),
                sample=OrderedDict(
                    sampler='flowmatch',          # must match train.noise_scheduler
                    sample_every=STEPS_SAMPLE,    # sampling interval in steps
                    width=1024,
                    height=1024,
                    # "[trigger]" in a prompt is replaced with the trigger word.
                    prompts=[
                        '[trigger] with a water bottle by the lake',
                        '[trigger] playing tennis on a rainy day',
                        '[trigger] smiling wearing a tuxedo, in a crowded room',
                    ],
                    neg='',                       # not used on flux
                    seed=42,
                    walk_seed=True,
                    guidance_scale=4,
                    sample_steps=20,
                ),
            ),
        ],
    ),
    # Free-form metadata; "[name]" is replaced with the config name above.
    meta=OrderedDict(
        name='[name]',
        version='1.0',
    ),
)

In [3]:
!huggingface-cli login --token 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

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/sagemaker-user/.cache/huggingface/token
Login successful


In [4]:
import os
import sys

# Make the ./ai-toolkit directory (relative to the notebook's working
# directory) importable. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate sys.path entry each time.
if './ai-toolkit' not in sys.path:
    sys.path.append('./ai-toolkit')
from toolkit.job import run_job

# Hand the job description built in the previous cell to the toolkit.
# This call blocks for the duration of the run (hours for the settings above).
run_job(job_to_run)

  warn(
2024-09-15 15:42:23.615689: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-15 15:42:23.615734: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-15 15:42:23.615745: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-15 15:42:23.620703: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


{
    "type": "sd_trainer",
    "training_folder": "./output",
    "performance_log_every": 100,
    "device": "cuda:0",
    "trigger_word": "teleski",
    "network": {
        "type": "lora",
        "linear": 16,
        "linear_alpha": 16
    },
    "save": {
        "dtype": "float16",
        "save_every": 250,
        "max_step_saves_to_keep": 10
    },
    "datasets": [
        {
            "folder_path": "./images",
            "caption_ext": "txt",
            "caption_dropout_rate": 0.05,
            "shuffle_tokens": false,
            "cache_latents_to_disk": true,
            "resolution": [
                512,
                768,
                1024
            ]
        }
    ],
    "train": {
        "batch_size": 1,
        "steps": 3000,
        "gradient_accumulation_steps": 1,
        "train_unet": true,
        "train_text_encoder": false,
        "content_or_style": "balanced",
        "gradient_checkpointing": true,
        "noise_scheduler": "flowmatch",
   

  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  self.scaler = torch.cuda.amp.GradScaler()


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Quantizing transformer


scheduler/scheduler_config.json:   0%|          | 0.00/274 [00:00<?, ?B/s]

Loading vae


vae/config.json:   0%|          | 0.00/774 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Loading t5




tokenizer_2/tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer_2/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


text_encoder_2/config.json:   0%|          | 0.00/782 [00:00<?, ?B/s]

(…)t_encoder_2/model.safetensors.index.json:   0%|          | 0.00/19.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.53G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Quantizing T5
Loading clip


text_encoder/config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/588 [00:00<?, ?B/s]

making pipe
preparing
create LoRA network. base dim (rank): 16, alpha: 16
neuron dropout: p=None, rank dropout: p=None, module dropout: p=None
create LoRA for Text Encoder: 0 modules.
create LoRA for U-Net: 494 modules.
enable LoRA for U-Net
Dataset: ./images
  -  Preprocessing image dimensions


100%|██████████| 16/16 [00:00<00:00, 115.62it/s]

  -  Found 16 images
Bucket sizes for ./images:
384x640: 7 files
448x576: 6 files
512x512: 3 files
3 buckets made
Caching latents for ./images
 - Saving latents to disk



Caching latents to disk: 100%|██████████| 16/16 [00:01<00:00, 11.57it/s]


Dataset: ./images
  -  Preprocessing image dimensions


100%|██████████| 16/16 [00:00<00:00, 37076.72it/s]

  -  Found 16 images
Bucket sizes for ./images:
576x960: 7 files
640x832: 6 files
768x768: 3 files
3 buckets made
Caching latents for ./images
 - Saving latents to disk



Caching latents to disk: 100%|██████████| 16/16 [00:02<00:00,  7.15it/s]


Dataset: ./images
  -  Preprocessing image dimensions


100%|██████████| 16/16 [00:00<00:00, 36751.84it/s]

  -  Found 16 images
Bucket sizes for ./images:
704x1216: 7 files
832x1152: 6 files
960x960: 1 files
1024x1024: 1 files
896x896: 1 files
5 buckets made
Caching latents for ./images
 - Saving latents to disk



Caching latents to disk: 100%|██████████| 16/16 [00:03<00:00,  4.13it/s]


Generating baseline samples before training


  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]
my_first_flux_lora_v1:   3%|▎         | 99/3000 [12:08<6:06:29,  7.58s/it, lr: 1.0e-04 loss: 2.894e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.7572s avg - train_loop, num = 10
 - 4.8264s avg - backward, num = 10
 - 2.2447s avg - predict_unet, num = 10
 - 0.3037s avg - calculate_loss, num = 10
 - 0.2274s avg - optimizer_step, num = 10
 - 0.0955s avg - encode_prompt, num = 10
 - 0.1664s avg - reset_batch, num = 2
 - 0.0047s avg - get_batch, num = 10
 - 0.0021s avg - preprocess_batch, num = 10
 - 0.0008s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:   7%|▋         | 199/3000 [24:00<3:51:49,  4.97s/it, lr: 1.0e-04 loss: 5.279e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 4.9760s avg - train_loop, num = 10
 - 2.9288s avg - backward, num = 10
 - 1.5236s avg - predict_unet, num = 10
 - 0.2001s avg - calculate_loss, num = 10
 - 0.1693s avg - optimizer_step, num = 10
 - 0.0954s avg - encode_prompt, num = 10
 - 0.1654s avg - reset_batch, num = 2
 - 0.0063s avg - get_batch, num = 10
 - 0.0021s avg - preprocess_batch, num = 10
 - 0.0008s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:   8%|▊         | 249/3000 [30:19<4:35:09,  6.00s/it, lr: 1.0e-04 loss: 4.640e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:13<02:27, 73.91s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:27<01:13, 73.99s/it][A
Generating Images: 100%|██████████| 3/3 [03:42<00:00, 74.02s/it][A
my_first_flux_lora_v1:   8%|▊         | 249/3000 [30:19<4:35:09,  6.00s/it, lr: 1.0e-04 loss: 4.640e-01]

Saving at step 250


my_first_flux_lora_v1:   8%|▊         | 249/3000 [30:22<4:35:09,  6.00s/it, lr: 1.0e-04 loss: 4.640e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  10%|▉         | 299/3000 [36:20<5:08:04,  6.84s/it, lr: 1.0e-04 loss: 5.680e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 6.8134s avg - train_loop, num = 10
 - 4.2239s avg - backward, num = 10
 - 1.9756s avg - predict_unet, num = 10
 - 0.2862s avg - calculate_loss, num = 10
 - 0.2094s avg - optimizer_step, num = 10
 - 0.0950s avg - encode_prompt, num = 10
 - 0.1638s avg - reset_batch, num = 2
 - 0.0020s avg - preprocess_batch, num = 10
 - 0.0014s avg - get_batch, num = 10
 - 0.0008s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  13%|█▎        | 399/3000 [48:19<4:43:13,  6.53s/it, lr: 1.0e-04 loss: 4.287e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 5.7149s avg - train_loop, num = 10
 - 3.4888s avg - backward, num = 10
 - 1.6872s avg - predict_unet, num = 10
 - 0.2382s avg - calculate_loss, num = 10
 - 0.1892s avg - optimizer_step, num = 10
 - 0.0947s avg - encode_prompt, num = 10
 - 0.1635s avg - reset_batch, num = 2
 - 0.0018s avg - preprocess_batch, num = 10
 - 0.0015s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  17%|█▋        | 499/3000 [1:00:46<4:37:02,  6.65s/it, lr: 1.0e-04 loss: 3.615e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:14<02:29, 74.54s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:28<01:14, 74.21s/it][A
Generating Images: 100%|██████████| 3/3 [03:42<00:00, 74.14s/it][A
my_first_flux_lora_v1:  17%|█▋        | 499/3000 [1:00:46<4:37:02,  6.65s/it, lr: 1.0e-04 loss: 3.615e-01]

Saving at step 500


my_first_flux_lora_v1:  17%|█▋        | 499/3000 [1:00:49<4:37:02,  6.65s/it, lr: 1.0e-04 loss: 3.615e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt

Timer 'my_first_flux_lora_v1 Timer':
 - 6.9916s avg - train_loop, num = 10
 - 4.3499s avg - backward, num = 10
 - 2.0167s avg - predict_unet, num = 10
 - 0.2957s avg - calculate_loss, num = 10
 - 0.2143s avg - optimizer_step, num = 10
 - 0.0949s avg - encode_prompt, num = 10
 - 0.1691s avg - reset_batch, num = 2
 - 0.0018s avg - preprocess_batch, num = 10
 - 0.0016s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  20%|█▉        | 599/3000 [1:12:42<3:48:01,  5.70s/it, lr: 1.0e-04 loss: 3.623e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.1491s avg - train_loop, num = 10
 - 4.4561s avg - backward, num = 10
 - 2.0573s avg - predict_unet, num = 10
 - 0.3031s avg - calculate_loss, num = 10
 - 0.2163s avg - optimizer_step, num = 10
 - 0.0947s avg - encode_prompt, num = 10
 - 0.1687s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0014s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0002s avg - prepare_latents, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  23%|██▎       | 699/3000 [1:25:02<4:43:50,  7.40s/it, lr: 1.0e-04 loss: 3.630e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 8.2356s avg - train_loop, num = 10
 - 5.1850s avg - backward, num = 10
 - 2.3401s avg - predict_unet, num = 10
 - 0.3513s avg - calculate_loss, num = 10
 - 0.2419s avg - optimizer_step, num = 10
 - 0.0948s avg - encode_prompt, num = 10
 - 0.1683s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0015s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  25%|██▍       | 749/3000 [1:31:04<5:27:35,  8.73s/it, lr: 1.0e-04 loss: 3.539e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:14<02:28, 74.12s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:28<01:14, 74.02s/it][A
Generating Images: 100%|██████████| 3/3 [03:42<00:00, 74.01s/it][A
my_first_flux_lora_v1:  25%|██▍       | 749/3000 [1:31:04<5:27:35,  8.73s/it, lr: 1.0e-04 loss: 3.539e-01]

Saving at step 750


my_first_flux_lora_v1:  25%|██▍       | 749/3000 [1:31:07<5:27:35,  8.73s/it, lr: 1.0e-04 loss: 3.539e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  27%|██▋       | 799/3000 [1:36:42<2:58:48,  4.87s/it, lr: 1.0e-04 loss: 3.821e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 5.8543s avg - train_loop, num = 10
 - 3.5839s avg - backward, num = 10
 - 1.7246s avg - predict_unet, num = 10
 - 0.2430s avg - calculate_loss, num = 10
 - 0.1912s avg - optimizer_step, num = 10
 - 0.0946s avg - encode_prompt, num = 10
 - 0.1708s avg - reset_batch, num = 2
 - 0.0018s avg - preprocess_batch, num = 10
 - 0.0014s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  30%|██▉       | 899/3000 [1:49:10<4:58:06,  8.51s/it, lr: 1.0e-04 loss: 3.283e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.9918s avg - train_loop, num = 10
 - 5.0277s avg - backward, num = 10
 - 2.2701s avg - predict_unet, num = 10
 - 0.3420s avg - calculate_loss, num = 10
 - 0.2355s avg - optimizer_step, num = 10
 - 0.0949s avg - encode_prompt, num = 10
 - 0.1811s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0015s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  33%|███▎      | 999/3000 [2:01:17<2:55:50,  5.27s/it, lr: 1.0e-04 loss: 6.055e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:14<02:28, 74.49s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:28<01:14, 74.06s/it][A
Generating Images: 100%|██████████| 3/3 [03:41<00:00, 73.76s/it][A
my_first_flux_lora_v1:  33%|███▎      | 999/3000 [2:01:17<2:55:50,  5.27s/it, lr: 1.0e-04 loss: 6.055e-01]

Saving at step 1000


my_first_flux_lora_v1:  33%|███▎      | 999/3000 [2:01:20<2:55:50,  5.27s/it, lr: 1.0e-04 loss: 6.055e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt

Timer 'my_first_flux_lora_v1 Timer':
 - 6.2725s avg - train_loop, num = 10
 - 3.8664s avg - backward, num = 10
 - 1.8288s avg - predict_unet, num = 10
 - 0.2646s avg - calculate_loss, num = 10
 - 0.1998s avg - optimizer_step, num = 10
 - 0.0949s avg - encode_prompt, num = 10
 - 0.1745s avg - reset_batch, num = 2
 - 0.0018s avg - preprocess_batch, num = 10
 - 0.0015s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  37%|███▋      | 1099/3000 [2:13:16<3:51:45,  7.31s/it, lr: 1.0e-04 loss: 6.542e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.5408s avg - train_loop, num = 10
 - 4.7229s avg - backward, num = 10
 - 2.1547s avg - predict_unet, num = 10
 - 0.3204s avg - calculate_loss, num = 10
 - 0.2275s avg - optimizer_step, num = 10
 - 0.0946s avg - encode_prompt, num = 10
 - 0.1710s avg - reset_batch, num = 2
 - 0.0018s avg - preprocess_batch, num = 10
 - 0.0014s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  40%|███▉      | 1199/3000 [2:25:31<2:58:59,  5.96s/it, lr: 1.0e-04 loss: 2.886e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.6640s avg - train_loop, num = 10
 - 4.7771s avg - backward, num = 10
 - 2.1743s avg - predict_unet, num = 10
 - 0.3231s avg - calculate_loss, num = 10
 - 0.2277s avg - optimizer_step, num = 10
 - 0.0957s avg - encode_prompt, num = 10
 - 0.1661s avg - reset_batch, num = 3
 - 0.0074s avg - get_batch, num = 10
 - 0.0021s avg - preprocess_batch, num = 10
 - 0.0008s avg - prepare_noise, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  42%|████▏     | 1249/3000 [2:31:25<2:38:06,  5.42s/it, lr: 1.0e-04 loss: 3.341e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:14<02:29, 74.57s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:28<01:13, 73.99s/it][A
Generating Images: 100%|██████████| 3/3 [03:42<00:00, 73.94s/it][A
my_first_flux_lora_v1:  42%|████▏     | 1249/3000 [2:31:25<2:38:06,  5.42s/it, lr: 1.0e-04 loss: 3.341e-01]

Saving at step 1250


my_first_flux_lora_v1:  42%|████▏     | 1249/3000 [2:31:28<2:38:06,  5.42s/it, lr: 1.0e-04 loss: 3.341e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  43%|████▎     | 1299/3000 [2:37:35<2:45:52,  5.85s/it, lr: 1.0e-04 loss: 7.026e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 6.4589s avg - train_loop, num = 10
 - 3.9510s avg - backward, num = 10
 - 1.8749s avg - predict_unet, num = 10
 - 0.2675s avg - calculate_loss, num = 10
 - 0.2022s avg - optimizer_step, num = 10
 - 0.0958s avg - encode_prompt, num = 10
 - 0.1719s avg - reset_batch, num = 2
 - 0.0070s avg - get_batch, num = 10
 - 0.0021s avg - preprocess_batch, num = 10
 - 0.0008s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  47%|████▋     | 1399/3000 [2:49:47<3:55:29,  8.83s/it, lr: 1.0e-04 loss: 4.658e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.9872s avg - train_loop, num = 10
 - 5.0068s avg - backward, num = 10
 - 2.2455s avg - predict_unet, num = 10
 - 0.3390s avg - calculate_loss, num = 10
 - 0.2338s avg - optimizer_step, num = 10
 - 0.0958s avg - encode_prompt, num = 10
 - 0.1729s avg - reset_batch, num = 2
 - 0.0079s avg - get_batch, num = 10
 - 0.0022s avg - preprocess_batch, num = 10
 - 0.0008s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  50%|████▉     | 1499/3000 [3:01:33<3:00:41,  7.22s/it, lr: 1.0e-04 loss: 3.328e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:14<02:29, 74.56s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:28<01:14, 74.24s/it][A
Generating Images: 100%|██████████| 3/3 [03:42<00:00, 74.06s/it][A
my_first_flux_lora_v1:  50%|████▉     | 1499/3000 [3:01:33<3:00:41,  7.22s/it, lr: 1.0e-04 loss: 3.328e-01]

Saving at step 1500


my_first_flux_lora_v1:  50%|████▉     | 1499/3000 [3:01:36<3:00:41,  7.22s/it, lr: 1.0e-04 loss: 3.328e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt

Timer 'my_first_flux_lora_v1 Timer':
 - 6.3021s avg - train_loop, num = 10
 - 3.8817s avg - backward, num = 10
 - 1.8404s avg - predict_unet, num = 10
 - 0.2634s avg - calculate_loss, num = 10
 - 0.2014s avg - optimizer_step, num = 10
 - 0.0951s avg - encode_prompt, num = 10
 - 0.1747s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0014s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0002s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  53%|█████▎    | 1599/3000 [3:13:57<3:07:54,  8.05s/it, lr: 1.0e-04 loss: 5.310e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.4270s avg - train_loop, num = 10
 - 4.6490s avg - backward, num = 10
 - 2.1239s avg - predict_unet, num = 10
 - 0.3148s avg - calculate_loss, num = 10
 - 0.2226s avg - optimizer_step, num = 10
 - 0.0949s avg - encode_prompt, num = 10
 - 0.1618s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0016s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  57%|█████▋    | 1699/3000 [3:26:07<2:52:10,  7.94s/it, lr: 1.0e-04 loss: 3.899e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.3515s avg - train_loop, num = 10
 - 4.5840s avg - backward, num = 10
 - 2.1204s avg - predict_unet, num = 10
 - 0.3114s avg - calculate_loss, num = 10
 - 0.2214s avg - optimizer_step, num = 10
 - 0.0946s avg - encode_prompt, num = 10
 - 0.1714s avg - reset_batch, num = 2
 - 0.0018s avg - preprocess_batch, num = 10
 - 0.0015s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0002s avg - prepare_latents, num = 10
 - 0.0002s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  58%|█████▊    | 1749/3000 [3:32:09<3:07:01,  8.97s/it, lr: 1.0e-04 loss: 3.377e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:13<02:27, 73.58s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:27<01:13, 73.85s/it][A
Generating Images: 100%|██████████| 3/3 [03:40<00:00, 73.55s/it][A
my_first_flux_lora_v1:  58%|█████▊    | 1749/3000 [3:32:09<3:07:01,  8.97s/it, lr: 1.0e-04 loss: 3.377e-01]

Saving at step 1750


my_first_flux_lora_v1:  58%|█████▊    | 1749/3000 [3:32:12<3:07:01,  8.97s/it, lr: 1.0e-04 loss: 3.377e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  60%|█████▉    | 1799/3000 [3:38:31<2:43:48,  8.18s/it, lr: 1.0e-04 loss: 3.663e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.4845s avg - train_loop, num = 10
 - 4.6887s avg - backward, num = 10
 - 2.1348s avg - predict_unet, num = 10
 - 0.3197s avg - calculate_loss, num = 10
 - 0.2261s avg - optimizer_step, num = 10
 - 0.0945s avg - encode_prompt, num = 10
 - 0.1720s avg - reset_batch, num = 2
 - 0.0018s avg - preprocess_batch, num = 10
 - 0.0016s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  63%|██████▎   | 1899/3000 [3:50:22<2:47:44,  9.14s/it, lr: 1.0e-04 loss: 3.884e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.6474s avg - train_loop, num = 10
 - 4.7903s avg - backward, num = 10
 - 2.1897s avg - predict_unet, num = 10
 - 0.3231s avg - calculate_loss, num = 10
 - 0.2292s avg - optimizer_step, num = 10
 - 0.0947s avg - encode_prompt, num = 10
 - 0.1737s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0015s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  67%|██████▋   | 1999/3000 [4:02:31<1:59:44,  7.18s/it, lr: 1.0e-04 loss: 3.621e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:13<02:27, 73.95s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:27<01:13, 73.66s/it][A
Generating Images: 100%|██████████| 3/3 [03:40<00:00, 73.25s/it][A
my_first_flux_lora_v1:  67%|██████▋   | 1999/3000 [4:02:31<1:59:44,  7.18s/it, lr: 1.0e-04 loss: 3.621e-01]

Saving at step 2000


my_first_flux_lora_v1:  67%|██████▋   | 1999/3000 [4:02:34<1:59:44,  7.18s/it, lr: 1.0e-04 loss: 3.621e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt

Timer 'my_first_flux_lora_v1 Timer':
 - 8.8612s avg - train_loop, num = 10
 - 5.6181s avg - backward, num = 10
 - 2.4908s avg - predict_unet, num = 10
 - 0.3821s avg - calculate_loss, num = 10
 - 0.2505s avg - optimizer_step, num = 10
 - 0.0951s avg - encode_prompt, num = 10
 - 0.1680s avg - reset_batch, num = 2
 - 0.0020s avg - preprocess_batch, num = 10
 - 0.0016s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  70%|██████▉   | 2099/3000 [4:14:26<1:52:16,  7.48s/it, lr: 1.0e-04 loss: 4.731e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.0293s avg - train_loop, num = 10
 - 4.3711s avg - backward, num = 10
 - 2.0316s avg - predict_unet, num = 10
 - 0.2967s avg - calculate_loss, num = 10
 - 0.2142s avg - optimizer_step, num = 10
 - 0.0949s avg - encode_prompt, num = 10
 - 0.1686s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0014s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  73%|███████▎  | 2199/3000 [4:26:18<1:54:37,  8.59s/it, lr: 1.0e-04 loss: 2.566e-01]


Timer 'my_first_flux_lora_v1 Timer':
 - 7.7016s avg - train_loop, num = 10
 - 4.8387s avg - backward, num = 10
 - 2.1894s avg - predict_unet, num = 10
 - 0.3290s avg - calculate_loss, num = 10
 - 0.2280s avg - optimizer_step, num = 10
 - 0.0947s avg - encode_prompt, num = 10
 - 0.1763s avg - reset_batch, num = 2
 - 0.0019s avg - preprocess_batch, num = 10
 - 0.0016s avg - get_batch, num = 10
 - 0.0007s avg - prepare_noise, num = 10
 - 0.0003s avg - batch_cleanup, num = 10
 - 0.0003s avg - prepare_latents, num = 10
 - 0.0000s avg - prepare_prompt, num = 10
 - 0.0000s avg - scheduler_step, num = 10
 - 0.0000s avg - grad_setup, num = 10
 - 0.0000s avg - log_to_tensorboard, num = 1



my_first_flux_lora_v1:  75%|███████▍  | 2249/3000 [4:32:37<1:05:43,  5.25s/it, lr: 1.0e-04 loss: 1.959e-01]
Generating Images:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating Images:  33%|███▎      | 1/3 [01:14<02:28, 74.05s/it][A
Generating Images:  67%|██████▋   | 2/3 [02:27<01:13, 73.79s/it][A
Generating Images: 100%|██████████| 3/3 [03:41<00:00, 73.75s/it][A
my_first_flux_lora_v1:  75%|███████▍  | 2249/3000 [4:32:37<1:05:43,  5.25s/it, lr: 1.0e-04 loss: 1.959e-01]

Saving at step 2250


my_first_flux_lora_v1:  75%|███████▍  | 2249/3000 [4:32:40<1:05:43,  5.25s/it, lr: 1.0e-04 loss: 1.959e-01]

Saved to ./output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  75%|███████▌  | 2254/3000 [4:33:03<1:11:07,  5.72s/it, lr: 1.0e-04 loss: 5.535e-01]

KeyboardInterrupt: 