source:

https://colab.research.google.com/github/ostris/ai-toolkit/blob/main/notebooks/FLUX_1_dev_LoRA_Training.ipynb

In [1]:
!git clone https://github.com/ostris/ai-toolkit

fatal: destination path 'ai-toolkit' already exists and is not an empty directory.


[38;5;57m[1m⚡️ Tip[0m	Connect GitHub to Studios: [4mhttps://lightning.ai/michalreal/home?settings=integrations[0m



In [2]:
!mkdir -p ./content/dataset

In [3]:
!cd ai-toolkit && git submodule update --init --recursive && pip install -r requirements.txt

Collecting git+https://github.com/huggingface/diffusers.git (from -r requirements.txt (line 4))
  Cloning https://github.com/huggingface/diffusers.git to /tmp/pip-req-build-dews3yrm
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers.git /tmp/pip-req-build-dews3yrm

  Resolved https://github.com/huggingface/diffusers.git to commit 0e50401e34242dbd4b94a8a3cf0ee24afc25ea65
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone


In [1]:
import getpass
import os

# Prompt for the token
hf_token = getpass.getpass('Enter your HF access token and press enter: ')

# Set the environment variable
os.environ['HF_TOKEN'] = hf_token

print("HF_TOKEN environment variable has been set.")

HF_TOKEN environment variable has been set.


In [5]:
import os
import sys
sys.path.append('/teamspace/studios/this_studio/ai-toolkit')
from toolkit.job import run_job
from collections import OrderedDict
from PIL import Image
import os

In [7]:
from collections import OrderedDict

job_to_run = OrderedDict([
    ('job', 'extension'),
    ('config', OrderedDict([
        # this name will be the folder and filename name
        ('name', 'my_first_flux_lora_v1'),
        ('process', [
            OrderedDict([
                ('type', 'sd_trainer'),
                # root folder to save training sessions/samples/weights
                ('training_folder', '/teamspace/studios/this_studio/content/output'),
                # uncomment to see performance stats in the terminal every N steps
                #('performance_log_every', 1000),
                ('device', 'cuda:0'),
                # if a trigger word is specified, it will be added to captions of training data if it does not already exist
                # alternatively, in your captions you can add [trigger] and it will be replaced with the trigger word
                # ('trigger_word', 'image'),
                ('network', OrderedDict([
                    ('type', 'lora'),
                    ('linear', 16),
                    ('linear_alpha', 16)
                ])),
                ('save', OrderedDict([
                    ('dtype', 'float16'),  # precision to save
                    ('save_every', 250),  # save every this many steps
                    ('max_step_saves_to_keep', 4)  # how many intermittent saves to keep
                ])),
                ('datasets', [
                    # datasets are a folder of images. captions need to be txt files with the same name as the image
                    # for instance image2.jpg and image2.txt. Only jpg, jpeg, and png are supported currently
                    # images will automatically be resized and bucketed into the resolution specified
                    OrderedDict([
                        ('folder_path', '/teamspace/studios/this_studio/content/dataset'),
                        ('caption_ext', 'txt'),
                        ('caption_dropout_rate', 0.05),  # will drop out the caption 5% of time
                        ('shuffle_tokens', False),  # shuffle caption order, split by commas
                        ('cache_latents_to_disk', True),  # leave this true unless you know what you're doing
                        ('resolution', [512, 768, 1024])  # flux enjoys multiple resolutions
                    ])
                ]),
                ('train', OrderedDict([
                    ('batch_size', 1),
                    ('steps', 2000),  # total number of steps to train 500 - 4000 is a good range
                    ('gradient_accumulation_steps', 1),
                    ('train_unet', True),
                    ('train_text_encoder', False),  # probably won't work with flux
                    ('content_or_style', 'content'),  # content, style, balanced
                    ('gradient_checkpointing', True),  # need the on unless you have a ton of vram
                    ('noise_scheduler', 'flowmatch'),  # for training only
                    ('optimizer', 'adamw8bit'),
                    ('lr', 1e-4),

                    # uncomment this to skip the pre training sample
                    # ('skip_first_sample', True),

                    # uncomment to completely disable sampling
                    # ('disable_sampling', True),

                    # uncomment to use new vell curved weighting. Experimental but may produce better results
                    # ('linear_timesteps', True),

                    # ema will smooth out learning, but could slow it down. Recommended to leave on.
                    ('ema_config', OrderedDict([
                        ('use_ema', True),
                        ('ema_decay', 0.99)
                    ])),

                    # will probably need this if gpu supports it for flux, other dtypes may not work correctly
                    ('dtype', 'bf16')
                ])),
                ('model', OrderedDict([
                    # huggingface model name or path
                    ('name_or_path', 'black-forest-labs/FLUX.1-dev'),
                    ('is_flux', True),
                    ('quantize', True),  # run 8bit mixed precision
                    #('low_vram', True),  # uncomment this if the GPU is connected to your monitors. It will use less vram to quantize, but is slower.
                ])),
                ('sample', OrderedDict([
                    ('sampler', 'flowmatch'),  # must match train.noise_scheduler
                    ('sample_every', 250),  # sample every this many steps
                    ('width', 1024),
                    ('height', 1024),
                    ('prompts', [
                        # you can add [trigger] to the prompts here and it will be replaced with the trigger word
                        #'[trigger] holding a sign that says \'I LOVE PROMPTS!\'',
                        'woman with red hair, playing chess at the park, bomb going off in the background',
                        'a woman holding a coffee cup, in a beanie, sitting at a cafe',
                        'a horse is a DJ at a night club, fish eye lens, smoke machine, lazer lights, holding a martini',
                        'a man showing off his cool new t shirt at the beach, a shark is jumping out of the water in the background',
                        'a bear building a log cabin in the snow covered mountains',
                        'woman playing the guitar, on stage, singing a song, laser lights, punk rocker',
                        'hipster man with a beard, building a chair, in a wood shop',
                        'photo of a man, white background, medium shot, modeling clothing, studio lighting, white backdrop',
                        'a man holding a sign that says, \'this is a sign\'',
                        'a bulldog, in a post apocalyptic world, with a shotgun, in a leather jacket, in a desert, with a motorcycle'
                    ]),
                    ('neg', ''),  # not used on flux
                    ('seed', 42),
                    ('walk_seed', True),
                    ('guidance_scale', 4),
                    ('sample_steps', 20)
                ]))
            ])
        ])
    ])),
    # you can add any additional meta info here. [name] is replaced with config name at top
    ('meta', OrderedDict([
        ('name', '[name]'),
        ('version', '1.0')
    ]))
])


In [8]:
%%time
run_job(job_to_run)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

  check_for_updates()


{
    "type": "sd_trainer",
    "training_folder": "/teamspace/studios/this_studio/content/output",
    "device": "cuda:0",
    "network": {
        "type": "lora",
        "linear": 16,
        "linear_alpha": 16
    },
    "save": {
        "dtype": "float16",
        "save_every": 250,
        "max_step_saves_to_keep": 4
    },
    "datasets": [
        {
            "folder_path": "/teamspace/studios/this_studio/content/dataset",
            "caption_ext": "txt",
            "caption_dropout_rate": 0.05,
            "shuffle_tokens": false,
            "cache_latents_to_disk": true,
            "resolution": [
                512,
                768,
                1024
            ]
        }
    ],
    "train": {
        "batch_size": 1,
        "steps": 2000,
        "gradient_accumulation_steps": 1,
        "train_unet": true,
        "train_text_encoder": false,
        "content_or_style": "content",
        "gradient_checkpointing": true,
        "noise_scheduler": "flowmat

  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  return register_model(fn_wrapper)
  self.scaler = torch.cuda.amp.GradScaler()


transformer/config.json:   0%|          | 0.00/378 [00:00<?, ?B/s]

(…)ion_pytorch_model.safetensors.index.json:   0%|          | 0.00/121k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

(…)pytorch_model-00003-of-00003.safetensors:   0%|          | 0.00/3.87G [00:00<?, ?B/s]

(…)pytorch_model-00002-of-00003.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

(…)pytorch_model-00001-of-00003.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Quantizing transformer


scheduler/scheduler_config.json:   0%|          | 0.00/273 [00:00<?, ?B/s]

Loading vae


vae/config.json:   0%|          | 0.00/820 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Loading t5


tokenizer_2/tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer_2/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


text_encoder_2/config.json:   0%|          | 0.00/782 [00:00<?, ?B/s]

(…)t_encoder_2/model.safetensors.index.json:   0%|          | 0.00/19.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.53G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Quantizing T5
Loading clip


text_encoder/config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/588 [00:00<?, ?B/s]

making pipe
preparing
create LoRA network. base dim (rank): 16, alpha: 16
neuron dropout: p=None, rank dropout: p=None, module dropout: p=None
create LoRA for Text Encoder: 0 modules.
create LoRA for U-Net: 494 modules.
enable LoRA for U-Net
#### IMPORTANT RESUMING FROM /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/my_first_flux_lora_v1_000000250.safetensors ####
Loading from /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/my_first_flux_lora_v1_000000250.safetensors
Missing keys: []
Found step 250 in metadata, starting from there
Loading optimizer state from /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt
Updating optimizer LR from params
Dataset: /teamspace/studios/this_studio/content/dataset
  -  Preprocessing image dimensions


100%|██████████| 25/25 [00:00<00:00, 54613.33it/s]

  -  Found 25 images
Bucket sizes for /teamspace/studios/this_studio/content/dataset:
448x576: 23 files
576x448: 2 files
2 buckets made
Caching latents for /teamspace/studios/this_studio/content/dataset
 - Saving latents to disk



Caching latents to disk: 100%|██████████| 25/25 [00:00<00:00, 33608.21it/s]


Dataset: /teamspace/studios/this_studio/content/dataset
  -  Preprocessing image dimensions


100%|██████████| 25/25 [00:00<00:00, 63550.06it/s]

  -  Found 25 images
Bucket sizes for /teamspace/studios/this_studio/content/dataset:
640x832: 23 files
832x640: 2 files
2 buckets made
Caching latents for /teamspace/studios/this_studio/content/dataset
 - Saving latents to disk



Caching latents to disk: 100%|██████████| 25/25 [00:00<00:00, 27991.88it/s]


Dataset: /teamspace/studios/this_studio/content/dataset
  -  Preprocessing image dimensions


100%|██████████| 25/25 [00:00<00:00, 57519.25it/s]

  -  Found 25 images
Bucket sizes for /teamspace/studios/this_studio/content/dataset:
832x1152: 23 files
1152x832: 2 files
2 buckets made
Caching latents for /teamspace/studios/this_studio/content/dataset
 - Saving latents to disk



Caching latents to disk: 100%|██████████| 25/25 [00:00<00:00, 27557.84it/s]
my_first_flux_lora_v1:  25%|██▍       | 499/2000 [09:10<50:16,  2.01s/it, lr: 1.0e-04 loss: 5.203e-01]  
Generating Images:   0%|          | 0/10 [00:00<?, ?it/s][A
Generating Images:  10%|█         | 1/10 [00:21<03:16, 21.83s/it][A
Generating Images:  20%|██        | 2/10 [00:43<02:54, 21.78s/it][A
Generating Images:  30%|███       | 3/10 [01:05<02:32, 21.79s/it][A
Generating Images:  40%|████      | 4/10 [01:27<02:11, 21.85s/it][A
Generating Images:  50%|█████     | 5/10 [01:49<01:49, 21.91s/it][A
Generating Images:  60%|██████    | 6/10 [02:11<01:27, 21.87s/it][A
Generating Images:  70%|███████   | 7/10 [02:32<01:05, 21.83s/it][A
Generating Images:  80%|████████  | 8/10 [02:54<00:43, 21.80s/it][A
Generating Images:  90%|█████████ | 9/10 [03:16<00:21, 21.84s/it][A
Generating Images: 100%|██████████| 10/10 [03:38<00:00, 21.88s/it][A
my_first_flux_lora_v1:  25%|██▍       | 499/2000 [09:10<50:16,  2.

Saving at step 500


my_first_flux_lora_v1:  25%|██▍       | 499/2000 [09:12<50:16,  2.01s/it, lr: 1.0e-04 loss: 5.203e-01]

Saved to /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  37%|███▋      | 749/2000 [18:19<49:15,  2.36s/it, lr: 1.0e-04 loss: 2.629e-01]  
Generating Images:   0%|          | 0/10 [00:00<?, ?it/s][A
Generating Images:  10%|█         | 1/10 [00:21<03:13, 21.45s/it][A
Generating Images:  20%|██        | 2/10 [00:43<02:52, 21.57s/it][A
Generating Images:  30%|███       | 3/10 [01:04<02:31, 21.65s/it][A
Generating Images:  40%|████      | 4/10 [01:26<02:10, 21.76s/it][A
Generating Images:  50%|█████     | 5/10 [01:48<01:49, 21.82s/it][A
Generating Images:  60%|██████    | 6/10 [02:10<01:27, 21.81s/it][A
Generating Images:  70%|███████   | 7/10 [02:32<01:05, 21.81s/it][A
Generating Images:  80%|████████  | 8/10 [02:54<00:43, 21.85s/it][A
Generating Images:  90%|█████████ | 9/10 [03:16<00:21, 21.88s/it][A
Generating Images: 100%|██████████| 10/10 [03:37<00:00, 21.85s/it][A
my_first_flux_lora_v1:  37%|███▋      | 749/2000 [18:19<49:15,  2.36s/it, lr: 1.0e-04 loss: 2.629e-01]

Saving at step 750


my_first_flux_lora_v1:  37%|███▋      | 749/2000 [18:21<49:15,  2.36s/it, lr: 1.0e-04 loss: 2.629e-01]

Saved to /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  50%|████▉     | 999/2000 [27:24<34:28,  2.07s/it, lr: 1.0e-04 loss: 2.951e-01]
Generating Images:   0%|          | 0/10 [00:00<?, ?it/s][A
Generating Images:  10%|█         | 1/10 [00:21<03:14, 21.62s/it][A
Generating Images:  20%|██        | 2/10 [00:43<02:53, 21.71s/it][A
Generating Images:  30%|███       | 3/10 [01:05<02:32, 21.76s/it][A
Generating Images:  40%|████      | 4/10 [01:27<02:10, 21.82s/it][A
Generating Images:  50%|█████     | 5/10 [01:49<01:49, 21.86s/it][A
Generating Images:  60%|██████    | 6/10 [02:11<01:27, 21.90s/it][A
Generating Images:  70%|███████   | 7/10 [02:32<01:05, 21.89s/it][A
Generating Images:  80%|████████  | 8/10 [02:54<00:43, 21.82s/it][A
Generating Images:  90%|█████████ | 9/10 [03:16<00:21, 21.78s/it][A
Generating Images: 100%|██████████| 10/10 [03:38<00:00, 21.77s/it][A
my_first_flux_lora_v1:  50%|████▉     | 999/2000 [27:24<34:28,  2.07s/it, lr: 1.0e-04 loss: 2.951e-01]

Saving at step 1000


my_first_flux_lora_v1:  50%|████▉     | 999/2000 [27:26<34:28,  2.07s/it, lr: 1.0e-04 loss: 2.951e-01]

Saved to /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt


my_first_flux_lora_v1:  62%|██████▏   | 1249/2000 [36:34<31:51,  2.55s/it, lr: 1.0e-04 loss: 2.872e-01]
Generating Images:   0%|          | 0/10 [00:00<?, ?it/s][A
Generating Images:  10%|█         | 1/10 [00:21<03:13, 21.45s/it][A
Generating Images:  20%|██        | 2/10 [00:43<02:52, 21.62s/it][A
Generating Images:  30%|███       | 3/10 [01:05<02:32, 21.76s/it][A
Generating Images:  40%|████      | 4/10 [01:26<02:10, 21.79s/it][A
Generating Images:  50%|█████     | 5/10 [01:48<01:48, 21.77s/it][A
Generating Images:  60%|██████    | 6/10 [02:10<01:27, 21.79s/it][A
Generating Images:  70%|███████   | 7/10 [02:32<01:05, 21.83s/it][A
Generating Images:  80%|████████  | 8/10 [02:54<00:43, 21.85s/it][A
Generating Images:  90%|█████████ | 9/10 [03:16<00:21, 21.82s/it][A
Generating Images: 100%|██████████| 10/10 [03:37<00:00, 21.80s/it][A
my_first_flux_lora_v1:  62%|██████▏   | 1249/2000 [36:34<31:51,  2.55s/it, lr: 1.0e-04 loss: 2.872e-01]

Saving at step 1250


my_first_flux_lora_v1:  62%|██████▏   | 1249/2000 [36:35<31:51,  2.55s/it, lr: 1.0e-04 loss: 2.872e-01]

Saved to /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt
Removing old save: /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/my_first_flux_lora_v1_000000250.safetensors


my_first_flux_lora_v1:  75%|███████▍  | 1499/2000 [45:38<15:38,  1.87s/it, lr: 1.0e-04 loss: 3.305e-01]
Generating Images:   0%|          | 0/10 [00:00<?, ?it/s][A
Generating Images:  10%|█         | 1/10 [00:21<03:13, 21.53s/it][A
Generating Images:  20%|██        | 2/10 [00:43<02:53, 21.68s/it][A
Generating Images:  30%|███       | 3/10 [01:05<02:32, 21.79s/it][A
Generating Images:  40%|████      | 4/10 [01:27<02:10, 21.81s/it][A
Generating Images:  50%|█████     | 5/10 [01:48<01:48, 21.75s/it][A
Generating Images:  60%|██████    | 6/10 [02:10<01:26, 21.71s/it][A
Generating Images:  70%|███████   | 7/10 [02:32<01:05, 21.70s/it][A
Generating Images:  80%|████████  | 8/10 [02:53<00:43, 21.73s/it][A
Generating Images:  90%|█████████ | 9/10 [03:15<00:21, 21.82s/it][A
Generating Images: 100%|██████████| 10/10 [03:37<00:00, 21.82s/it][A
my_first_flux_lora_v1:  75%|███████▍  | 1499/2000 [45:38<15:38,  1.87s/it, lr: 1.0e-04 loss: 3.305e-01]

Saving at step 1500


my_first_flux_lora_v1:  75%|███████▍  | 1499/2000 [45:40<15:38,  1.87s/it, lr: 1.0e-04 loss: 3.305e-01]

Saved to /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt
Removing old save: /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/my_first_flux_lora_v1_000000500.safetensors


my_first_flux_lora_v1:  87%|████████▋ | 1749/2000 [54:48<08:26,  2.02s/it, lr: 1.0e-04 loss: 5.210e-01]
Generating Images:   0%|          | 0/10 [00:00<?, ?it/s][A
Generating Images:  10%|█         | 1/10 [00:21<03:14, 21.61s/it][A
Generating Images:  20%|██        | 2/10 [00:43<02:53, 21.67s/it][A
Generating Images:  30%|███       | 3/10 [01:05<02:31, 21.71s/it][A
Generating Images:  40%|████      | 4/10 [01:26<02:10, 21.78s/it][A
Generating Images:  50%|█████     | 5/10 [01:48<01:49, 21.82s/it][A
Generating Images:  60%|██████    | 6/10 [02:10<01:27, 21.80s/it][A
Generating Images:  70%|███████   | 7/10 [02:32<01:05, 21.79s/it][A
Generating Images:  80%|████████  | 8/10 [02:54<00:43, 21.82s/it][A
Generating Images:  90%|█████████ | 9/10 [03:16<00:21, 21.86s/it][A
Generating Images: 100%|██████████| 10/10 [03:37<00:00, 21.78s/it][A
my_first_flux_lora_v1:  87%|████████▋ | 1749/2000 [54:48<08:26,  2.02s/it, lr: 1.0e-04 loss: 5.210e-01]

Saving at step 1750


my_first_flux_lora_v1:  87%|████████▋ | 1749/2000 [54:50<08:26,  2.02s/it, lr: 1.0e-04 loss: 5.210e-01]

Saved to /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt
Removing old save: /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/my_first_flux_lora_v1_000000750.safetensors


my_first_flux_lora_v1: 100%|█████████▉| 1999/2000 [1:03:59<00:02,  2.20s/it, lr: 1.0e-04 loss: 2.237e-01]
                                                                  


Saved to /teamspace/studios/this_studio/content/output/my_first_flux_lora_v1/optimizer.pt
CPU times: user 58min 49s, sys: 34min 48s, total: 1h 33min 38s
Wall time: 2h 3min 22s
