In [1]:
import gc
import os


## Install the latest diffusers

In [None]:
!pip install git+https://github.com/huggingface/diffusers

In [None]:
import torch
from diffusers import FluxKontextPipeline, QwenImageEditPipeline
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm
`flash-attn` is not available or the version is too old. Please install `flash-attn>=2.6.3`.
`sageattention` is not available or the version is too old. Please install `sageattention>=2.1.1`.


In [None]:
# Enable any one

mode = "MANUAL"      # Use your own instructions, and image
# mode = "AUTO"        # Use instructions, and image from the folder

output_dir = "outputs/"

# exist_ok makes this a no-op when the folder is already there.
os.makedirs(output_dir, exist_ok=True)

# For manual
instructions = [
  "Add character to the image.",
]

base_images = [
  "assets/scene_01.png",
]

# For auto
default_instruction = "Add character to the image."

# This folder has an image and a text file with instructions, with the same name
base_src = "example_data/"

# File extensions (compared case-insensitively) that count as input images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp')


def load_folder_inputs(src_dir, fallback_instruction):
  """Collect image paths and their instructions from a folder.

  Every file in ``src_dir`` whose extension is in ``IMAGE_EXTENSIONS`` is
  treated as an input image. Its instruction is read from a ``.txt`` file with
  the same base name; when that file is missing or blank,
  ``fallback_instruction`` is used instead.

  Args:
    src_dir: Folder to scan (not recursive).
    fallback_instruction: Instruction for images without a usable text file.

  Returns:
    A ``(image_paths, instructions)`` tuple of equally long lists, ordered by
    file name.

  Raises:
    OSError: If ``src_dir`` cannot be listed or a text file cannot be read.
  """
  image_paths = []
  prompts = []
  # os.listdir returns entries in arbitrary order; sorting keeps the numbering
  # of the saved results stable between runs.
  for file_name in sorted(os.listdir(src_dir)):
    # splitext removes only the final extension, so "scene.v2.png" is matched
    # with "scene.v2.txt" rather than "scene.txt".
    stem, extension = os.path.splitext(file_name)
    if extension.lower() not in IMAGE_EXTENSIONS:
      continue
    image_paths.append(os.path.join(src_dir, file_name))

    prompt = fallback_instruction
    prompt_path = os.path.join(src_dir, f"{stem}.txt")
    if os.path.exists(prompt_path):
      with open(prompt_path, "r", encoding="utf-8") as prompt_file:
        # Drop the trailing newline editors add; a blank file uses the fallback.
        prompt = prompt_file.read().strip() or fallback_instruction
    prompts.append(prompt)
  return image_paths, prompts


if mode == "AUTO":
  # load all images from the folder
  base_images, instructions = load_folder_inputs(base_src, default_instruction)

# The generation loops pair the two lists with zip(), which would silently drop
# the unmatched tail of the longer list.
if len(base_images) != len(instructions):
  raise ValueError(
    f"Got {len(base_images)} images but {len(instructions)} instructions; "
    "provide exactly one instruction per image."
  )
  

In [10]:
# Quick sanity check of the selected mode and how many images will be processed.
for summary_line in (f"Mode: {mode}", f"Total images: {len(base_images)}"):
    print(summary_line)

Mode: MANUAL
Total images: 1


In [None]:
# Load the FLUX.1 Kontext checkpoint with bfloat16 weights and move it to the GPU.
pipe = FluxKontextPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-Kontext-dev",
    torch_dtype=torch.bfloat16,
).to("cuda")

Loading pipeline components...:   0%|                                                 | 0/7 [00:00<?, ?it/s]
Loading checkpoint shards:   0%|                                                      | 0/3 [00:00<?, ?it/s][A
Loading checkpoint shards: 100%|██████████████████████████████████████████████| 3/3 [00:00<00:00, 28.23it/s][A
Loading pipeline components...:  29%|███████████▋                             | 2/7 [00:00<00:00,  5.18it/s]You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Loading pipeline components...:  86%|███████████████████████████████████▏     | 6/7 [00:00<00:00, 10.27it/s]
Loading checkpoint shards:   0%|                                                      | 0/2 [00:00<?, ?it/s][A
Loading checkpoint shards:  50%|███████████████████████                       | 1/2 [00:00<00:00,  6.15it/s][A
Loading checkpoint shards: 100%|██████████████████████████████████████████████| 2/2 [00:00<00:00,  6.44it/s][A
Loading pipeline componen

In [None]:
# Edit every (image, instruction) pair with the loaded pipeline and save each
# result as "<output_dir>/<NNN>_<model>.png".
result_img = None  # stays None when there is nothing to process
model = "flux"

# enumerate(start=1) numbers the saved files 001, 002, ...
for index, (image_path, instruction) in enumerate(zip(base_images, instructions), start=1):
    # Close the source file as soon as the RGB copy has been made.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    width, height = image.size

    # A new generator with the same fixed seed is created for every image.
    generator = torch.Generator().manual_seed(42)

    result_img = pipe(
        prompt=instruction,
        image=image,
        num_inference_steps=14,
        height=height,  # request the input image's own size
        width=width,
        generator=generator,
    ).images[0]

    # os.path.join avoids the doubled slash "outputs//..." that plain string
    # formatting produces when output_dir already ends with "/".
    result_img.save(os.path.join(output_dir, f"{index:03d}_{model}.png"))

print("Last image")
result_img

100%|███████████████████████████████████████████████████████████████████████| 14/14 [00:20<00:00,  1.45s/it]


In [None]:
# Drop the notebook's reference to the pipeline, collect anything kept alive
# only by reference cycles, then release the CUDA memory PyTorch has cached.
del pipe
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Load the Qwen image-editing checkpoint with bfloat16 weights.
pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=torch.bfloat16)
print("pipeline loaded")

# Move the pipeline to the GPU, then configure its progress bar.
pipe.to("cuda")
pipe.set_progress_bar_config(disable=None)



In [None]:
# Edit every (image, instruction) pair with the loaded pipeline and save each
# result as "<output_dir>/<NNN>_<model>.png".
output_image = None  # defined up front so the final display works with no inputs
model = "qwen"

# enumerate(start=1) numbers the saved files 001, 002, ...
for index, (image_path, instruction) in enumerate(zip(base_images, instructions), start=1):
    # Close the source file as soon as the RGB copy has been made.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    # A new generator with the same fixed seed is created for every image.
    generator = torch.Generator().manual_seed(42)

    output_image = pipe(
        image=image,
        prompt=instruction,
        generator=generator,
        true_cfg_scale=4.0,
        negative_prompt=" ",
        num_inference_steps=14,
    ).images[0]

    # os.path.join avoids the doubled slash "outputs//..." that plain string
    # formatting produces when output_dir already ends with "/".
    output_image.save(os.path.join(output_dir, f"{index:03d}_{model}.png"))

print("Last image")
output_image

In [None]:
# Drop the notebook's reference to the pipeline, collect anything kept alive
# only by reference cycles, then release the CUDA memory PyTorch has cached.
del pipe
gc.collect()
torch.cuda.empty_cache()