Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions docs/source/en/api/pipelines/flux.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,35 @@ image = pipe(
image.save("output.png")
```

Canny Control is also possible with a LoRA variant of this condition. The usage is as follows:

```python
# !pip install -U controlnet-aux
import torch
from controlnet_aux import CannyDetector
from diffusers import FluxControlPipeline
from diffusers.utils import load_image

# Load the base FLUX.1-dev pipeline in bfloat16, move it to the GPU, then attach the Canny LoRA weights.
pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
pipe = pipe.to("cuda")
pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")

# Run the Canny detector on the reference image; its output is used as the control image.
source_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")
canny = CannyDetector()
control_image = canny(
    source_image,
    low_threshold=50,
    high_threshold=200,
    detect_resolution=1024,
    image_resolution=1024,
)

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."

# Generate with the control image and keep the first image of the result.
result = pipe(
    prompt=prompt,
    control_image=control_image,
    height=1024,
    width=1024,
    num_inference_steps=50,
    guidance_scale=30.0,
)
image = result.images[0]
image.save("output.png")
```

### Depth Control

**Note:** `black-forest-labs/FLUX.1-Depth-dev` is _not_ a ControlNet model. [`ControlNetModel`] models are a separate component from the UNet/Transformer whose residuals are added to the actual underlying model. Depth Control is an alternate architecture that achieves effectively the same results as a ControlNet model would, by using channel-wise concatenation with the input control condition and ensuring the transformer learns structure control by following the condition as closely as possible.
Expand Down Expand Up @@ -174,6 +203,36 @@ image = pipe(
image.save("output.png")
```

Depth Control is also possible with a LoRA variant of this condition. The usage is as follows:

```python
# !pip install git+https://github.com/huggingface/image_gen_aux
import torch
from diffusers import FluxControlPipeline
from diffusers.utils import load_image
from image_gen_aux import DepthPreprocessor

# Load the base FLUX.1-dev pipeline in bfloat16 on the GPU, then attach the Depth LoRA weights.
pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16).to("cuda")
pipe.load_lora_weights("black-forest-labs/FLUX.1-Depth-dev-lora")

prompt = "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts."
control_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/robot.png")

# Run the depth preprocessor on the reference image; its first output, converted to RGB, is the control image.
processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
control_image = processor(control_image)[0].convert("RGB")

image = pipe(
    prompt=prompt,
    control_image=control_image,
    height=1024,
    width=1024,
    num_inference_steps=30,
    guidance_scale=10.0,
    # Fixed seed so repeated runs start from the same random state.
    generator=torch.Generator().manual_seed(42),
).images[0]
image.save("output.png")
```

### Redux

* Flux Redux pipeline is an adapter for FLUX.1 base models. It can be used with both flux-dev and flux-schnell, for image-to-image generation.
Expand Down
3 changes: 1 addition & 2 deletions examples/cogvideo/train_cogvideox_image_to_video_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,10 +872,9 @@ def prepare_rotary_positional_embeddings(
crops_coords=grid_crops_coords,
grid_size=(grid_height, grid_width),
temporal_size=num_frames,
device=device,
)

freqs_cos = freqs_cos.to(device=device)
freqs_sin = freqs_sin.to(device=device)
return freqs_cos, freqs_sin


Expand Down
3 changes: 1 addition & 2 deletions examples/cogvideo/train_cogvideox_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,10 +894,9 @@ def prepare_rotary_positional_embeddings(
crops_coords=grid_crops_coords,
grid_size=(grid_height, grid_width),
temporal_size=num_frames,
device=device,
)

freqs_cos = freqs_cos.to(device=device)
freqs_sin = freqs_sin.to(device=device)
return freqs_cos, freqs_sin


Expand Down
36 changes: 23 additions & 13 deletions src/diffusers/image_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def denormalize(images: Union[np.ndarray, torch.Tensor]) -> Union[np.ndarray, to
`np.ndarray` or `torch.Tensor`:
The denormalized image array.
"""
return (images / 2 + 0.5).clamp(0, 1)
return (images * 0.5 + 0.5).clamp(0, 1)

@staticmethod
def convert_to_rgb(image: PIL.Image.Image) -> PIL.Image.Image:
Expand Down Expand Up @@ -537,6 +537,26 @@ def binarize(self, image: PIL.Image.Image) -> PIL.Image.Image:

return image

def _denormalize_conditionally(
    self, images: torch.Tensor, do_denormalize: Optional[List[bool]] = None
) -> torch.Tensor:
    r"""
    Denormalize a batch of images based on a condition list.

    Args:
        images (`torch.Tensor`):
            The input image tensor. Its first dimension is indexed as the batch dimension.
        do_denormalize (`Optional[List[bool]]`, *optional*, defaults to `None`):
            A list of booleans indicating whether to denormalize each image in the batch. If `None`, will use the
            value of `do_normalize` in the `VaeImageProcessor` config.

    Returns:
        `torch.Tensor`:
            The batch with `self.denormalize` applied to the selected images. When `do_denormalize` is `None` and
            `do_normalize` is disabled in the config, `images` is returned as-is.
    """
    if do_denormalize is None:
        # One config flag covers the whole batch, so handle it in a single call instead of per image.
        return self.denormalize(images) if self.config.do_normalize else images

    # Per-image flags: process each image on its own and restack along the batch dimension.
    return torch.stack(
        [self.denormalize(images[i]) if do_denormalize[i] else images[i] for i in range(images.shape[0])]
    )

def get_default_height_width(
self,
image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
Expand Down Expand Up @@ -752,12 +772,7 @@ def postprocess(
if output_type == "latent":
return image

if do_denormalize is None:
do_denormalize = [self.config.do_normalize] * image.shape[0]

image = torch.stack(
[self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
)
image = self._denormalize_conditionally(image, do_denormalize)

if output_type == "pt":
return image
Expand Down Expand Up @@ -966,12 +981,7 @@ def postprocess(
deprecate("Unsupported output_type", "1.0.0", deprecation_message, standard_warn=False)
output_type = "np"

if do_denormalize is None:
do_denormalize = [self.config.do_normalize] * image.shape[0]

image = torch.stack(
[self.denormalize(image[i]) if do_denormalize[i] else image[i] for i in range(image.shape[0])]
)
image = self._denormalize_conditionally(image, do_denormalize)

image = self.pt_to_numpy(image)

Expand Down
Loading