Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions demo/Diffusion/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ This demo application ("demoDiffusion") showcases the acceleration of Stable Dif
### Clone the TensorRT OSS repository

```bash
git clone git@github.com:NVIDIA/TensorRT.git -b release/10.13 --single-branch
git clone git@github.com:NVIDIA/TensorRT.git -b release/sd35 --single-branch
cd TensorRT
```

Expand Down Expand Up @@ -210,7 +210,7 @@ Run the command below to generate an image using Stable Diffusion 3 and Stable D
python3 demo_txt2img_sd3.py "A vibrant street wall covered in colorful graffiti, the centerpiece spells \"SD3 MEDIUM\", in a storm of colors" --version sd3 --hf-token=$HF_TOKEN

# Stable Diffusion 3.5-medium
python3 demo_txt2img_sd35.py "a beautiful photograph of Mt. Fuji during cherry blossom" --version=3.5-medium --denoising-steps=30 --guidance-scale 3.5 --hf-token=$HF_TOKEN --bf16
python3 demo_txt2img_sd35.py "a beautiful photograph of Mt. Fuji during cherry blossom" --version=3.5-medium --denoising-steps=30 --guidance-scale 3.5 --hf-token=$HF_TOKEN --bf16 --download-onnx-models

# Stable Diffusion 3.5-large
python3 demo_txt2img_sd35.py "a beautiful photograph of Mt. Fuji during cherry blossom" --version=3.5-large --denoising-steps=30 --guidance-scale 3.5 --hf-token=$HF_TOKEN --bf16 --download-onnx-models
Expand All @@ -234,13 +234,13 @@ Note that a denoising-percentage is applied to the number of denoising-steps when

```bash
# Depth
python3 demo_controlnet_sd35.py "a photo of a man" --controlnet-type depth --hf-token=$HF_TOKEN --denoising-steps 40 --guidance-scale 4.5 --bf16
python3 demo_controlnet_sd35.py "a photo of a man" --controlnet-type depth --hf-token=$HF_TOKEN --denoising-steps 40 --guidance-scale 4.5 --bf16 --download-onnx-models

# Canny
python3 demo_controlnet_sd35.py "A Night time photo taken by Leica M11, portrait of a Japanese woman in a kimono, looking at the camera, Cherry blossoms" --controlnet-type canny --hf-token=$HF_TOKEN --denoising-steps 60 --guidance-scale 3.5 --bf16
python3 demo_controlnet_sd35.py "A Night time photo taken by Leica M11, portrait of a Japanese woman in a kimono, looking at the camera, Cherry blossoms" --controlnet-type canny --hf-token=$HF_TOKEN --denoising-steps 60 --guidance-scale 3.5 --bf16 --download-onnx-models

# Blur
python3 demo_controlnet_sd35.py "generated ai art, a tiny, lost rubber ducky in an action shot close-up, surfing the humongous waves, inside the tube, in the style of Kelly Slater" --controlnet-type blur --hf-token=$HF_TOKEN --denoising-steps 60 --guidance-scale 3.5 --bf16
python3 demo_controlnet_sd35.py "generated ai art, a tiny, lost rubber ducky in an action shot close-up, surfing the humongous waves, inside the tube, in the style of Kelly Slater" --controlnet-type blur --hf-token=$HF_TOKEN --denoising-steps 60 --guidance-scale 3.5 --bf16 --download-onnx-models
```

### Generate a video guided by an initial image using Stable Video Diffusion
Expand Down
67 changes: 28 additions & 39 deletions demo/Diffusion/demo_controlnet_sd35.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def parseArgs():
parser.add_argument(
"--max-sequence-length",
type=int,
default=77,
default=256,
help="Maximum sequence length to use with the prompt.",
)
parser.add_argument(
Expand All @@ -55,17 +55,15 @@ def parseArgs():
)
parser.add_argument(
"--controlnet-type",
nargs="+",
type=str,
default=["canny"],
help="Controlnet type, can be `None`, `str` or `str` list from ['canny', 'depth', 'blur']",
default="canny",
help="Controlnet type (single type only), can be 'canny', 'depth', 'blur', etc.",
)
parser.add_argument(
"--controlnet-scale",
nargs="+",
type=float,
default=[1.0],
help="The outputs of the controlnet are multiplied by `controlnet_scale` before they are added to the residual in the original unet, can be `None`, `float` or `float` list",
default=1.0,
help="The outputs of the controlnet are multiplied by `controlnet_scale` before they are added to the residual in the original Transformer",
)
return parser.parse_args()

Expand Down Expand Up @@ -99,48 +97,39 @@ def process_demo_args(args):
)

# Controlnet configuration
if not isinstance(args.controlnet_type, list):
raise ValueError(
f"`--controlnet-type` must be of type `str` or `str` list, but is {type(args.controlnet_type)}"
)
if not isinstance(args.controlnet_type, str):
raise ValueError(f"`--controlnet-type` must be of type `str`, but is {type(args.controlnet_type)}")

# Controlnet configuration
if not isinstance(args.controlnet_scale, list):
raise ValueError(
f"`--controlnet-scale`` must be of type `float` or `float` list, but is {type(args.controlnet_scale)}"
)

# Check number of ControlNets to ControlNet scales
if len(args.controlnet_type) != len(args.controlnet_scale):
raise ValueError(
f"Numbers of ControlNets {len(args.controlnet_type)} should be equal to number of ControlNet scales {len(args.controlnet_scale)}."
)
if not isinstance(args.controlnet_scale, float):
raise ValueError(f"`--controlnet-scale` must be of type `float`, but is {type(args.controlnet_scale)}")

# Convert controlnet scales to tensor
controlnet_scale = torch.FloatTensor(args.controlnet_scale)
controlnet_scale = torch.FloatTensor([args.controlnet_scale])

# Check images
input_images = []
if len(args.control_image) > 0:
for image in args.control_image:
input_images.append(Image.open(image))
else:
for controlnet in args.controlnet_type:
if controlnet == "canny":
canny_image = image_module.download_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/canny.png")
input_images.append(canny_image.resize((args.height, args.width)))
elif controlnet == "depth":
depth_image = image_module.download_image(
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/marigold/marigold_einstein_lcm_depth.png"
)
input_images.append(depth_image.resize((args.height, args.width)))
elif controlnet == "blur":
blur_image = image_module.download_image(
"https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/blur.png"
)
input_images.append(blur_image.resize((args.height, args.width)))
else:
raise ValueError(f"You should implement the conditonal image of this controlnet: {controlnet}")
if args.controlnet_type == "canny":
canny_image = image_module.download_image(
"https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/canny.png"
)
input_images.append(canny_image.resize((args.height, args.width)))
elif args.controlnet_type == "depth":
depth_image = image_module.download_image(
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/marigold/marigold_einstein_lcm_depth.png"
)
input_images.append(depth_image.resize((args.height, args.width)))
elif args.controlnet_type == "blur":
blur_image = image_module.download_image(
"https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/blur.png"
)
input_images.append(blur_image.resize((args.height, args.width)))
else:
raise ValueError(f"You should implement the conditonal image of this controlnet: {args.controlnet_type}")
assert len(input_images) > 0

kwargs_run_demo = {
Expand All @@ -149,7 +138,7 @@ def process_demo_args(args):
"height": args.height,
"width": args.width,
"control_image": input_images,
"controlnet_scales": controlnet_scale,
"controlnet_scale": controlnet_scale,
"batch_count": args.batch_count,
"num_warmup_runs": args.num_warmup_runs,
"use_cuda_graph": args.use_cuda_graph,
Expand Down
4 changes: 2 additions & 2 deletions demo/Diffusion/demo_diffusion/dd_argparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def process_pipeline_args(args: argparse.Namespace) -> Tuple[Dict[str, Any], Dic
# int8 support
if args.int8 and not any(args.version.startswith(prefix) for prefix in ("xl", "1.4", "1.5", "2.1")):
raise ValueError("int8 quantization is only supported for SDXL, SD1.4, SD1.5 and SD2.1 pipelines.")

# fp8 support validation
if args.fp8:
# Check version compatibility
Expand Down Expand Up @@ -339,7 +339,7 @@ def process_pipeline_args(args: argparse.Namespace) -> Tuple[Dict[str, Any], Dic
raise ValueError(
"Native FP8 quantization is not supported for SD3.5-large. Please pass --download-onnx-models."
)

# TensorRT ModelOpt quantization level
if args.quantization_level == 0.0:
def override_quant_level(level: float, dtype_str: str):
Expand Down
4 changes: 2 additions & 2 deletions demo/Diffusion/demo_diffusion/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
FluxTransformerModel,
SD3_MMDiTModel,
SD3TransformerModel,
SD3TransformerModelControlNet,
)
from demo_diffusion.model.controlnet import SD3ControlNet
from demo_diffusion.model.gan import VQGANModel
from demo_diffusion.model.load import unload_torch_model
from demo_diffusion.model.lora import FLUXLoraLoader, SDLoraLoader, merge_loras
Expand Down Expand Up @@ -71,7 +71,7 @@
"SD3_MMDiTModel",
"FluxTransformerModel",
"SD3TransformerModel",
"SD3TransformerModelControlNet",
"SD3ControlNet",
# gan
"VQGANModel",
# lora
Expand Down
2 changes: 2 additions & 0 deletions demo/Diffusion/demo_diffusion/model/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __init__(
bf16=False,
int8=False,
fp8=False,
fp4=False,
max_batch_size=16,
text_maxlen=77,
embedding_dim=768,
Expand All @@ -63,6 +64,7 @@ def __init__(
self.bf16 = bf16
self.int8 = int8
self.fp8 = fp8
self.fp4 = fp4

self.compression_factor = compression_factor
self.min_batch = 1
Expand Down
Loading