## ControlNet
* ControlNet is a type of model for controlling image diffusion models by conditioning the model with an additional input image. There are many types of conditioning inputs (canny edge, user sketching, human pose, depth map, M-LSD, HED, ADE20K) you can use to control a diffusion model. This is hugely useful because it affords you greater control over image generation, making it easier to generate specific images without experimenting with different text prompts or denoising values as much.

* Paper: [Adding Conditional Control to Text-to-Image Diffusion Models (arXiv:2302.05543)](https://arxiv.org/pdf/2302.05543)

In [None]:
import torch
from torch import nn
import intel_extension_for_pytorch as ipex
from diffusers.utils import load_image, make_image_grid
from PIL import Image
import cv2
import numpy as np
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler

In [None]:
# Download the reference picture (a house outline, going by the URL) and
# then scale it to 1024x1024 as a separate step.
original_image = load_image(
    "https://img.freepik.com/premium-photo/house-outline-illustration-white-background_1112329-31710.jpg?w=1480"
)
original_image = original_image.resize((1024, 1024))

def optimize_pipeline(pipeline):
    """
    Optimizes the model for inference using ipex.

    Every attribute of ``pipeline`` that is an ``nn.Module`` is put in eval
    mode, passed through ``ipex.optimize`` with ``inplace=True`` and assigned
    back onto the pipeline under the same attribute name.

    Parameters:
    - pipeline: The model pipeline to be optimized. The dtype handed to
      ``ipex.optimize`` is read from ``pipeline.text_encoder.dtype``; if the
      pipeline has no ``text_encoder`` (or it has no ``dtype``), ``None`` is
      passed instead.

    Returns:
    - pipeline: The optimized model pipeline (the same object that was
      passed in).
    """
    # Resolve the target dtype once, outside the loop. Previously a missing
    # ``text_encoder`` raised AttributeError inside the loop for every
    # module, which was swallowed and silently left the pipeline untouched.
    # NOTE(review): dtype=None is expected to mean "leave parameter dtypes
    # unchanged" per the ipex.optimize documentation - confirm for the
    # installed ipex version.
    text_encoder = getattr(pipeline, "text_encoder", None)
    target_dtype = getattr(text_encoder, "dtype", None)

    for attr in dir(pipeline):
        try:
            component = getattr(pipeline, attr)
        except AttributeError:
            # dir() may list names that cannot actually be read; skip them.
            continue

        if not isinstance(component, nn.Module):
            continue

        # Errors raised by ipex.optimize itself are deliberately not
        # swallowed here, so a failed optimization is visible to the caller.
        optimized = ipex.optimize(
            component.eval(),
            dtype=target_dtype,
            inplace=True,
        )
        setattr(pipeline, attr, optimized)
    return pipeline

# --- Build the Canny edge map used as the ControlNet conditioning image ---
image = np.array(original_image)

# Lower / upper thresholds handed to cv2.Canny.
low_threshold = 100
high_threshold = 200

image = cv2.Canny(image, low_threshold, high_threshold)
# The edge map is 2-D; add a channel axis and repeat it three times so it can
# be turned back into a three-channel PIL image.
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

# NOTE(review): image_tensor is not referenced again in this cell (the
# pipeline below is given the PIL canny_image); kept in case other cells use it.
image_tensor = torch.tensor(np.array(canny_image)).to("xpu")

# --- Load the Canny ControlNet and the Stable Diffusion pipeline in bf16 ---
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny", torch_dtype=torch.bfloat16, use_safetensors=True
)

pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.bfloat16, use_safetensors=True
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("xpu")
pipe = optimize_pipeline(pipe)

# --- Generate one image conditioned on the edge map ---
with torch.inference_mode(), torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
    output = pipe(
        "Colorful", image=canny_image
    ).images[0]

# --- Save original / edges / result side by side ---
image_grid = make_image_grid([original_image, canny_image, output], rows=1, cols=3)
image_grid_np = np.array(image_grid)
# The grid is a PIL image, i.e. RGB channel order, whereas cv2.imwrite
# interprets arrays as BGR; writing the array with cv2 swapped red and blue in
# the saved file. Saving through PIL keeps the colours correct.
image_grid.save("house.png")