# 5.2. ControlNet
## Dreambooth + ControlNet

ControlNet steers an image diffusion model by conditioning it on an additional input image (in this notebook, a Canny edge map of the source photo).

In [None]:
# Point the Hugging Face cache environment variables at the cluster cache directory.
# NOTE(review): presumably this cell has to run before the Hugging Face libraries
# are imported for the setting to take effect — confirm.
%env HF_HOME=/cluster/user/ehoemmen/.cache
%env HF_DATASETS_CACHE=/cluster/user/ehoemmen/.cache

In [None]:
!pip install diffusers --upgrade -q
!pip install opencv-python transformers mediapipe matplotlib accelerate -q

In [None]:
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel, AutoencoderKL, UniPCMultistepScheduler
from diffusers.utils import load_image
import numpy as np
import torch

import cv2
from PIL import Image

In [None]:
# create grid
from PIL import Image

#Image Grid
def image_grid(imgs, rows, cols):
    """Paste a list of images into a single ``rows`` x ``cols`` grid image.

    Images are placed in row-major order (left to right, then top to bottom).
    The cell size is taken from the first image; the other images are assumed
    to have the same size (this is not checked).

    Args:
        imgs: Sequence of images exposing ``.size``; its length must equal
            ``rows * cols``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)``, where ``(w, h)`` is
        the size of ``imgs[0]``.

    Raises:
        AssertionError: If ``len(imgs)`` differs from ``rows * cols``.
    """
    assert len(imgs) == rows * cols, (
        f"expected {rows * cols} images for a {rows}x{cols} grid, got {len(imgs)}"
    )

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))

    for i, img in enumerate(imgs):
        # divmod maps the flat index to (row, column) in row-major order
        row, col = divmod(i, cols)
        grid.paste(img, box=(col * w, row * h))
    return grid

In [None]:
# initialize the models and pipeline

# Single definition of the download/cache directory shared by every
# from_pretrained call below (previously repeated as a literal three times).
CACHE_DIR = "/cluster/user/ehoemmen/.cache"

controlnet_conditioning_scale = 0.5  # recommended for good generalization

# ControlNet checkpoint conditioned on Canny edge images
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
    cache_dir=CACHE_DIR,
)

# VAE passed to the pipeline in place of the base model's own VAE
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
    cache_dir=CACHE_DIR,
)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    cache_dir=CACHE_DIR,
)

# LoRA weights to apply; exactly one load_lora_weights call should be active.
#pipe.load_lora_weights("erikhsos/cbbier_02-3-images_LoRA_lr1-4_500")
# pipe.load_lora_weights("erikhsos/cbbier_06-15-images_LoRA_lr1-4_2000")
pipe.load_lora_weights("erikhsos/nesquik_15-images_LoRA_lr1-4_2000")


pipe.enable_model_cpu_offload()
#pipe.enable_sequential_cpu_offload()

In [None]:
# Unload the LoRA weights from the pipeline.
# NOTE(review): in a top-to-bottom "Run All" this cell executes right after the
# LoRA weights are loaded during pipeline setup, so the generation cell further
# down would run without them — confirm this is intended, otherwise skip this cell.

pipe.unload_lora_weights()

In [None]:
# Load the original input image (replace the path with your own file)
input_image_path = '/cluster/user/ehoemmen/development/tests_sonstiges/05_Masterarbeit/03_inpainting/campusbier_input.png'
image = load_image(input_image_path)

# Bare last expression so the notebook renders the loaded image
image

In [None]:
# List of the desired label colours; one image is generated per colour
colors = ["light green", "light blue", "olive", "grey"]

# Derive the image count from the colour list so the grid below stays in sync
# when colours are added or removed (a separately hard-coded count would trip
# the length check in image_grid).
num_images = len(colors)

# Build an individual prompt for each colour
prompts = [f"a [CB] bottle photo with a {color} label with the text CAMPUSBIER" for color in colors]
neg_prompt = "green label, brown bottle"

# Load the original image
original_image = load_image(
   '/cluster/user/ehoemmen/development/tests_sonstiges/05_Masterarbeit/03_inpainting/campusbier_input.png'
)

# Build the Canny edge image that is passed to the pipeline as `image`
image = np.array(original_image)
image = cv2.Canny(image, 100, 200)  # lower / upper thresholds
# Replicate the single-channel edge map into three identical channels
image = np.stack([image, image, image], axis=2)
canny_image = Image.fromarray(image)

# Seeded generator handed to the pipeline call
generator = torch.manual_seed(493)

# Generate the images from the individual prompts
generated_images = pipe(
    prompts,
    negative_prompt=neg_prompt,
    num_inference_steps=25,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    image=canny_image,
    generator=generator,
).images

# Arrange the generated images in a single-row grid
grid = image_grid(generated_images, rows=1, cols=num_images)

grid