# Copyright

<PRE>
This notebook was created as part of the "UX Laboratory / BMEVITMMB14" class at
Budapest University of Technology and Economics, Hungary,
https://portal.vik.bme.hu/kepzes/targyak/VITMMB14/.

Any re-use or publication of any part of the notebook is only allowed with the
written consent of the authors.

2024 (c) András Béres
</PRE>

## Install the required packages:
* [gradio](https://www.gradio.app/docs): UI library with Python interface
* [accelerate](https://huggingface.co/docs/accelerate/index): Huggingface library for distributed training and inference
* [diffusers](https://huggingface.co/docs/diffusers/index): Diffusion model library from Huggingface supporting a range of generative diffusion models, pipelines and schedulers
* [controlnet_aux](https://github.com/huggingface/controlnet_aux): ControlNet-related utilities and auxiliary models

In [1]:
!pip install "gradio<4.0" accelerate diffusers controlnet_aux "numpy<2.0"



In [2]:
import random
import numpy as np
import cv2
import torch
import gradio as gr
import controlnet_aux
import PIL.Image
from diffusers import (
    ControlNetModel,
    DiffusionPipeline,
    StableDiffusionControlNetPipeline,
    UniPCMultistepScheduler,
)



In [3]:
# Image-count slider: upper bound and initial value.
MAX_NUM_IMAGES, DEFAULT_NUM_IMAGES = 5, 3
# Resolution slider: upper bound and initial value.
MAX_IMAGE_RESOLUTION, DEFAULT_IMAGE_RESOLUTION = 768, 768

# Largest signed 32-bit integer; serves as the upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max

In [4]:
# utility function for image resizing
def resize_image(input_image, resolution, interpolation=None):
    """Resize an image so that its longer side is about ``resolution`` pixels.

    The aspect ratio is kept approximately: each target side is rounded to the
    nearest multiple of 64 and is never allowed to drop below 64, so very
    elongated inputs or small resolutions cannot yield a zero-sized target
    (which ``cv2.resize`` rejects).

    Args:
        input_image: Image array of shape (H, W) or (H, W, C).
        resolution: Desired length of the longer side before rounding.
        interpolation: OpenCV interpolation flag. When ``None``, Lanczos is
            used for upscaling and area interpolation otherwise.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    # Only the spatial dimensions are needed; this also accepts 2-D input.
    height, width = input_image.shape[:2]
    scale = float(resolution) / max(height, width)

    def _snap(side):
        # nearest multiple of 64, but at least one 64-pixel block
        return max(64, int(np.round(side * scale / 64.0)) * 64)

    target_h = _snap(height)
    target_w = _snap(width)

    # area interpolation for downsizing, lanczos for upsizing
    if interpolation is None:
        interpolation = cv2.INTER_LANCZOS4 if scale > 1 else cv2.INTER_AREA
    # cv2.resize expects the target size as (width, height)
    return cv2.resize(input_image, (target_w, target_h), interpolation=interpolation)

In [5]:
# target device for the pipeline; the models below are loaded in float16
device = "cuda"
task_name = "scribble"

# using scribble-based ControlNet with Stable Diffusion 1.5
base_model_id = "runwayml/stable-diffusion-v1-5"
model_id = "lllyasviel/control_v11p_sd15_scribble"

# instantiate the model and its pipeline from diffusers
controlnet = ControlNetModel.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    base_model_id, controlnet=controlnet, torch_dtype=torch.float16
)
# swap in the UniPC multistep scheduler, built from the existing scheduler's config
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# move the whole pipeline to the selected device
pipe.to(device)

# image generation from scribble input
# using torch.inference_mode to disable gradient tracking
@torch.inference_mode()
def process_scribble_interactive(
    image_and_mask: dict[str, np.ndarray],
    prompt: str,
    additional_prompt: str,
    negative_prompt: str,
    num_images: int,
    image_resolution: int,
    num_steps: int,
    guidance_scale: float,
    seed: int,
) -> list[PIL.Image.Image]:
    """Generate images from a user-drawn scribble with the ControlNet pipeline.

    Args:
        image_and_mask: Mapping whose ``"mask"`` entry holds the drawn strokes.
        prompt: Main text prompt; may be empty.
        additional_prompt: Extra prompt text appended to ``prompt``; may be empty.
        negative_prompt: Negative prompt forwarded to the pipeline.
        num_images: Number of images to generate (at most ``MAX_NUM_IMAGES``).
        image_resolution: Resolution the scribble is resized to
            (at most ``MAX_IMAGE_RESOLUTION``).
        num_steps: Number of inference steps.
        guidance_scale: Guidance scale forwarded to the pipeline.
        seed: Seed for the random generator, making runs repeatable.

    Returns:
        The control image followed by the generated images.

    Raises:
        ValueError: If no drawing was supplied, or if ``image_resolution`` or
            ``num_images`` exceeds its configured maximum.
    """
    if image_and_mask is None:
        raise ValueError("No drawing received: image_and_mask is None.")
    if image_resolution > MAX_IMAGE_RESOLUTION:
        raise ValueError(
            f"image_resolution={image_resolution} exceeds the maximum of "
            f"{MAX_IMAGE_RESOLUTION}."
        )
    if num_images > MAX_NUM_IMAGES:
        raise ValueError(
            f"num_images={num_images} exceeds the maximum of {MAX_NUM_IMAGES}."
        )

    # the strokes are read from the "mask" entry of the sketch input
    scribble = image_and_mask["mask"]
    # presumably normalises the array to 3-channel HWC layout - verify against controlnet_aux
    scribble = controlnet_aux.util.HWC3(scribble)
    scribble = resize_image(scribble, resolution=image_resolution)
    control_image = PIL.Image.fromarray(scribble)

    # join only the non-empty parts so an empty field leaves no dangling ", "
    prompt = ", ".join(part for part in (prompt, additional_prompt) if part)

    # seeded generator so that the same seed reproduces the same images
    generator = torch.Generator().manual_seed(seed)
    results = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_images_per_prompt=num_images,
        num_inference_steps=num_steps,
        generator=generator,
        image=control_image,
    ).images
    # show the conditioning scribble first, then the generated images
    return [control_image] + results

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/999 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/1.45G [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.72k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [6]:
# random seed utility
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Pick the seed to use for the next generation.

    Args:
        seed: Seed currently set by the caller.
        randomize_seed: When true, ignore ``seed`` and draw a new one.

    Returns:
        ``seed`` unchanged, or a random integer in ``[0, MAX_SEED]`` when
        ``randomize_seed`` is true.
    """
    if not randomize_seed:
        return seed
    return random.randint(0, MAX_SEED)


# create Gradio-based user interface
# based on: https://huggingface.co/spaces/hysts/ControlNet
def create_demo(process):
    """Build the Gradio Blocks UI for scribble-conditioned image generation.

    Args:
        process: Callable invoked with the current values of (drawing, prompt,
            additional prompt, negative prompt, number of images, image
            resolution, number of steps, guidance scale, seed), in that order.
            Its return value is displayed in the output gallery.

    Returns:
        The assembled ``gr.Blocks`` demo; it is not launched here.
    """
    # UI components
    with gr.Blocks() as demo:
        # blank white canvas the user draws on
        white_image = np.full(
            shape=(DEFAULT_IMAGE_RESOLUTION, DEFAULT_IMAGE_RESOLUTION, 3),
            fill_value=255,
            dtype=np.uint8,
        )
        with gr.Row():
            gr.Markdown("# Image Generation Tool")
        with gr.Row():
            image = gr.Image(tool="sketch", brush_radius=10, label="Draw", value=white_image)
        with gr.Row():
            prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
        with gr.Row():
            a_prompt = gr.Textbox(label="Additional Prompt")
            n_prompt = gr.Textbox(label="Negative Prompt")
        with gr.Row():
            num_samples = gr.Slider(
                label="Number of Images",
                minimum=1,
                maximum=MAX_NUM_IMAGES,
                value=DEFAULT_NUM_IMAGES,
                step=1,
            )
            image_resolution = gr.Slider(
                label="Image Resolution",
                minimum=256,
                maximum=MAX_IMAGE_RESOLUTION,
                value=DEFAULT_IMAGE_RESOLUTION,
                step=256,
            )
        with gr.Row():
            # Start from usable values: a single denoising step yields little
            # more than noise, and a guidance scale of 1.0 effectively turns
            # classifier-free guidance off, so the negative prompt is ignored.
            num_steps = gr.Slider(label="Number of Steps", minimum=1, maximum=100, value=20, step=1)
            guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
        with gr.Row():
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
        with gr.Row():
            run_button = gr.Button("Generate Image")
        with gr.Row():
            result = gr.Gallery(label="Output", columns=2, object_fit="scale-down")

        # UI behaviour
        # order must match the positional parameters of `process`
        inputs = [
            image,
            prompt,
            a_prompt,
            n_prompt,
            num_samples,
            image_resolution,
            num_steps,
            guidance_scale,
            seed,
        ]
        # Both triggers first update the seed slider (randomising it when the
        # checkbox is ticked) and only then run generation with that seed.
        prompt.submit(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
            outputs=seed,
            queue=False,
            api_name=False,
        ).then(
            fn=process,
            inputs=inputs,
            outputs=result,
            api_name=False,
        )
        run_button.click(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
            outputs=seed,
            queue=False,
            api_name=False,
        ).then(
            fn=process,
            inputs=inputs,
            outputs=result,
        )
    return demo


# build the UI around the scribble-to-image generation function
demo = create_demo(process_scribble_interactive)
# enable request queuing and start the server; debug=True keeps the cell
# running so that errors and logs remain visible
demo.queue().launch(debug=True)  # 40 s

# Hint: test the demo using the public URL given in the output of this cell

IMPORTANT: You are using gradio version 3.50.2, however version 4.44.1 is available, please upgrade.
--------
Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://79b7faa211121dab1d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://79b7faa211121dab1d.gradio.live


