<a href="https://colab.research.google.com/github/MichaelPaulukonis/notebooks/blob/main/IllusionDiffusionGenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# How to use this Illusion Diffusion Generator
Credit goes to the original app on [Hugging Face](https://huggingface.co/spaces/AP123/IllusionDiffusion).

To use this notebook, click the play button next to each section.
1. The first section installs the packages, loads the models, and defines the functions used later in the notebook.
2. The second section is where you upload your original image.
3. The third section holds the generation settings. If you don't want to mess around with the settings, just set `prompt` to a description of the image you want to generate and leave the rest as it is.
4. Finally, the last section generates the image.

In [None]:
# @title Installing and Importing the Packages. Then Function Definitions
# @markdown ###Will take some time to run. So please be patient.
# Install the notebook's dependencies. Each `!pip` line runs as a shell
# command; `--quiet` suppresses most of pip's output.
!pip install diffusers --quiet
!pip install transformers --quiet
!pip install accelerate --quiet
!pip install xformers --quiet
!pip install gradio --quiet
!pip install Pillow --quiet
!pip install qrcode --quiet
!pip install filelock --quiet
# NOTE(review): gradio is installed a second time here, from a wheel URL for a
# specific 3.45.1 build, after the unpinned install above. Presumably this
# pinned build is the one the code below relies on (it imports
# gradio.processing_utils) -- confirm before changing either line.
!pip install https://gradio-builds.s3.amazonaws.com/52ceac5ecd12fa0990273dcb69c899abfb9c6a27/gradio-3.45.1-py3-none-any.whl --quiet
# torch is requested last, with the PyTorch cu118 wheel index added as an
# extra package index.
!pip install torch --extra-index-url https://download.pytorch.org/whl/cu118 --quiet

import torch
import gradio as gr
from gradio import processing_utils, utils
from PIL import Image
import random
from diffusers import (
    DiffusionPipeline,
    AutoencoderKL,
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    StableDiffusionLatentUpscalePipeline,
    StableDiffusionImg2ImgPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    DPMSolverMultistepScheduler,  # <-- Added import
    EulerDiscreteScheduler  # <-- Added import
)
import time

# Hugging Face model id of the Stable Diffusion checkpoint used for generation.
BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"

# A separately loaded VAE is handed to the pipeline explicitly (the base model
# id is a "_noVAE" build). Everything is loaded in half precision.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
controlnet = ControlNetModel.from_pretrained("monster-labs/control_v1p_sd15_qrcode_monster", torch_dtype=torch.float16)

# First-pass pipeline: text + control image -> image/latents. Moved to the GPU.
main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    BASE_MODEL,
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
).to("cuda")

# Second-pass (img2img) pipeline built from the same components as main_pipe,
# so no additional weights are loaded.
image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components)

# Maps a sampler display name to a factory that builds the scheduler from an
# existing scheduler config.
SAMPLER_MAP = {
    # The scheduler option is named `use_karras_sigmas`. The previous
    # `use_karras=True` is not a scheduler argument, so Karras sigmas were
    # never actually enabled for this sampler.
    "DPM++ Karras SDE": lambda config: DPMSolverMultistepScheduler.from_config(
        config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
    ),
    "Euler": lambda config: EulerDiscreteScheduler.from_config(config),
}

# @title Function Definitions
def inference(
    control_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float = 8.0,
    controlnet_conditioning_scale: float = 1,
    control_guidance_start: float = 1,
    control_guidance_end: float = 1,
    upscaler_strength: float = 0.5,
    seed: int = -1,
    sampler = "DPM++ Karras SDE",
    progress = gr.Progress(track_tqdm=True)
):
    """Generate an image in two passes and report wall-clock timing.

    Pass 1 runs ``main_pipe`` on the control image at ``center_crop_resize``'s
    default size for 15 steps and returns latents. The latents are upscaled 2x
    with "nearest-exact" interpolation, then pass 2 runs ``image_pipe``
    (img2img) for 20 steps against a 1024x1024 version of the control image.

    Args:
        control_image: Image used as the ControlNet conditioning input.
        prompt: Text prompt forwarded to both passes.
        negative_prompt: Negative prompt forwarded to both passes.
        guidance_scale: Forwarded (as float) to both passes.
        controlnet_conditioning_scale: Forwarded (as float) to both passes.
        control_guidance_start: Forwarded (as float) to both passes.
        control_guidance_end: Forwarded (as float) to both passes.
        upscaler_strength: Passed as ``strength`` to the img2img pass.
        seed: Seed for the torch generator; ``-1`` picks a random seed in
            ``[0, 2**32 - 1]``.
        sampler: Key into ``SAMPLER_MAP`` selecting main_pipe's scheduler.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        A 4-tuple: the first generated image, two ``gr.update(visible=True)``
        objects, and the seed that was actually used.

    NOTE(review): ``control_guidance_start`` and ``control_guidance_end`` both
    default to 1. The diffusers ControlNet pipelines appear to reject
    start >= end, so callers should probably always pass these explicitly --
    confirm.
    """
    began = time.time()
    print(f"Inference started at {time.strftime('%H:%M:%S', time.localtime(began))}")

    # Two sizes of the same control image: one per pass.
    small_control = center_crop_resize(control_image)
    large_control = center_crop_resize(control_image, (1024, 1024))

    main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)
    my_seed = seed if seed != -1 else random.randint(0, 2**32 - 1)
    generator = torch.Generator(device="cuda").manual_seed(my_seed)

    # Arguments common to both pipeline calls. The same generator object is
    # reused, so the second pass continues from the first pass's RNG state.
    shared_kwargs = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=float(guidance_scale),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        control_guidance_start=float(control_guidance_start),
        control_guidance_end=float(control_guidance_end),
        generator=generator,
    )

    first_pass = main_pipe(
        image=small_control,
        num_inference_steps=15,
        output_type="latent",
        **shared_kwargs,
    )
    upscaled_latents = upscale(first_pass, "nearest-exact", 2)
    second_pass = image_pipe(
        control_image=large_control,
        image=upscaled_latents,
        num_inference_steps=20,
        strength=upscaler_strength,
        **shared_kwargs,
    )

    finished = time.time()
    print(f"Inference ended at {time.strftime('%H:%M:%S', time.localtime(finished))}, taking {finished-began}s")
    return second_pass["images"][0], gr.update(visible=True), gr.update(visible=True), my_seed

def center_crop_resize(img, output_size=(512, 512)):
    """Crop the largest centered square out of *img* and resize it.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` plus
            ``crop(box)`` and ``resize(size)`` (a PIL image in this notebook).
        output_size: ``(width, height)`` handed to ``resize``.

    Returns:
        The cropped and resized image.
    """
    width, height = img.size
    side = min(width, height)

    # Use integer offsets and derive right/bottom from left/top so the box is
    # always exactly side x side. Float midpoints (e.g. 1.5 and 4.5 for a 6x3
    # image) are rounded independently by Pillow using half-to-even rounding,
    # which can yield a non-square crop (2x3 in that example) that the resize
    # then stretches.
    left = (width - side) // 2
    top = (height - side) // 2

    return img.crop((left, top, left + side, top + side)).resize(output_size)

def common_upscale(samples, width, height, upscale_method, crop=False):
    """Resize a 4-D tensor to ``(height, width)``, optionally center-cropping first.

    Args:
        samples: Tensor whose dims 2 and 3 are treated as height and width.
        width: Target width.
        height: Target height.
        upscale_method: Interpolation ``mode`` for
            ``torch.nn.functional.interpolate``.
        crop: Only the value ``"center"`` triggers cropping; it trims the
            longer axis symmetrically so the input matches the target aspect
            ratio. Any other value (including the default) leaves the input
            untouched.

    Returns:
        The interpolated tensor.
    """
    source = samples
    if crop == "center":
        src_h, src_w = samples.shape[2], samples.shape[3]
        src_ratio = src_w / src_h
        dst_ratio = width / height
        trim_x = trim_y = 0
        if src_ratio > dst_ratio:
            # Input is too wide: trim columns from both sides.
            trim_x = round((src_w - src_w * (dst_ratio / src_ratio)) / 2)
        elif src_ratio < dst_ratio:
            # Input is too tall: trim rows from top and bottom.
            trim_y = round((src_h - src_h * (src_ratio / dst_ratio)) / 2)
        source = samples[:, :, trim_y:src_h - trim_y, trim_x:src_w - trim_x]

    return torch.nn.functional.interpolate(source, size=(height, width), mode=upscale_method)

def upscale(samples, upscale_method, scale_by):
    """Scale ``samples["images"]`` by *scale_by* in both spatial dimensions.

    Args:
        samples: Mapping-like object whose ``"images"`` entry is a 4-D tensor
            (dims 2 and 3 are used as height and width).
        upscale_method: Interpolation mode forwarded to ``common_upscale``.
        scale_by: Multiplier applied to height and width; results are rounded.

    Returns:
        The resized tensor (no cropping is applied).
    """
    latents = samples["images"]
    target_w = round(latents.shape[3] * scale_by)
    target_h = round(latents.shape[2] * scale_by)
    return common_upscale(latents, target_w, target_h, upscale_method, "disabled")

def check_inputs(prompt: str, control_image: Image.Image):
    """Validate the user inputs before running generation.

    Raises:
        gr.Error: If *control_image* is ``None``, or if *prompt* is ``None``
            or the empty string. The image is checked first.
    """
    if control_image is None:
        raise gr.Error("Please select or upload an Input Illusion")

    prompt_missing = prompt is None or prompt == ""
    if prompt_missing:
        raise gr.Error("Prompt is required")

def convert_to_pil(base64_image):
    """Decode a ``data:image...`` base64 string via gradio's processing utils.

    Args:
        base64_image: String expected to start with ``"data:image"``.

    Returns:
        Whatever ``processing_utils.decode_base64_to_image`` returns for the
        input.

    Raises:
        AttributeError: If ``processing_utils`` has no
            ``decode_base64_to_image`` attribute (checked first).
        ValueError: If *base64_image* is not a string starting with
            ``"data:image"``.
    """
    # Fail with an explicit message when the installed gradio lacks the decoder.
    if not hasattr(processing_utils, 'decode_base64_to_image'):
        raise AttributeError("processing_utils module does not have decode_base64_to_image function")

    looks_like_data_uri = isinstance(base64_image, str) and base64_image.startswith("data:image")
    if not looks_like_data_uri:
        raise ValueError("base64_image should be a valid base64-encoded image")

    return processing_utils.decode_base64_to_image(base64_image)


def convert_to_base64(pil_image):
    """Return *pil_image* encoded by ``processing_utils.encode_pil_to_base64``."""
    return processing_utils.encode_pil_to_base64(pil_image)

def get_image_input(image_path):
    """Open the image at *image_path*, returning ``None`` on any failure.

    This is deliberately best-effort: errors are reported with ``print``
    rather than raised, so callers must handle a ``None`` result.

    Args:
        image_path: Path passed to ``Image.open``.

    Returns:
        The opened image, or ``None`` if the file was missing or any other
        exception occurred.
    """
    try:
        loaded = Image.open(image_path)
    except FileNotFoundError:
        print("Image not found at the specified path.")
    except Exception as err:
        print("An error occurred:", str(err))
    else:
        return loaded
    return None

def save_image_as_file(pil_image, file_path):
    """Write *pil_image* to *file_path* by calling its ``save`` method."""
    pil_image.save(file_path)

In [None]:
# @title Upload Original Image
# Ask the user to upload a file through Colab. The result is kept in
# `uploaded`; the "Generate Output Image" cell uses its first key as the path
# of the input image.
from google.colab import files
uploaded = files.upload()

In [None]:
# @title Parameters (leave at default if dont know what to do)
# Module-level settings read by the "Generate Output Image" cell, which passes
# them positionally to inference(). The trailing `# @param {...}` comments are
# Colab form annotations (field type and slider range/step).
#   prompt / negative_prompt       -> forwarded to both pipeline passes
#   guidance_scale                 -> slider 0..50
#   controlnet_conditioning_scale  -> slider 0..5
#   control_guidance_start / _end  -> sliders 0..1
#   upscaler_strength              -> slider 0..1, used as `strength` in the
#                                     second (img2img) pass
prompt = "a small-town baseball game in the 1890s, very detailed painting by Norman Rockwell" # @param {type:"string"}
negative_prompt = "low quality image,bad, bad anatomy, too many limbs, too many fingers, too many legs, malformed" # @param {type:"string"}
guidance_scale = 8 # @param {type:"slider", min:0, max:50, step:0.25}
controlnet_conditioning_scale = 1 # @param {type:"slider", min:0, max:5, step:0.05}
control_guidance_start = 0.3 # @param {type:"slider", min:0, max:1, step:0.1}
control_guidance_end = 1 # @param {type:"slider", min:0, max:1, step:0.1}
upscaler_strength = 1 # @param {type:"slider", min:0, max:1, step:0.1}

In [None]:
# @title Generate Output Image
# Loads the uploaded image, runs inference with the settings from the
# "Parameters" cell, previews input and output inline, then saves and
# downloads the result as output.png.
from google.colab import files

seed = random.randint(1,9999999999)
image_path = list(uploaded.keys())[0]
state_img_input = get_image_input(image_path)

# Validate before touching the image: get_image_input returns None when the
# file cannot be opened, and reading `.size` from None would fail with an
# unhelpful AttributeError instead of the intended gr.Error.
check_inputs(prompt, state_img_input)

# Preview the input scaled to 600 px wide. Image.ANTIALIAS was removed in
# Pillow 10; Image.LANCZOS is the same filter under its current name.
s = state_img_input.size
ratio = 600/s[0]
display(state_img_input.resize((int(s[0]*ratio), int(s[1]*ratio)), Image.LANCZOS))

state_img_output, result_image, share_group, used_seed = inference(
    state_img_input,
    prompt,
    negative_prompt,
    guidance_scale,
    controlnet_conditioning_scale,
    control_guidance_start,
    control_guidance_end,
    upscaler_strength,
    seed,
    "DPM++ Karras SDE",
)

# Round-trip through gradio's base64 helpers, as the original cell did.
result_image = convert_to_base64(state_img_output)
pil_image = convert_to_pil(result_image)

# Scale the preview from the output's own dimensions. The generated image is
# a center-cropped square, so reusing the input's width/height would stretch
# it whenever the input is not square.
out_size = pil_image.size
out_ratio = 600/out_size[0]
display(pil_image.resize((int(out_size[0]*out_ratio), int(out_size[1]*out_ratio)), Image.LANCZOS))

save_image_as_file(pil_image,"output.png")

files.download('output.png')