In [None]:
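# Available modes (lowercase keys of `controlnet_libs` below): pose, scribble, canny, hed, depth.
# NOTE: 'seg' is not registered in `controlnet_libs`, so it cannot be passed to `load_pipeline`.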
import cv2
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image
import numpy as np
import torch
from transformers import AutoImageProcessor, UperNetForSemanticSegmentation

# Maps each supported mode name to the identifier that `load_pipeline`
# passes to `ControlNetModel.from_pretrained`.
controlnet_libs = {
    'pose': "lllyasviel/sd-controlnet-openpose",
    'scribble': "lllyasviel/sd-controlnet-scribble",
    'canny': "lllyasviel/sd-controlnet-canny",
    'hed': "lllyasviel/sd-controlnet-hed",
    'depth': "lllyasviel/sd-controlnet-depth"
}


def _controlnet_model_id(mode):
    """Return the `controlnet_libs` entry for `mode`, or raise a descriptive KeyError."""
    if mode not in controlnet_libs:
        supported = ', '.join(sorted(controlnet_libs))
        raise KeyError(
            f"Unknown ControlNet mode {mode!r}; supported modes: {supported}"
        )
    return controlnet_libs[mode]


def load_pipeline(mode_1, mode_2,device='cuda'):
    """Build a Stable Diffusion ControlNet pipeline that carries two ControlNets.

    The pipeline itself is constructed with the ControlNet for ``mode_2``;
    both ControlNets are additionally attached to the returned object as
    ``pipe.controlnet1`` (``mode_1``) and ``pipe.controlnet2`` (``mode_2``).

    Args:
        mode_1: Key of ``controlnet_libs`` selecting the first ControlNet.
        mode_2: Key of ``controlnet_libs`` selecting the second ControlNet.
        device: Device that the ControlNets and the pipeline are moved to.

    Returns:
        The configured ``StableDiffusionControlNetPipeline`` with a
        ``UniPCMultistepScheduler`` and the two extra ControlNet attributes.

    Raises:
        KeyError: If ``mode_1`` or ``mode_2`` is not a key of
            ``controlnet_libs``. Raised before any model is loaded.
    """
    import warnings

    # Resolve both modes up front so a typo in either one fails immediately,
    # instead of after the first ControlNet has already been loaded.
    model_id_1 = _controlnet_model_id(mode_1)
    model_id_2 = _controlnet_model_id(mode_2)

    controlnet_1 = ControlNetModel.from_pretrained(model_id_1)
    controlnet_2 = ControlNetModel.from_pretrained(model_id_2)

    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=controlnet_2
    )

    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

    # NOTE(review): these extra attributes are presumably read by the
    # downstream sampling code that receives `pipe` -- confirm.
    pipe.controlnet1 = controlnet_1.to(device)
    pipe.controlnet2 = controlnet_2.to(device)

    pipe = pipe.to(device)

    # xformers attention is only an optimisation: when it cannot be enabled
    # (package missing, or no CUDA), keep going with the default attention
    # instead of making the whole pipeline unusable.
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except (ModuleNotFoundError, ValueError) as exc:
        warnings.warn(
            f"xformers memory-efficient attention is unavailable ({exc}); "
            "continuing with the default attention implementation.",
            RuntimeWarning,
            stacklevel=2,
        )

    return pipe


# Mode names for the two conditioning inputs; each must be a key of `controlnet_libs`.
mode_1, mode_2 = 'pose', 'depth'

# Build the pipeline once for this pair of modes (load_pipeline's default device is used).
pipe = load_pipeline(mode_1, mode_2)

# One text prompt, wrapped in a list, plus a one-element list of negative prompts.
prompt = "Spiderman waving from times square"
prompts = [prompt]
negative_prompts = [
    " monochrome, bad anatomy, lowres,  worst quality, low quality",
]


In [None]:
from img_process_utils import find_mode

# "image": derive the control inputs from raw photos via `find_mode`.
# Any other value (e.g. "mode"): load the control images from disk as they are.
input_type = "image"

if input_type != "image":
    # The files are used directly as the two control inputs.
    control1 = load_image("./test_images/luffy_pose.png")
    control2 = load_image("./test_images/posecanny.png")
else:
    # Load both source images first, then pass each through `find_mode`
    # together with its mode name.
    image1 = load_image("./test_images/luffy.jpg")
    image2 = load_image("./test_images/times.jpeg")
    control1 = find_mode(mode_1, image1)
    control2 = find_mode(mode_2, image2)


In [None]:
import ptp_utils_max_merge as ptp_utils
def run_and_display(prompts, negative_prompts, latent=None, run_baseline=False, generator=None, control1=None,control2=None,  results_dir='results', num_inference_steps=50, guidance_scale=7.5):
    """Generate images with the module-level `pipe` and display them.

    Thin wrapper around ``ptp_utils.text2image_ldm_stable`` followed by
    ``ptp_utils.view_images``.

    Args:
        prompts: Prompts forwarded to ``text2image_ldm_stable``.
        negative_prompts: Negative prompts forwarded alongside ``prompts``.
        latent: Optional starting latent, forwarded as ``latent``.
        run_baseline: Accepted for backward compatibility; not used by this
            function.
        generator: Optional random generator, forwarded as ``generator``.
        control1: First control input, forwarded as ``control1``.
        control2: Second control input, forwarded as ``control2``.
        results_dir: Forwarded to ``ptp_utils.view_images`` as ``results_dir``.
        num_inference_steps: Forwarded as ``num_inference_steps`` (default 50,
            the value that used to be hard-coded).
        guidance_scale: Forwarded as ``guidance_scale`` (default 7.5, the
            value that used to be hard-coded).

    Returns:
        The ``(images, x_t)`` pair returned by ``text2image_ldm_stable``.
    """
    # `pipe` is read from module scope; it must be defined before calling.
    images, x_t = ptp_utils.text2image_ldm_stable(
        pipe,
        prompts,
        negative_prompts,
        latent=latent,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator,
        control1=control1,
        control2=control2,
        low_resource=False,
    )

    ptp_utils.view_images(images, results_dir=results_dir)
    return images, x_t


# Fixed seed so the starting latent is the same on every run.
seed = 1024
g_cpu = torch.Generator()
g_cpu.manual_seed(seed)

# One 4-channel latent of spatial size (512 // 8) x (512 // 8), drawn from the seeded generator.
# NOTE(review): presumably a 512px image at 8x latent downsampling -- confirm.
latent_shape = (1, 4, 512 // 8, 512 // 8)
latent = torch.randn(latent_shape, generator=g_cpu)
del latent_shape  # keep the module namespace identical to before

# The same generator is handed on to the sampling call together with the latent.
image, x_t = run_and_display(
    prompts,
    negative_prompts,
    latent=latent,
    run_baseline=False,
    generator=g_cpu,
    control1=control1,
    control2=control2,
)
