In [1]:
!pip install rembg controlnet_aux diffusers accelerate

Collecting rembg
  Downloading rembg-2.0.57-py3-none-any.whl (33 kB)
Collecting controlnet_aux
  Downloading controlnet_aux-0.0.9-py3-none-any.whl (282 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m282.4/282.4 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting diffusers
  Downloading diffusers-0.29.1-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m57.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
Collecting onnxruntime (from rembg)
  Downloading onnxruntime-1.18.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m70.8 MB/s[0m eta [36m0:00:00[0m
Collecting pymatting (from rembg)
  Downloading PyMat

In [2]:


import gc
import random
from io import BytesIO

import requests
import torch
from controlnet_aux import ZoeDetector
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    StableDiffusionXLInpaintPipeline,
)
from PIL import Image, ImageOps
from rembg import remove




def load_image(url, timeout=30):
    """Download the image at ``url`` and return it as an RGBA PIL image.

    Args:
        url: HTTP(S) address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        A ``PIL.Image.Image`` converted to mode ``"RGBA"``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of handing HTML to the image decoder.
    response.raise_for_status()
    # Use the fully downloaded, content-decoded body rather than the raw socket
    # stream: ``response.raw`` skips gzip/deflate decoding and keeps the
    # connection open until it is read to the end.
    return Image.open(BytesIO(response.content)).convert("RGBA")


def remove_background(image):
    """Return ``image`` with its background stripped.

    This is a thin wrapper that forwards ``image`` to rembg's ``remove`` and
    hands back whatever that call returns.
    """
    return remove(image)

def scale_and_paste(original_image, canvas_size=1024, margin=20):
    """Resize an image to fit a square canvas and centre it on a white background.

    The longer side is scaled to ``canvas_size`` (keeping the aspect ratio), then
    ``margin`` pixels are subtracted from both dimensions so the subject does not
    touch the canvas border. The resized image is pasted, using its own alpha
    channel as the mask, in the middle of a white ``canvas_size`` x
    ``canvas_size`` RGBA canvas.

    Args:
        original_image: PIL image to place. Images that are not already RGBA are
            converted, because the paste below uses the image itself as the
            transparency mask.
        canvas_size: Side length in pixels of the square output canvas.
        margin: Pixels removed from both the scaled width and height.

    Returns:
        A tuple ``(resized_original, white_background)``: the resized RGBA image
        and the white canvas with that image pasted at its centre.
    """
    if original_image.mode != "RGBA":
        # An RGB (or palette) image is not a valid paste mask; normalise first.
        original_image = original_image.convert("RGBA")

    aspect_ratio = original_image.width / original_image.height
    if original_image.width > original_image.height:
        new_width = canvas_size
        new_height = round(new_width / aspect_ratio)
    else:
        new_height = canvas_size
        new_width = round(new_height * aspect_ratio)

    # For very elongated images the short side can be smaller than the margin;
    # clamp to one pixel so resize() never receives a zero or negative size.
    new_width = max(1, new_width - margin)
    new_height = max(1, new_height - margin)

    resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
    white_background = Image.new("RGBA", (canvas_size, canvas_size), "white")
    x = (canvas_size - new_width) // 2
    y = (canvas_size - new_height) // 2
    # Third argument is the mask: transparent pixels leave the white canvas visible.
    white_background.paste(resized_original, (x, y), resized_original)

    return resized_original, white_background


def generate_image(pipeline, prompt, negative_prompt, inpaint_image, zoe_image, seed=None):
    """Invoke ``pipeline`` with two conditioning images and return its first image.

    Args:
        pipeline: Callable invoked as ``pipeline(prompt, **kwargs)``; its result
            must expose an ``images`` sequence.
        prompt: Positional prompt forwarded to ``pipeline``.
        negative_prompt: Forwarded as ``negative_prompt``.
        inpaint_image: First entry of the ``image`` list passed to ``pipeline``.
        zoe_image: Second entry of the ``image`` list passed to ``pipeline``.
        seed: Seed for the CPU ``torch.Generator``. When ``None`` a random value
            in ``[0, 2**32 - 1]`` is drawn.

    Returns:
        ``pipeline(...).images[0]``.
    """
    chosen_seed = random.randint(0, 2**32 - 1) if seed is None else seed
    rng = torch.Generator(device="cpu")
    rng.manual_seed(chosen_seed)

    # The per-entry lists line up with the order of the ``image`` list.
    call_kwargs = dict(
        negative_prompt=negative_prompt,
        image=[inpaint_image, zoe_image],
        guidance_scale=6.5,
        num_inference_steps=25,
        generator=rng,
        controlnet_conditioning_scale=[0.5, 0.8],
        control_guidance_end=[0.9, 0.6],
    )
    result = pipeline(prompt, **call_kwargs)
    return result.images[0]


def generate_outpaint(pipeline, prompt, negative_prompt, image, mask, seed=None):
    """Invoke ``pipeline`` with an image and mask and return its first image.

    Args:
        pipeline: Callable invoked as ``pipeline(prompt, **kwargs)``; its result
            must expose an ``images`` sequence.
        prompt: Positional prompt forwarded to ``pipeline``.
        negative_prompt: Forwarded as ``negative_prompt``.
        image: Forwarded as ``image``.
        mask: Forwarded as ``mask_image``.
        seed: Seed for the CPU ``torch.Generator``. When ``None`` a random value
            in ``[0, 2**32 - 1]`` is drawn.

    Returns:
        ``pipeline(...).images[0]``.
    """
    chosen_seed = random.randint(0, 2**32 - 1) if seed is None else seed
    rng = torch.Generator(device="cpu")
    rng.manual_seed(chosen_seed)

    call_kwargs = dict(
        negative_prompt=negative_prompt,
        image=image,
        mask_image=mask,
        guidance_scale=10.0,
        strength=0.8,
        num_inference_steps=30,
        generator=rng,
    )
    result = pipeline(prompt, **call_kwargs)
    return result.images[0]


def main(image_url, prompt, negative_prompt, output_path="result.png"):
    """Generate a new scene around the subject of an image and save the result.

    Steps, in order:
      1. Download the image, remove its background and centre the subject on a
         1024x1024 white canvas.
      2. Run ``ZoeDetector`` on that canvas to obtain the second conditioning image.
      3. Generate a scene with an SDXL ControlNet pipeline driven by two
         ControlNets (inpaint + Zoe), then paste the original subject back on top.
      4. Build a mask that covers everything except the subject, blur it, and
         run an SDXL inpainting pipeline over the masked area.
      5. Paste the subject once more and write the image to ``output_path``.

    Requires a CUDA device: the VAE and both pipelines are moved to ``"cuda"``.

    Args:
        image_url: URL of the source image.
        prompt: Text prompt used for both generation passes.
        negative_prompt: Negative prompt used for both generation passes.
        output_path: File the final image is saved to.

    Returns:
        None. The result is written to ``output_path``.
    """
    original_image = load_image(image_url)
    image_without_bg = remove_background(original_image)
    resized_img, white_bg_image = scale_and_paste(image_without_bg)

    zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
    image_zoe = zoe(white_bg_image, detect_resolution=512, image_resolution=1024)

    # Order matters: generate_image() passes [inpaint_image, zoe_image] in the
    # same order as this list.
    controlnets = [
        ControlNetModel.from_pretrained(
            "destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"
        ),
        ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16),
    ]

    # The VAE is shared by both pipelines, so it is loaded once and kept alive.
    vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to("cuda")

    pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
        "SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16", controlnet=controlnets, vae=vae
    ).to("cuda")

    temp_image = generate_image(pipeline, prompt, negative_prompt, white_bg_image, image_zoe)

    # Restore the untouched subject over the generated scene, at the same
    # centred position scale_and_paste() used on its 1024x1024 canvas.
    x = (1024 - resized_img.width) // 2
    y = (1024 - resized_img.height) // 2
    temp_image.paste(resized_img, (x, y), resized_img)

    # Mask = inverse of the subject's alpha channel, i.e. white where the
    # second pass may repaint and black over the subject.
    mask = Image.new("L", temp_image.size)
    mask.paste(resized_img.split()[3], (x, y))
    mask = ImageOps.invert(mask)
    # Binarise: values above 128 become 255, everything else 0 (False).
    final_mask = mask.point(lambda p: p > 128 and 255)

    # Release the first pipeline before loading the second one. Dropping only
    # `pipeline` is not enough: `.to("cuda")` moved the ControlNet modules in
    # place, so the `controlnets` list would keep their weights on the GPU.
    del pipeline, controlnets
    gc.collect()
    torch.cuda.empty_cache()

    inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained(
        "OzzyGT/RealVisXL_V4.0_inpainting",
        torch_dtype=torch.float16,
        variant="fp16",
        vae=vae,
    ).to("cuda")

    mask_blurred = inpaint_pipeline.mask_processor.blur(final_mask, blur_factor=20)

    final_image = generate_outpaint(inpaint_pipeline, prompt, negative_prompt, temp_image, mask_blurred)

    # Paste the subject a final time so the inpainting pass cannot have altered it.
    final_image.paste(resized_img, (x, y), resized_img)
    final_image.save(output_path)




  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


In [3]:
# Source image to download; main() removes its background before compositing.
image_url = "https://papik.pro/uploads/posts/2021-11/1636135216_25-papik-pro-p-gazprombank-logotip-foto-29.png"
# `product` and `place` are interpolated into the text prompt below.
product = "logo"
place = "Flower field"
prompt = f"high quality photo of {product} in the {place}, shadows, highly detailed"
# Empty negative prompt: an empty string is passed through to both generation calls.
negative_prompt = ""
# Runs the whole pipeline; main() saves the result to "result.png" by default.
main(image_url, prompt, negative_prompt)

Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file '/root/.u2net/u2net.onnx'.
100%|███████████████████████████████████████| 176M/176M [00:00<00:00, 63.3GB/s]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


ZoeD_M12_N.pt:   0%|          | 0.00/1.44G [00:00<?, ?B/s]

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/2.50G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/577 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

text_encoder_2/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/5.14G [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  return F.conv2d(input, weight, bias, self.stride,


model_index.json:   0%|          | 0.00/721 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

text_encoder/config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

text_encoder_2/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/563 [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/588 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/856 [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/5.14G [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.78k [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/24 [00:00<?, ?it/s]