In [1]:
import torch
from diffusers import (
    StableDiffusionXLControlNetPipeline,
    ControlNetModel,
    StableDiffusionXLInpaintPipeline,
    AutoencoderKL
)
from diffusers.utils import load_image, make_image_grid
import cv2
import numpy as np
from PIL import Image
import os

# Notebook-wide runtime settings read by the model-loading cells.
# Device string that pipelines are moved to via `.to(device)`.
device = "cuda"
# Precision passed as `torch_dtype=` to `from_pretrained`.
torch_dtype = torch.float16

In [None]:
# Canny-conditioned ControlNet, loaded at the notebook-wide precision.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch_dtype
)

# SDXL base text-to-image pipeline wired to the ControlNet loaded above.
text2img_pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    torch_dtype=torch_dtype,
)
# Move the assembled pipeline onto the configured device.
text2img_pipe = text2img_pipe.to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [None]:
# Load the SDXL inpainting pipeline at the notebook-wide precision.
#
# The pipeline is deliberately NOT moved to the GPU with `.to(device)` here:
# `enable_model_cpu_offload()` below manages device placement itself, and
# moving the whole pipeline to CUDA first throws away most of the memory
# saving that offloading is meant to provide.
inpaint_pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    torch_dtype=torch_dtype,
)

# Memory-efficient attention; requires the optional `xformers` package.
inpaint_pipe.enable_xformers_memory_efficient_attention()
# Keep sub-models off the GPU until they are needed for a forward pass.
inpaint_pipe.enable_model_cpu_offload()

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

The config attributes {'decay': 0.9999, 'inv_gamma': 1.0, 'min_decay': 0.0, 'optimization_step': 37000, 'power': 0.6666666666666666, 'update_after_step': 0, 'use_ema_warmup': False} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.


In [None]:
from PIL import Image, ImageDraw
def canny_from_image(image: Image.Image, low=100, high=200):
    """Return a Canny edge map of `image` as a 1024x1024 RGB PIL image.

    Args:
        image: Source picture; it is resized to 1024x1024 and converted to
            RGB before edge detection.
        low: First threshold forwarded to `cv2.Canny`.
        high: Second threshold forwarded to `cv2.Canny`.

    Returns:
        A PIL image built from the edge map after it has been expanded from
        one channel to three.
    """
    resized_rgb = image.resize((1024, 1024)).convert("RGB")
    pixels = np.array(resized_rgb)
    edge_map = cv2.Canny(pixels, low, high)
    # Canny yields a single channel; expand it to RGB before wrapping in PIL.
    return Image.fromarray(cv2.cvtColor(edge_map, cv2.COLOR_GRAY2RGB))

# === 5. Generate Initial Background Layer ===
def generate_background(prompt: str):
    """Generate the base image for `prompt` with the ControlNet pipeline.

    The control image is the Canny edge map of a plain white 1024x1024
    canvas.

    Args:
        prompt: Text description of the scene to generate.

    Returns:
        The first image in the output of `text2img_pipe`.
    """
    white_canvas = Image.new("RGB", (1024, 1024), (255, 255, 255))
    # A uniform canvas has no edges, so the control image carries no structure.
    control_image = canny_from_image(white_canvas)
    output = text2img_pipe(
        prompt=prompt,
        image=control_image,
        num_inference_steps=30,
        guidance_scale=7.5,
    )
    return output.images[0]

# === 6. Inpaint Over Existing Image ===
def inpaint_layer(prompt: str, base_image: Image.Image, mask: Image.Image):
    """Run the inpainting pipeline on `base_image` using `mask` and `prompt`.

    Args:
        prompt: Text description of the content to paint.
        base_image: Image to paint over; resized to 1024x1024 first.
        mask: Mask image; resized to 1024x1024 and converted to mode "L"
            before being passed as `mask_image`.

    Returns:
        The first image in the output of `inpaint_pipe`.
    """
    target_size = (1024, 1024)
    resized_base = base_image.resize(target_size)
    gray_mask = mask.resize(target_size).convert("L")

    output = inpaint_pipe(
        prompt=prompt,
        image=resized_base,
        mask_image=gray_mask,
        num_inference_steps=30,
        guidance_scale=8.5,
    )
    return output.images[0]

# === 7. Create Circular Mask ===
def circular_mask(center, radius, size=(1024, 1024)):
    """Build a mode-"L" mask containing one filled disc.

    Args:
        center: `(x, y)` coordinates of the disc centre.
        radius: Disc radius, in the same units as `center`.
        size: Size tuple passed to `Image.new` for the mask.

    Returns:
        A PIL image initialised to 0 with the disc drawn in 255.
    """
    canvas = Image.new("L", size, 0)
    cx, cy = center
    # Bounding box of the disc: (left, top, right, bottom).
    bounding_box = (cx - radius, cy - radius, cx + radius, cy + radius)
    ImageDraw.Draw(canvas).ellipse(bounding_box, fill=255)
    return canvas

In [None]:
# === 8. Pipeline: Blank → Background → Foreground ===
def layered_generation():
    """Build the scene in three passes, saving every stage to disk.

    Pass 1 generates the background, pass 2 inpaints a cabin inside a
    circular mask, and pass 3 inpaints a hiker inside a smaller circular
    mask centred lower in the frame. The three stage images and both masks
    are saved as PNG files under relative paths, then a confirmation line is
    printed.
    """
    # --- Layer 1: background ---
    background = generate_background(
        "a peaceful mountain landscape with a lake, in golden hour lighting"
    )
    background.save("layer1_background.png")

    # --- Layer 2: midground object (cabin) ---
    cabin_x, cabin_y = 512, 650
    cabin_radius = 300
    cabin_mask = circular_mask(center=(cabin_x, cabin_y), radius=cabin_radius)
    cabin_mask.save("debug_cabin_mask.png")
    cabin_prompt = (
        "a cozy wooden log cabin with a triangular roof, warm lights glowing from the windows, "
        "located in the center of the image, clearly visible, surrounded by trees, chimney with smoke, photorealistic"
    )
    with_cabin = inpaint_layer(cabin_prompt, background, cabin_mask)
    with_cabin.save("layer2_cabin.png")

    # --- Layer 3: foreground subject (hiker) ---
    # The hiker mask shares the cabin's x coordinate, sits half a cabin radius
    # further down, and is 60% of the cabin mask's radius.
    hiker_mask = circular_mask(
        center=(cabin_x, cabin_y + int(cabin_radius * 0.5)),
        radius=int(cabin_radius * 0.6),
    )
    hiker_mask.save("debug_person_mask.png")
    hiker_prompt = (
        "a hiker wearing a red jacket, standing in front of the cabin near the lake, full body, clearly visible"
    )
    with_hiker = inpaint_layer(hiker_prompt, with_cabin, hiker_mask)
    with_hiker.save("layer3_person.png")

    print("✅ Image generated in 3 layers and saved.")

In [None]:
layered_generation()

  0%|          | 0/29 [00:00<?, ?it/s]

✅ Image generated in 3 layers and saved.
