In [None]:
import os

SAVE_DIR = r"sd-1.5-3"
os.makedirs(SAVE_DIR, exist_ok=True)


In [3]:
# !pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu128
# !pip install --upgrade diffusers transformers accelerate
# !pip install opencv-python
# !pip install --upgrade xformers
# !pip install --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128

In [4]:
import torch
from diffusers import (
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    StableDiffusionInpaintPipeline
)
from PIL import Image, ImageDraw
import numpy as np
import cv2
import os

# ====================
# Setup
# ====================
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

In [5]:
# import huggingface_hub
# huggingface_hub.login()

In [6]:
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny",
    torch_dtype=torch_dtype
)

text2img_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch_dtype
).to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [7]:
inpaint_pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    torch_dtype=torch_dtype
).to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

An error occurred while trying to fetch C:\Users\Abhi\.cache\huggingface\hub\models--runwayml--stable-diffusion-inpainting\snapshots\8a4288a76071f7280aedbdb3253bdb9e9d5d84bb\unet: Error no file named diffusion_pytorch_model.safetensors found in directory C:\Users\Abhi\.cache\huggingface\hub\models--runwayml--stable-diffusion-inpainting\snapshots\8a4288a76071f7280aedbdb3253bdb9e9d5d84bb\unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch C:\Users\Abhi\.cache\huggingface\hub\models--runwayml--stable-diffusion-inpainting\snapshots\8a4288a76071f7280aedbdb3253bdb9e9d5d84bb\vae: Error no file named diffusion_pytorch_model.safetensors found in directory C:\Users\Abhi\.cache\huggingface\hub\models--runwayml--stable-diffusion-inpainting\snapshots\8a4288a76071f7280aedbdb3253bdb9e9d5d84bb\vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


In [8]:
# Enable xFormers & Offload for better memory usage
text2img_pipe.enable_xformers_memory_efficient_attention()
inpaint_pipe.enable_xformers_memory_efficient_attention()
text2img_pipe.enable_model_cpu_offload()
inpaint_pipe.enable_model_cpu_offload()

In [9]:
import requests
import json
import re

def safe_json_extract(text):
    """Extract JSON object from raw text using regex."""
    match = re.search(r"\{[\s\S]*\}", text)
    if match:
        try:
            return json.loads(match.group())
        except json.JSONDecodeError:
            print("⚠️ Could not parse extracted JSON.")
    return None

def decompose_prompt_with_llama(prompt: str):
    system_instruction = (
        "Given the scene description below, break it into three parts: "
        "background, midground, and foreground. Respond in JSON format."
    )
    full_prompt = f"{system_instruction}\n\nScene: \"{prompt}\"\n\nRespond in JSON format."

    payload = {
        "model": "llama3.1:8b",
        "prompt": full_prompt,
        "stream": False
    }

    try:
        response = requests.post("http://localhost:11434/api/generate", json=payload)
        if response.status_code == 200:
            raw = response.json().get("response", "").strip()
            result = safe_json_extract(raw)
            if result:
                return result
            else:
                print("⚠️ Model output was not valid JSON:\n", raw)
        else:
            print("❌ Ollama error:", response.text)
    except Exception as e:
        print("❌ LLaMA prompt parsing failed:", e)

    # Heuristic fallback
    phrases = [p.strip() for p in prompt.split(",") if p.strip()]
    return {
        "background": phrases[0] if len(phrases) > 0 else "",
        "midground": phrases[1] if len(phrases) > 1 else "",
        "foreground": phrases[2] if len(phrases) > 2 else ""
    }


In [10]:
import numpy as np
import cv2
from PIL import Image, ImageDraw
W, H = 512, 512 
# Draws a circular binary mask
def circular_mask(center, radius, size=(W, H)):
    mask = Image.new("L", size, 0)
    draw = ImageDraw.Draw(mask)
    x, y = center
    draw.ellipse((x - radius, y - radius, x + radius, y + radius), fill=255)
    return mask

# Draws debug overlay with mask boundaries
def draw_debug_layout(image, layout):
    draw = ImageDraw.Draw(image)
    for name, cfg in layout.items():
        x, y = cfg["center"]
        r = cfg["radius"]
        draw.ellipse((x - r, y - r, x + r, y + r), outline="red", width=2)
        draw.text((x - r, y - r - 10), name, fill="red")
    image.save(f"{SAVE_DIR}/layout_debug.png")

# Generates a background using text-to-image guided by Canny
def generate_background(prompt: str):
    blank = Image.new("RGB", (W, H), (255, 255, 255))
    canny = canny_from_image(blank)
    result = text2img_pipe(
        prompt=prompt,
        image=canny,
        num_inference_steps=20,
        guidance_scale=7.5
    )
    return result.images[0]

# Performs inpainting over a masked region
def inpaint_layer(prompt: str, base_image: Image.Image, mask: Image.Image):
    base_image = base_image.resize((W, H))
    mask = mask.resize((W, H)).convert("L")
    result = inpaint_pipe(
        prompt=prompt,
        image=base_image,
        mask_image=mask,
        num_inference_steps=20,
        guidance_scale=8.0
    )
    return result.images[0]

# Converts image to canny edge RGB image
def canny_from_image(image: Image.Image, low=100, high=200):
    image = image.resize((W, H))
    image_np = np.array(image.convert("RGB"))
    edges = cv2.Canny(image_np, low, high)
    edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
    return Image.fromarray(edges_rgb)


In [11]:
def rectangular_mask(top_left, bottom_right, size=(512, 512)):
    mask = Image.new("L", size, 0)
    draw = ImageDraw.Draw(mask)
    draw.rectangle([top_left, bottom_right], fill=255)
    return mask


In [12]:
def layered_generation(prompt):
    concepts = decompose_prompt_with_llama(prompt)

    # Ensure concepts are strings
    for k in ["background", "midground", "foreground"]:
        val = concepts.get(k, "")
        if not isinstance(val, str):
            concepts[k] = str(val)
    print(f"Background prompt: {concepts.get('background')}")
    print(f"Midground prompt: {concepts.get('midground')}")
    print(f"Foreground prompt: {concepts.get('foreground')}")

    # Layout with rectangular regions
    # Assuming fixed size, or use background.size if variable

    layout = {
        "midground": {
            "top_left": (int(W * 0.1), int(H * 0.1)),   # 25% width, ~39% height
            "bottom_right": (int(W * 0.9), int(H * 0.9))  # 75% width, ~62% height
        },
        "foreground": {
            "top_left": (int(W * 0.25), int(H * 0.25)),   # 31% width, ~70% height
            "bottom_right": (int(W * 0.75), int(H * 0.75))  # 69% width, ~90% height
        }
    }


    # Visualize mask layout
    dbg = Image.new("RGB", (W, H), (255, 255, 255))
    draw = ImageDraw.Draw(dbg)
    for name, box in layout.items():
        draw.rectangle([box["top_left"], box["bottom_right"]], outline="red", width=2)
        draw.text((box["top_left"][0], box["top_left"][1] - 10), name, fill="red")
    dbg.save(f"{SAVE_DIR}/debug_mask_layout.png")

    # Background
    background = generate_background(concepts["background"])
    background.save(f"{SAVE_DIR}/layer1_background.png")

    # Midground
    mid_mask = rectangular_mask(**layout["midground"])
    midground = inpaint_layer(concepts["midground"], background, mid_mask)
    midground.save(f"{SAVE_DIR}/layer2_midground.png")

    # Foreground
    fg_mask = rectangular_mask(**layout["foreground"])
    final_image = inpaint_layer(concepts["foreground"], midground, fg_mask)
    final_image.save(f"{SAVE_DIR}/layer3_final.png")

    print(f"✅ All layers saved to {SAVE_DIR}")


In [14]:
prompt = "A serene forest clearing at sunrise, with a wooden cabin nestled among the trees and a woman in a red cloak standing in front holding a lantern."
layered_generation(prompt)

Background prompt: [{'object': 'forest', 'description': 'tall, dense trees with leaves of various colors'}, {'object': 'sunrise', 'description': 'warm light illuminating the forest from behind'}]
Midground prompt: [{'object': 'cabin', 'description': 'small, wooden structure nestled among the trees'}]
Foreground prompt: [{'object': 'woman', 'description': 'standing in front of the cabin wearing a red cloak and holding a lantern'}, {'object': 'lantern', 'description': 'glowing softly in her hand'}]


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

✅ All layers saved to sd-1.5-2
