In [1]:
import torch
from diffusers import (
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    UniPCMultistepScheduler
    )
from PIL import Image
import cv2
import numpy as np

# -----------------------------
# 1. 加载 ControlNet 模型
# -----------------------------
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny",
    torch_dtype=torch.float16
)

# -----------------------------
# 2. 加载 Stable Diffusion v1.5 Pipeline
# -----------------------------
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16
)

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_attention_slicing()
pipe.to("cuda")  # GPU

# [ADDED] Generator for reproducibility
generator = torch.Generator(device="cuda").manual_seed(42)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/920 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/1.45G [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

safety_checker/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!


In [12]:
# -----------------------------
# 3. 读取空房图片 / Read empty-room images
# -----------------------------
#统一为 RGB, 统一resize, 不然容易out of memory / Convert to RGB for consistent processing (PIL/Diffusers)
def load_and_resize(path, max_side=768):
    img = Image.open(path).convert("RGB")
    w, h = img.size
    scale = max_side / max(w, h)
    if scale < 1.0:
        img = img.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
    return img

# Paths for 4 images
room_paths = [
    "/content/emptyroom1.jpeg",
    "/content/emptyroom2.jpeg",
    "/content/emptyroom3.jpeg",
    "/content/emptyroom4.jpeg"
]


In [15]:
# -----------------------------
# 4. 定义装修风格 Prompt
# -----------------------------
# [MODIFIED] Batch processing logic for edge detection + generation
# Prompt
prompt = (
    "Modern-style bedroom interior with plant, mirror, headboard, night light, "
    "bedside table, storage bench, accent chair, dresser closet, "
    "neutral walls and textures, natural materials and elements, simple clean lines, "
    "simplistic furniture, open natural lighting, practical and functional design; "
    "cinematic photo, highly detailed, cinematic lighting, ultra-detailed, ultrarealistic, "
    "photorealism, 8k, modern interior design style"
)

# -----------------------------
# 5. 生成效果图/ Generate image
# -----------------------------
# Process each image one by one
for idx, current_room_path in enumerate(room_paths):
    room_img_pil = load_and_resize(current_room_path, max_side=768)
    width, height = room_img_pil.size

    # RGB to BGR
    input_image_rgb = np.array(room_img_pil)
    input_image = cv2.cvtColor(input_image_rgb, cv2.COLOR_RGB2BGR)

    # Canny Edge Detection
    canny_image = cv2.Canny(input_image, 100, 200)
    canny_image = cv2.cvtColor(canny_image, cv2.COLOR_GRAY2RGB)
    canny_pil = Image.fromarray(canny_image)
    canny_pil = canny_pil.resize((width, height), Image.NEAREST)

    # Run pipeline
    output = pipe(
        prompt=prompt,
        image=canny_pil,
        num_inference_steps=20,
        guidance_scale=5.0,
        height=height, width=width,
        num_images_per_prompt=1,
        generator=generator
    )

    result_image = output.images[0]
    result_image.save(f"/content/renovated_room_{idx+1}.png")
    print(f"[✓] Saved: /content/renovated_room_{idx+1}.png")




print("Furnitured room image generated!")

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['k , modern interior design style']


  0%|          | 0/20 [00:00<?, ?it/s]

[✓] Saved: /content/renovated_room_1.png


The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['k , modern interior design style']


  0%|          | 0/20 [00:00<?, ?it/s]

[✓] Saved: /content/renovated_room_2.png


The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['k , modern interior design style']


  0%|          | 0/20 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['k , modern interior design style']


[✓] Saved: /content/renovated_room_3.png


  0%|          | 0/20 [00:00<?, ?it/s]

[✓] Saved: /content/renovated_room_4.png
Furnitured room image generated!
