In [None]:
import os
import re
import csv
import random
from pathlib import Path
from typing import Dict, List

import torch
from PIL import Image, ImageDraw
from diffusers import StableDiffusionInpaintPipeline


In [None]:
# Catalogue of room attributes that can be edited. Each attribute maps to a
# list of alternative edit instructions; generate_attributed_prompt() samples
# some attributes and picks one instruction for each of them.
# Insertion order matters for reproducibility: the attribute names are
# sampled from list(ROOM_ATTRIBUTES.keys()).
ROOM_ATTRIBUTES: Dict[str, List[str]] = dict(
    lighting=[
        "change lighting to bright morning daylight coming from the window",
        "change lighting to warm evening indoor light with strong shadows",
        "change lighting to cool artificial night lighting",
    ],
    rug=[
        "remove any rug from the floor",
        "add a large patterned rug covering the floor",
        "replace the rug with a neutral minimalist rug",
    ],
    curtains=[
        "fully open the curtains letting light in",
        "close the curtains with thick fabric",
        "replace curtains with half-open blinds",
    ],
    wall_decor=[
        "remove all wall art",
        "add framed pictures on the walls",
        "add minimal modern wall decorations",
    ],
    plants=[
        "remove indoor plants",
        "add one large indoor plant",
        "add multiple small plants around the room",
    ],
    clutter=[
        "make the room very tidy and minimal",
        "add everyday clutter like books and small objects",
        "slightly rearrange small objects on surfaces",
    ],
)


In [None]:
# Shared positive prompt prefix: one sentence per entry, joined with single
# spaces. NOTE(review): the run output in this notebook reports that CLIP only
# handles 77 tokens, so this text leaves limited room for anything appended
# after it.
BASE_PROMPT = " ".join([
    "Photorealistic image of the SAME physical room as the input image.",
    "Keep the room geometry, walls, windows, and layout unchanged.",
    "Actively MODIFY decor, lighting, materials, and movable objects.",
    "You MUST introduce visible changes while preserving the same room.",
])

# Shared negative prompt: comma-separated list of things to steer away from.
NEGATIVE_PROMPT = ", ".join([
    "different room",
    "new layout",
    "changed geometry",
    "distorted walls",
    "warped perspective",
    "extra windows",
    "low quality",
])


In [None]:
def random_inpaint_mask(w: int, h: int) -> Image.Image:
    """Build a ``w`` x ``h`` single-channel ("L") inpainting mask.

    The mask starts black (0) and gets white (255) rectangles painted on it:
    one fixed rectangle spanning 10%-90% of both axes, plus three randomly
    placed rectangles whose top edge lies in the lower two thirds of the
    image. Randomness comes from the global ``random`` module.

    Args:
        w: Mask width in pixels.
        h: Mask height in pixels.

    Returns:
        The mask after a round trip through half resolution with bilinear
        resampling, which blurs the hard rectangle edges.
    """
    canvas = Image.new("L", (w, h), 0)
    pen = ImageDraw.Draw(canvas)

    # Fixed central region.
    central_box = [int(w * 0.1), int(h * 0.1), int(w * 0.9), int(h * 0.9)]
    pen.rectangle(central_box, fill=255)

    # Three extra random patches; they may extend past the image border.
    patches_drawn = 0
    while patches_drawn < 3:
        left = random.randint(0, w // 2)
        top = random.randint(h // 3, h - 1)
        right = left + random.randint(w // 4, w // 2)
        bottom = top + random.randint(h // 6, h // 3)
        pen.rectangle([left, top, right, bottom], fill=255)
        patches_drawn += 1

    half_res = canvas.resize((w // 2, h // 2), Image.BILINEAR)
    return half_res.resize((w, h), Image.BILINEAR)


In [None]:
def generate_attributed_prompt(min_attrs=2, max_attrs=4) -> Dict:
    """Sample a set of room edits and build the matching prompt.

    Between ``min_attrs`` and ``max_attrs`` attributes are drawn from
    ``ROOM_ATTRIBUTES`` (without repetition) and one edit instruction is
    picked for each. Randomness comes from the global ``random`` module.

    The sampled edits are placed at the START of the prompt. The run output
    of this notebook shows CLIP truncating prompts at 77 tokens; with the
    edits appended after ``BASE_PROMPT`` they were the part that got cut, so
    the returned ``attributes`` described changes the model never saw.

    Args:
        min_attrs: Minimum number of attributes to edit (>= 0).
        max_attrs: Maximum number of attributes to edit; values larger than
            the number of available attributes are clamped.

    Returns:
        Dict with keys ``"prompt"``, ``"negative_prompt"`` and
        ``"attributes"`` (attribute name -> chosen edit instruction).

    Raises:
        ValueError: If ``min_attrs`` is negative or exceeds ``max_attrs``.
    """
    attribute_names = list(ROOM_ATTRIBUTES.keys())

    if not 0 <= min_attrs <= max_attrs:
        raise ValueError(
            f"expected 0 <= min_attrs <= max_attrs, got {min_attrs} and {max_attrs}"
        )

    # random.sample() cannot draw more items than exist, so cap both bounds.
    upper = min(max_attrs, len(attribute_names))
    lower = min(min_attrs, upper)

    keys = random.sample(attribute_names, random.randint(lower, upper))
    actions = [random.choice(ROOM_ATTRIBUTES[k]) for k in keys]

    # Most important text first: whatever falls past the token limit is
    # dropped by the text encoder, and the edits are what the labels describe.
    parts = []
    if actions:
        parts.append("; ".join(actions) + ".")
    parts.append(BASE_PROMPT)
    parts.append("High realism, straight lines.")

    return {
        "prompt": " ".join(parts),
        "negative_prompt": NEGATIVE_PROMPT,
        "attributes": dict(zip(keys, actions))
    }


In [None]:
@torch.inference_mode()
def generate_room_variants(
    pipe,
    image: Image.Image,
    num_variants: int = 10,
    seed: int = 0,
    size: int = 512
) -> List[Dict]:
    """Generate inpainted variants of one room image.

    Each variant gets its own sampled prompt and mask. Variant ``i`` uses
    ``seed + i`` for BOTH the torch generator and Python's ``random`` module
    (which drives prompt and mask sampling), so the ``"seed"`` stored with a
    result is enough to regenerate that exact variant. Previously only the
    torch generator was seeded and prompts/masks differed from run to run.

    Args:
        pipe: Inpainting pipeline; called with prompt, negative_prompt,
            image, mask_image, guidance_scale, num_inference_steps, strength
            and generator, and expected to return an object with ``.images``.
        image: Source room image. It is converted to RGB and resized to a
            ``size`` x ``size`` square (aspect ratio is not preserved).
        num_variants: Number of variants to generate.
        seed: Base seed; variant ``i`` uses ``seed + i``.
        size: Edge length in pixels of the square working image.

    Returns:
        One dict per variant with keys ``"image"``, ``"attributes"`` and
        ``"seed"``.
    """
    image = image.convert("RGB").resize((size, size), Image.LANCZOS)
    w, h = image.size

    results = []
    generator = torch.Generator(device=pipe.device)

    # Seeding the global `random` module is a side effect; remember the
    # caller's state so it can be put back when we are done (or interrupted).
    caller_random_state = random.getstate()
    try:
        for i in range(num_variants):
            variant_seed = seed + i

            # Make prompt and mask sampling a pure function of the seed.
            random.seed(variant_seed)
            meta = generate_attributed_prompt()
            mask = random_inpaint_mask(w, h)

            generator.manual_seed(variant_seed)

            # The progress bars in this notebook's output show 28 steps for
            # these settings (35 requested, strength 0.8).
            out = pipe(
                prompt=meta["prompt"],
                negative_prompt=meta["negative_prompt"],
                image=image,
                mask_image=mask,
                guidance_scale=7.5,
                num_inference_steps=35,
                strength=0.8,
                generator=generator,
            ).images[0]

            results.append({
                "image": out,
                "attributes": meta["attributes"],
                "seed": variant_seed
            })
    finally:
        random.setstate(caller_random_state)

    return results


In [None]:
def _collect_room_images(input_dir: Path) -> List[tuple]:
    """Return ``(room_id, path)`` pairs for room photos, ordered by numeric id.

    A file qualifies when its name starts with ``room_``, has a ``.jpg`` or
    ``.jpeg`` extension (any case) and contains ``room_<digits>`` in its stem.
    Files without a numeric id, and later files repeating an id already seen,
    are skipped with a printed message.
    """
    rooms = {}
    for path in sorted(input_dir.glob("room_*")):
        if not path.is_file() or path.suffix.lower() not in (".jpg", ".jpeg"):
            continue

        match = re.search(r"room_(\d+)", path.stem)
        if match is None:
            print(f"Skipping {path.name}: no numeric room id in file name")
            continue

        room_id = int(match.group(1))
        if room_id in rooms:
            print(
                f"Skipping {path.name}: room {room_id} already provided by "
                f"{rooms[room_id].name}"
            )
            continue
        rooms[room_id] = path

    # Numeric order, so room 2 comes before room 10.
    return sorted(rooms.items())


def build_dataset(
    input_dir: str,
    output_dir: str,
    num_variants: int = 10,
    model_id: str = "runwayml/stable-diffusion-inpainting"
):
    """Build a real + synthetic room dataset with a ``labels.csv`` index.

    For every ``room_<N>.jpg`` / ``room_<N>.jpeg`` in ``input_dir`` the photo
    is saved to ``<output_dir>/real/`` and ``num_variants`` inpainted variants
    are saved to ``<output_dir>/synthetic/room_<N>/syn_XX.png``.

    ``labels.csv`` (columns ``path``, ``room_id``, ``is_synthetic``) is written
    and flushed after each completed room, so interrupting a long run keeps
    the labels of every room finished so far instead of losing all of them.

    Args:
        input_dir: Directory containing the source room photos.
        output_dir: Directory that receives ``real/``, ``synthetic/`` and
            ``labels.csv``; created if missing.
        num_variants: Number of synthetic variants per room.
        model_id: Model identifier or local path handed to
            ``StableDiffusionInpaintPipeline.from_pretrained``.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    real_dir = output_dir / "real"
    synth_dir = output_dir / "synthetic"

    real_dir.mkdir(parents=True, exist_ok=True)
    synth_dir.mkdir(parents=True, exist_ok=True)

    rooms = _collect_room_images(input_dir)

    pipe = None
    if rooms:
        device = "cuda" if torch.cuda.is_available() else "cpu"

        pipe = StableDiffusionInpaintPipeline.from_pretrained(
            model_id,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            safety_checker=None
        ).to(device)

        pipe.enable_attention_slicing()
    else:
        # Nothing to do; skip loading the model and emit a header-only CSV.
        print(f"No room_*.jpg / room_*.jpeg images found in {input_dir}")

    with open(output_dir / "labels.csv", "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["path", "room_id", "is_synthetic"])
        writer.writeheader()
        f.flush()

        for room_id, img_path in rooms:
            print(f"Processing room {room_id}")

            real_path = real_dir / img_path.name
            # The context manager closes the source file handle; convert()
            # returns an independent in-memory image that outlives it.
            with Image.open(img_path) as source:
                source.save(real_path)
                image = source.convert("RGB")

            room_rows = [
                {"path": str(real_path), "room_id": room_id, "is_synthetic": 0}
            ]

            variants = generate_room_variants(
                pipe=pipe,
                image=image,
                num_variants=num_variants,
                seed=room_id * 1000
            )

            room_out = synth_dir / f"room_{room_id}"
            room_out.mkdir(parents=True, exist_ok=True)

            for i, v in enumerate(variants):
                out_path = room_out / f"syn_{i:02d}.png"
                v["image"].save(out_path)
                room_rows.append(
                    {"path": str(out_path), "room_id": room_id, "is_synthetic": 1}
                )

            # Only index a room once all of its images are on disk.
            writer.writerows(room_rows)
            f.flush()

    print("\nDataset generation complete.")


In [None]:
# Run the full pipeline: read room photos from /content/input_rooms and write
# real/, synthetic/ and labels.csv under /content/dataset, with 10 synthetic
# variants per room.
build_dataset(
    input_dir="/content/input_rooms",
    output_dir="/content/dataset",
    num_variants=10
)


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
You have disabled the safety checker for <class 'diffusers.pipelin

Processing room 1


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['clutter like books and small objects . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['walls . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; replace curtains with half - open blinds ; add minimal modern wall decorations . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['coming from the window . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['close the curtains with thick fabric . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['.']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['the floor ; fully open the curtains letting light in . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['high realism , straight lines .']


Processing room 10


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['with a neutral minimalist rug . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['with half - open blinds ; add one large indoor plant . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['curtains letting light in ; remove all wall art . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['the floor ; change lighting to warm evening indoor light with strong shadows . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['covering the floor ; add framed pictures on the walls . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['with thick fabric ; remove all wall art . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['artificial night lighting ; fully open the curtains letting light in . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['remove any rug from the floor ; remove all wall art . high realism , straight lines .']


Processing room 11


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['floor . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['; remove all wall art ; make the room very tidy and minimal . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['. high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['. high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['artificial night lighting ; remove indoor plants . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['with half - open blinds ; remove indoor plants . high realism , straight lines .']


  0%|          | 0/28 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Mount Google Drive at /content/drive. The google.colab import only exists
# in the Colab runtime, so this cell is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Archive the generated dataset. The path is relative, so this relies on the
# current working directory being the parent of dataset/
# (build_dataset above wrote to /content/dataset).
!zip -r dataset.zip dataset/


  adding: dataset/ (stored 0%)
  adding: dataset/real/ (stored 0%)
  adding: dataset/real/room_20.jpg (deflated 3%)
  adding: dataset/real/room_4.jpg (deflated 1%)
  adding: dataset/real/room_39.jpg (deflated 1%)
  adding: dataset/real/room_34.jpg (deflated 6%)
  adding: dataset/real/room_19.jpg (deflated 1%)
  adding: dataset/real/room_45.jpg (deflated 3%)
  adding: dataset/real/room_27.jpg (deflated 2%)
  adding: dataset/real/room_38.jpg (deflated 1%)
  adding: dataset/real/room_29.jpg (deflated 1%)
  adding: dataset/real/room_23.jpg (deflated 1%)
  adding: dataset/real/room_26.jpg (deflated 1%)
  adding: dataset/real/room_13.jpg (deflated 1%)
  adding: dataset/real/room_2.jpg (deflated 3%)
  adding: dataset/real/room_49.jpg (deflated 1%)
  adding: dataset/real/room_12.jpg (deflated 1%)
  adding: dataset/real/room_42.jpg (deflated 2%)
  adding: dataset/real/room_46.jpg (deflated 1%)
  adding: dataset/real/room_40.jpg (deflated 1%)
  adding: dataset/real/room_17.jpg (deflated 1%)
  ad