In [40]:
import os

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from PIL import Image

import torch
from diffusers.pipelines.controlnet import StableDiffusionControlNetInpaintPipeline
from diffusers import ControlNetModel, UniPCMultistepScheduler
from diffusers import StableDiffusionXLPipeline
from transformers import AutoImageProcessor, UperNetForSemanticSegmentation, AutoModelForDepthEstimation
from colors import ade_palette
from utils import map_colors_rgb

In [41]:
# Device the diffusion pipelines, the depth model, its inputs, and the RNG are placed on.
device = "cuda"
# Half precision; passed as torch_dtype when the SSD-1B guide pipeline is loaded.
dtype = torch.float16

In [42]:
def exclude_items(color_set, item_set, exclude_set):
    """Filter paired colors and items, dropping pairs whose item is excluded.

    Args:
        color_set: Iterable of colors, aligned element-wise with ``item_set``.
        item_set: Iterable of item names, one per color.
        exclude_set: Container of item names to drop (membership is tested
            with ``in``).

    Returns:
        tuple[list, list]: The surviving colors and the surviving items, in
        their original order. Pairing stops at the shorter of the two inputs.
    """
    kept_pairs = [
        (color, item)
        for color, item in zip(color_set, item_set)
        if item not in exclude_set
    ]
    kept_colors = [color for color, _ in kept_pairs]
    kept_items = [item for _, item in kept_pairs]
    return kept_colors, kept_items

In [43]:
def setup_segmentation_pipeline():
    """Load the UperNet image processor and segmentation model.

    Both objects are read from the same local checkpoint directory.

    Returns:
        tuple: ``(processor, segmentor)`` as returned by the respective
        ``from_pretrained`` calls.
    """
    checkpoint_dir = "models/openmmlab--upernet-convnext-small"
    return (
        AutoImageProcessor.from_pretrained(checkpoint_dir),
        UperNetForSemanticSegmentation.from_pretrained(checkpoint_dir),
    )

In [44]:
@torch.inference_mode()
def generate_segmentation(image, processor, segmentor):
    """Run semantic segmentation on ``image`` and render it as a color map.

    Args:
        image: PIL image to segment; its ``size`` is used as the output size.
        processor: Image processor providing preprocessing and
            ``post_process_semantic_segmentation``.
        segmentor: Segmentation model called on the processed pixel values.

    Returns:
        PIL.Image.Image: RGB image of the same size as ``image`` in which each
        pixel carries the ``ade_palette()`` color of its predicted label.
        Labels that have no palette entry are left black.
    """
    processed_input = processor(image, return_tensors="pt").pixel_values
    # The inference_mode decorator already disables autograd for this call,
    # so no nested torch.no_grad() block is needed.
    results = segmentor(processed_input)

    # PIL reports size as (width, height); reversed here to (height, width).
    segmentation = processor.post_process_semantic_segmentation(
        results, target_sizes=[image.size[::-1]]
    )[0]
    labels = segmentation.cpu().numpy()

    # One palette lookup instead of one full-image boolean mask per label.
    palette = np.asarray(ade_palette()).astype(np.uint8)
    color_map = np.zeros((*labels.shape, 3), dtype=np.uint8)
    in_palette = (labels >= 0) & (labels < len(palette))
    color_map[in_palette] = palette[labels[in_palette]]
    return Image.fromarray(color_map).convert('RGB')

In [45]:
def setup_depth_pipeline():
    """Load the Depth-Anything image processor and depth-estimation model.

    Both are read from the same local checkpoint directory with
    ``torch_dtype=torch.float16``.

    Returns:
        tuple: ``(extractor, estimator)``. The estimator is returned as
        loaded; moving it to a device is left to the caller.
    """
    checkpoint_dir = "models/models--LiheYoung--depth-anything-large-hf"
    # NOTE(review): torch_dtype is also forwarded to the image processor;
    # it is unclear whether it has any effect there — confirm.
    half_precision = {"torch_dtype": torch.float16}
    extractor = AutoImageProcessor.from_pretrained(checkpoint_dir, **half_precision)
    estimator = AutoModelForDepthEstimation.from_pretrained(checkpoint_dir, **half_precision)
    return extractor, estimator

In [46]:
@torch.inference_mode()
def compute_depth(image, extractor, estimator):
    """Estimate a depth map for ``image`` and return it as an 8-bit image.

    Args:
        image: PIL image; its ``size`` determines the output resolution.
        extractor: Image processor that turns ``image`` into model inputs.
        estimator: Depth model whose output exposes ``predicted_depth``.

    Returns:
        PIL.Image.Image: Depth map resized to the size of ``image`` and
        min-max normalised into the 0-255 range. A constant depth map yields
        an all-zero image.
    """
    input_data = extractor(images=image, return_tensors="pt").to(device)
    # The inference_mode decorator already disables autograd for this call,
    # so no nested torch.no_grad() block is needed.
    depth_output = estimator(**input_data).predicted_depth

    width, height = image.size
    # Add a channel axis and cast to float32 before resampling to the
    # resolution of the input image.
    depth_output = torch.nn.functional.interpolate(
        depth_output.unsqueeze(1).float(),
        size=(height, width),
        mode="bicubic",
        align_corners=False,
    )
    depth_min = torch.amin(depth_output, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_output, dim=[1, 2, 3], keepdim=True)
    # Guard against a constant depth map: a zero range would give 0/0 = NaN,
    # which has no defined uint8 value.
    depth_range = torch.clamp(
        depth_max - depth_min, min=torch.finfo(depth_output.dtype).eps
    )
    normalized_depth = (depth_output - depth_min) / depth_range
    depth_image = Image.fromarray((normalized_depth[0][0].cpu().numpy() * 255).astype(np.uint8))
    return depth_image

In [47]:
def resize_to_target(dim, max_size):
    """Scale ``dim`` down so its longer side equals ``max_size``.

    Args:
        dim: ``(width, height)`` pair.
        max_size: Upper bound for the longer side.

    Returns:
        ``dim`` itself when it already fits within ``max_size``; otherwise a
        ``(width, height)`` tuple with the longer side set to ``max_size`` and
        the shorter side scaled by the same ratio (truncated to an integer,
        but never below 1).
    """
    w, h = dim
    if max(w, h) <= max_size:
        return dim
    if w > h:
        # Clamp to 1: truncation would give a 0-pixel side for extreme
        # aspect ratios, which is not a valid resize target.
        return (max_size, max(1, int(max_size * (h / w))))
    return (max(1, int(max_size * (w / h))), max_size)

In [48]:
class ControlNetMultiPipeline:
    """Prompt-driven room redesign built from several pretrained models.

    A ControlNet inpainting pipeline (depth + segmentation ControlNets, with
    an IP-Adapter) repaints everything in the room except the excluded items
    (windows and doors). A second text-to-image pipeline produces a guide
    image from the same prompt, which is passed to the inpainting pipeline as
    the IP-Adapter image.
    """

    def __init__(self):
        """Load every model from the local ``models/`` directory and set defaults."""
        # NOTE(review): this is set after the Hugging Face libraries have been
        # imported; whether the flag is still honoured at this point may
        # depend on the library version — confirm.
        os.environ['HF_HUB_OFFLINE'] = "True"

        depth_model = ControlNetModel.from_pretrained("models/controlnet_depth", torch_dtype=dtype, use_safetensors=True)
        segment_model = ControlNetModel.from_pretrained("models/own_controlnet", torch_dtype=dtype, use_safetensors=True)

        # The ControlNet order here must match the order of `control_image`
        # and `controlnet_conditioning_scale` in generate_design.
        self.pipeline = StableDiffusionControlNetInpaintPipeline.from_pretrained(
            "models/Realistic_Vision_V5.1_noVAE",
            controlnet=[depth_model, segment_model],
            safety_checker=None,
            torch_dtype=dtype
        )
        self.pipeline.load_ip_adapter("models/models--h94--IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
        self.pipeline.set_ip_adapter_scale(0.4)
        self.pipeline.scheduler = UniPCMultistepScheduler.from_config(self.pipeline.scheduler.config)
        self.pipeline = self.pipeline.to(device)

        # Text-to-image pipeline whose output is used as the IP-Adapter image.
        self.guide_pipeline = StableDiffusionXLPipeline.from_pretrained(
            "models/models--segmind--SSD-1B", torch_dtype=dtype, use_safetensors=True, variant="fp16"
        ).to(device)

        self.seed = 323 * 111
        self.negative_prompt = "window, door, low resolution, banner, logo, watermark, text, deformed, blurry, out of focus, surreal, ugly, beginner"
        # Segmentation item names that are kept out of the inpainting mask.
        self.excluded_items = ["windowpane;window", "door;double;door"]
        self.quality_suffix = "interior design, 4K, high resolution, photorealistic"
        # One generator is created here and shared by every generate_design
        # call, so the result for an image depends on how many images were
        # generated before it.
        self.random_gen = torch.Generator(device=device).manual_seed(self.seed)

        self.seg_processor, self.seg_model = setup_segmentation_pipeline()
        self.depth_extractor, self.depth_model = setup_depth_pipeline()
        # Only the depth model is moved to `device`; the segmentation model
        # stays where from_pretrained put it.
        self.depth_model = self.depth_model.to(device)

    def generate_design(self, room_image, text_prompt):
        """Generate a redesigned version of ``room_image`` from ``text_prompt``.

        Args:
            room_image: PIL image of the room.
            text_prompt: Description of the desired design; the quality
                suffix is appended before it is sent to the pipelines.

        Returns:
            PIL.Image.Image: The generated image, resized (LANCZOS) to the
            size of ``room_image``.
        """
        print(text_prompt)
        prompt = f"{text_prompt}, {self.quality_suffix}"

        # The segmentation and diffusion stages below are fed this image as a
        # colour image, so RGBA / palette / grayscale inputs are normalised
        # to RGB up front.
        if room_image.mode != "RGB":
            room_image = room_image.convert("RGB")

        original_width, original_height = room_image.size
        resized_width, resized_height = resize_to_target(room_image.size, 768)
        resized_image = room_image.resize((resized_width, resized_height))

        segmentation_map = np.array(generate_segmentation(resized_image, self.seg_processor, self.seg_model))
        unique_colors = [tuple(color) for color in np.unique(segmentation_map.reshape(-1, 3), axis=0)]
        color_list, item_list = exclude_items(unique_colors, [map_colors_rgb(c) for c in unique_colors], self.excluded_items)

        # Mask is 1 wherever the pixel belongs to a non-excluded item, i.e.
        # everything except windows and doors gets repainted.
        mask = np.zeros_like(segmentation_map)
        for color in color_list:
            mask[(segmentation_map == color).all(axis=2)] = 1

        mask_image = Image.fromarray((mask * 255).astype(np.uint8)).convert("RGB")
        # NOTE(review): depth is computed from (and `image=` below receives)
        # the original-resolution image, while the mask and segmentation map
        # come from the image resized to at most 768 px. No explicit
        # height/width is passed to the inpainting pipeline, so the working
        # resolution is left to the pipeline — confirm this is intended.
        depth_image = compute_depth(room_image, self.depth_extractor, self.depth_model)

        # Guide image dimensions are rounded down to multiples of 8.
        ip_adapter_image = self.guide_pipeline(
            prompt,
            negative_prompt=self.negative_prompt,
            height=resized_height // 8 * 8,
            width=resized_width // 8 * 8,
            generator=self.random_gen
        ).images[0]

        generated_image = self.pipeline(
            prompt=prompt,
            negative_prompt=self.negative_prompt,
            num_inference_steps=50,
            strength=0.9,
            guidance_scale=10,
            generator=self.random_gen,
            image=room_image,
            mask_image=mask_image,
            ip_adapter_image=ip_adapter_image,
            control_image=[depth_image, Image.fromarray(segmentation_map).convert("RGB")],
            controlnet_conditioning_scale=[0.5, 0.5]
        ).images[0]

        return generated_image.resize((original_width, original_height), Image.Resampling.LANCZOS)

In [49]:
# Alias under which execute_pipeline instantiates the model class.
UserModel = ControlNetMultiPipeline

In [50]:
def process_single_image(image_path, description, pipeline_model):
    """Load one image, run the design pipeline on it, and return the result.

    Args:
        image_path: Path of the input image file.
        description: Text prompt passed to ``generate_design``.
        pipeline_model: Object exposing ``generate_design(image, prompt)``.

    Returns:
        PIL.Image.Image: The generated image in RGB mode.

    Raises:
        AssertionError: If the generated image size differs from the input
            image size.
    """
    # Image.open reads lazily and keeps the file open; convert() forces the
    # pixel data to be read so the handle can be released right away instead
    # of leaking one per processed image.
    with Image.open(image_path) as opened_image:
        input_image = opened_image.convert('RGB')
    result_image = pipeline_model.generate_design(input_image, description)
    result_image = result_image.convert('RGB')
    assert input_image.size == result_image.size, "Output image dimensions must match the input image dimensions"
    return result_image

In [51]:
def execute_pipeline(prompts_filepath, dataset_dir, output_dir):
    """Generate a design for every row of a prompt table and save the results.

    Args:
        prompts_filepath: Tab-separated file with ``image`` and ``prompt``
            columns.
        dataset_dir: Directory containing the input images named in the
            ``image`` column.
        output_dir: Directory the generated images are written to, under the
            same relative file names. Created if missing.
    """
    pipeline_model = UserModel()
    prompts_df = pd.read_csv(prompts_filepath, sep='\t')

    rows = zip(prompts_df["image"], prompts_df["prompt"])
    for image_name, prompt in tqdm(rows, total=len(prompts_df)):
        image_file = os.path.join(dataset_dir, image_name)
        output_image = process_single_image(image_file, prompt, pipeline_model)

        output_file = os.path.join(output_dir, image_name)
        # Create the destination folder (including any sub-directory that is
        # part of the image name) so saving does not depend on a separate
        # cell having created it.
        os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)
        output_image.save(output_file)

    print("Completed!")

In [53]:
# Tab-separated prompt table; execute_pipeline reads its "image" and "prompt" columns.
PROMPTS_FILE = 'demo_dataset_prompts.tsv'
# Directory holding the input images named in the "image" column.
DATASET_DIRECTORY = 'demo_dataset/'
# Directory the generated images are written to, under the same file names.
OUTPUT_DIRECTORY = 'outputs_submit_multi_check2/'

In [54]:
# Make sure the output directory exists before the run (no error if it already does).
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

In [55]:
# Generate and save a redesigned image for every row of the prompt table.
execute_pipeline(PROMPTS_FILE, DATASET_DIRECTORY, OUTPUT_DIRECTORY)

The config attributes {'dropout': 0.0, 'sample_size': 32} were passed to ControlNetModel, but are not expected and will be ignored. Please verify your config.json configuration file.


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

A Bauhaus-inspired living room with a sleek black leather sofa, a tubular steel coffee table exemplifying modernist design, and a geometric patterned rug adding a touch of artistic flair.
(1344, 896) (768, 512)


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

A glamorous master bedroom in Hollywood Regency style, boasting a plush tufted headboard, mirrored furniture reflecting elegance, luxurious fabrics in rich textures, and opulent gold accents for a touch of luxury.
(1344, 896) (768, 512)


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

A vibrant living room with a tropical theme, complete with comfortable rattan furniture, large leafy plants bringing the outdoors in, bright cushions adding pops of color, and bamboo blinds for natural light control.
(1344, 896) (768, 512)


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/45 [00:00<?, ?it/s]

Completed!
