In [None]:
# Print GPU / driver status to check that an NVIDIA device is visible to this runtime.
!nvidia-smi

In [None]:
!pip install -r "./requirements.txt"

In [None]:
from dataclasses import dataclass

import cv2
import numpy as np
import torch
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetPipeline,
    StableDiffusionImg2ImgPipeline,
    StableDiffusionPipeline,
    UniPCMultistepScheduler,
)
from huggingface_hub import login
from PIL import Image
from safetensors import safe_open
from torchvision import transforms

In [None]:
# Authenticate with the Hugging Face Hub. No token is passed here, so `login()` asks for one.
login()

In [None]:
# Choose the compute device once: half precision on a CUDA GPU, full precision on CPU.
has_cuda = torch.cuda.is_available()

device_name = torch.device("cuda" if has_cuda else "cpu")
torch_dtype = torch.float16 if has_cuda else torch.float32
print("Using CUDA" if has_cuda else "Using CPU")

In [None]:
@dataclass(eq=False)
class LocalModel:
    """Configuration record for one local LoRA checkpoint and the models it is used with.

    The generated ``__init__`` takes the five fields below, in this order, positionally
    or by keyword. ``eq=False`` keeps identity-based equality and hashing, so instances
    remain usable as dict keys / set members; the dataclass adds a readable ``repr``.
    """

    # Path of the LoRA ``.safetensors`` file on disk.
    local_path: str
    # Hub id (or path) of the base diffusion model the LoRA is applied to.
    base_model: str
    # Hub id (or path) of the segmentation ControlNet.
    controlnet_seg_model: str
    # Hub id (or path) of the depth ControlNet.
    controlnet_dep_model: str
    # Hub id (or path) of the edge ControlNet.
    controlnet_edg_model: str

In [None]:
# Template for the LoRA checkpoint paths; `{}` is replaced by the file name.
base_path = "./safetensors/{}"

# Settings shared by the entries below: the SD 1.5 base model and the three
# ControlNet checkpoints (segmentation, depth, canny edges) every entry is paired with.
sd15_base_model = "runwayml/stable-diffusion-v1-5"
sd15_controlnets = dict(
    controlnet_seg_model="lllyasviel/control_v11p_sd15_seg",
    controlnet_dep_model="lllyasviel/control_v11f1p_sd15_depth",
    controlnet_edg_model="lllyasviel/control_v11p_sd15_canny",
)

model_1 = LocalModel(
    local_path=base_path.format("A bird's-eye view of architecture.safetensors"),
    base_model=sd15_base_model,
    **sd15_controlnets,
)
model_2 = LocalModel(
    local_path=base_path.format("AARG_aerial-000018.safetensors"),
    base_model=sd15_base_model,
    **sd15_controlnets,
)
model_3 = LocalModel(
    local_path=base_path.format("aerial view-V2.safetensors"),
    base_model=sd15_base_model,
    **sd15_controlnets,
)
# NOTE(review): this entry combines a FLUX base model with the SD 1.5 ControlNets;
# presumably that pairing cannot be loaded by a Stable Diffusion ControlNet pipeline —
# confirm before selecting it.
model_4 = LocalModel(
    local_path=base_path.format("FLUXD-Style-Urban_Jungles-urjungle.safetensors"),
    base_model="black-forest-labs/FLUX.1-dev",
    **sd15_controlnets,
)

In [None]:
# https://docs.google.com/spreadsheets/d/1se8YEtb2detS7OuPE86fXGyD269pMycAWe2mtKUj2W8/edit?gid=0#gid=0
# ADE20K Class -> Roads -> #8C8C8C
# ADE20K Class -> Buildings -> #B47878
# ADE20K Class -> Grass -> #04FA07
# ADE20K Class -> Water -> #3DE6FA
# ADE20K Class -> Sidewalk -> #EBFF07
# ADE20K Class -> Sky -> #06E6E6


# Local
# LoRA configuration used for this run.
model = model_3

# Load the three ControlNets in one pass. The order (depth, segmentation, edge) is the
# order in which they are handed to the pipeline below.
controlnet_depth, controlnet_seg, controlnet_edge = (
    ControlNetModel.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device_name)
    for repo_id in (
        model.controlnet_dep_model,
        model.controlnet_seg_model,
        model.controlnet_edg_model,
    )
)

# Base model wired to the three ControlNets.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    model.base_model,
    controlnet=[controlnet_depth, controlnet_seg, controlnet_edge],
    torch_dtype=torch_dtype,
)
pipe = pipe.to(device_name)

# Load the local LoRA checkpoint and fuse it into the pipeline.
pipe.load_lora_weights(model.local_path)
pipe.fuse_lora()

# Replace the default scheduler with UniPC, reusing the existing scheduler config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# HF
# controlnet_depth = ControlNetModel.from_pretrained(
#     "lllyasviel/control_v11f1p_sd15_depth", torch_dtype=torch_dtype
# ).to(device_name)
# controlnet_seg = ControlNetModel.from_pretrained(
#     "lllyasviel/control_v11p_sd15_seg", torch_dtype=torch_dtype
# ).to(device_name)

# pipe = StableDiffusionControlNetPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5", controlnet=[controlnet_depth, controlnet_seg], torch_dtype=torch_dtype,
#     use_auth_token=True
# ).to(device_name)

# from diffusers import StableDiffusion3Pipeline

# pipe = StableDiffusion3Pipeline.from_pretrained(
#     "stabilityai/stable-diffusion-3.5-large-turbo", torch_dtype=torch_dtype
# ).to(device_name)

In [None]:
# Input images all live in ./images; `{}` is replaced by the file name.
image_dir = "./images/{}"

image_ade20k = image_dir.format("mapa_ADE20K_FIX.png")
image_depth = image_dir.format("mapa_DEPTH_FIX.png")
image_real = image_dir.format("mapa_REALISTIC.png")
image_edge = image_dir.format("mapa_EDGE.png")

In [None]:
def load_image(image_path, size=(512, 512), interpolation=None):
    """Read an image file and return it as an 8-bit RGB ``PIL.Image`` resized to ``size``.

    Args:
        image_path: Path of the image file to read.
        size: Target ``(width, height)`` in pixels, passed to ``cv2.resize``.
        interpolation: Optional OpenCV interpolation flag for the resize. ``None``
            selects ``cv2.INTER_LINEAR``. Pass ``cv2.INTER_NEAREST`` for colour-coded
            maps (e.g. segmentation) so colours are not blended at region borders.

    Returns:
        A 3-channel RGB ``PIL.Image.Image``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path`` (missing file or a
            file it cannot decode).
    """
    image = cv2.imread(str(image_path), cv2.IMREAD_UNCHANGED)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {image_path!r}")

    # IMREAD_UNCHANGED keeps the stored bit depth; scale 16-bit data (common for depth
    # maps) down to 8-bit so PIL can build an RGB image from it (65535 // 257 == 255).
    if image.dtype == np.uint16:
        image = (image // 257).astype(np.uint8)

    # IMREAD_UNCHANGED also keeps the stored channel layout, so pick the matching
    # conversion instead of assuming 3-channel BGR.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
    else:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if interpolation is None:
        interpolation = cv2.INTER_LINEAR
    image = cv2.resize(image, tuple(size), interpolation=interpolation)
    return Image.fromarray(image)

In [None]:
# Read the reference image and the three control maps (all resized by load_image).
base_image, depth_map, seg_map, edge_map = map(
    load_image, (image_real, image_depth, image_ade20k, image_edge)
)

# Turn each control map into a tensor with a leading batch dimension of 1.
transform = transforms.ToTensor()
depth_tensor, seg_tensor, edge_tensor = (
    transform(control_map).unsqueeze(0)
    for control_map in (depth_map, seg_map, edge_map)
)

In [None]:
# Text conditioning shared by the generation cells below.
prompt = "A realistic aerial view from a city"
# Traits to steer away from, joined into one comma-separated negative prompt.
negative_prompt = ", ".join(("blurry", "low resolution", "unrealistic", "distorted"))
# Number of images to generate per prompt.
num_images = 1

# CONTROLNET

In [None]:
# Multi-ControlNet text-to-image generation. The control images and the conditioning
# scales are listed in the same order as the ControlNets given to the pipeline:
# depth, segmentation, edge.
#
# `strength` was removed: it is an img2img argument and is not a parameter of
# StableDiffusionControlNetPipeline, so it had no effect on this call.
#
# NOTE(review): diffusers applies classifier-free guidance only when guidance_scale > 1,
# so with the value below `negative_prompt` is not used. Raise it (e.g. 7.5) if the
# prompts are meant to influence the result.
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=[depth_tensor, seg_tensor, edge_tensor],
    num_inference_steps=25,
    guidance_scale=0.00001,
    controlnet_conditioning_scale=[1.0, 1.0, 1.0],
    num_images_per_prompt=num_images,
    # Fixed seed so Restart & Run All gives the same image; a CPU generator works
    # whether the pipeline runs on GPU or CPU.
    generator=torch.Generator("cpu").manual_seed(0),
)

In [None]:
# Display the first image returned by the pipeline call above.
output.images[0]

# IMG 2 IMG

In [None]:
# Plain image-to-image pipeline; no ControlNet or LoRA is attached in this cell.
# Rebinding `pipe` replaces the ControlNet pipeline created earlier.
# Use `device_name` (not a hard-coded "cuda") so this also runs on CPU-only machines
# and stays consistent with the `torch_dtype` chosen for that device.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch_dtype
).to(device_name)

In [None]:
# Image-to-image pass over the realistic base image.
#
# NOTE(review): diffusers applies classifier-free guidance only when guidance_scale > 1,
# so at 1 `negative_prompt` is not used. Raise it if the negative prompt should matter.
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=base_image,
    strength=0.2,  # How far to move away from `image` (0 = keep the input, 1 = ignore it)
    guidance_scale=1,  # Higher values enforce the prompt more
    num_inference_steps=50,
    num_images_per_prompt=num_images,
    # Fixed seed so Restart & Run All gives the same image; a CPU generator works
    # whether the pipeline runs on GPU or CPU.
    generator=torch.Generator("cpu").manual_seed(0),
)

In [None]:
# Display the first image returned by the img2img call above.
output.images[0]