In [None]:
# Report the GPU(s) visible to this runtime before any model is loaded.
!nvidia-smi

In [None]:
!pip install -r "/content/requirements.txt"
!pip install --upgrade "torch<2.6.0" "xformers==0.0.28.post3" diffusers

In [None]:
import torch
import numpy as np
import cv2

from torchvision import transforms
from huggingface_hub import login

from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, UniPCMultistepScheduler
from PIL import Image
from safetensors import safe_open

In [None]:
# Authenticate with the Hugging Face Hub (login is imported from huggingface_hub).
# NOTE(review): presumably required for the gated checkpoints loaded further down — confirm.
login()

In [None]:
# Choose the compute device once for the whole notebook:
# half precision on a CUDA GPU, full precision on the CPU fallback.
_has_cuda = torch.cuda.is_available()
device_name = torch.device("cuda" if _has_cuda else "cpu")
torch_dtype = torch.float16 if _has_cuda else torch.float32
print("Using CUDA" if _has_cuda else "Using CPU")

In [None]:
class LocalModel:
    """Identifiers describing one LoRA + base model + ControlNet combination.

    A plain value holder: every constructor argument is stored unchanged on an
    attribute of the same name.

    Args:
        local_path: Path of the LoRA weights file.
        base_model: Identifier of the base diffusion checkpoint.
        controlnet_seg_model: Identifier of the segmentation ControlNet.
        controlnet_dep_model: Identifier of the depth ControlNet.
        controlnet_edg_model: Identifier of the edge ControlNet.
    """

    def __init__(
        self,
        local_path: str,
        base_model: str,
        controlnet_seg_model: str,
        controlnet_dep_model: str,
        controlnet_edg_model: str,
    ) -> None:
        # LoRA file and the checkpoint it is applied to.
        self.local_path, self.base_model = local_path, base_model
        # One ControlNet identifier per conditioning signal.
        self.controlnet_seg_model = controlnet_seg_model
        self.controlnet_dep_model = controlnet_dep_model
        self.controlnet_edg_model = controlnet_edg_model

In [None]:
import os

# The LoRA files live in a Kaggle dataset when running on Kaggle and in a
# local folder otherwise. Pick whichever location exists so the same cell
# works in both environments; the result is a template with one "{}" slot
# for the file name.
_KAGGLE_LORA_DIR = "/kaggle/input/safetensors"
_LOCAL_LORA_DIR = "./safetensors"
base_path = (
    _KAGGLE_LORA_DIR if os.path.isdir(_KAGGLE_LORA_DIR) else _LOCAL_LORA_DIR
) + "/{}"

# ControlNet checkpoints shared by every preset below.
_SD15_CONTROLNETS = dict(
    controlnet_seg_model="lllyasviel/control_v11p_sd15_seg",
    controlnet_dep_model="lllyasviel/control_v11f1p_sd15_depth",
    controlnet_edg_model="lllyasviel/control_v11p_sd15_canny",
)

model_1 = LocalModel(
    local_path=base_path.format("A bird's-eye view of architecture.safetensors"),
    base_model="runwayml/stable-diffusion-v1-5",
    **_SD15_CONTROLNETS,
)
model_2 = LocalModel(
    local_path=base_path.format("AARG_aerial-000018.safetensors"),
    base_model="runwayml/stable-diffusion-v1-5",
    **_SD15_CONTROLNETS,
)
model_3 = LocalModel(
    local_path=base_path.format("aerial view-V2.safetensors"),
    base_model="runwayml/stable-diffusion-v1-5",
    **_SD15_CONTROLNETS,
)
# NOTE(review): this preset pairs a FLUX.1-dev base with the SD 1.5 ControlNet
# checkpoints. The repository names suggest different model families, so it
# likely cannot be loaded by the SD 1.5 ControlNet pipeline — confirm before
# selecting it.
model_4 = LocalModel(
    local_path=base_path.format("FLUXD-Style-Urban_Jungles-urjungle.safetensors"),
    base_model="black-forest-labs/FLUX.1-dev",
    **_SD15_CONTROLNETS,
)

In [None]:
# https://docs.google.com/spreadsheets/d/1se8YEtb2detS7OuPE86fXGyD269pMycAWe2mtKUj2W8/edit?gid=0#gid=0
# ADE20K Class -> Roads -> #8C8C8C
# ADE20K Class -> Buildings -> #B47878
# ADE20K Class -> Grass -> #04FA07
# ADE20K Class -> Water -> #3DE6FA
# ADE20K Class -> Sidewalk -> #EBFF07
# ADE20K Class -> Sky -> #06E6E6


# Local
# Preset whose checkpoints and LoRA file are loaded below.
model = model_3

# Load the depth, segmentation and edge ControlNets (in that order) and move
# each one to the selected device.
controlnet_depth, controlnet_seg, controlnet_edge = (
    ControlNetModel.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device_name)
    for repo_id in (
        model.controlnet_dep_model,
        model.controlnet_seg_model,
        model.controlnet_edg_model,
    )
)

# Assemble the multi-ControlNet pipeline; the list order here fixes the order
# in which control images must be passed at call time.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    model.base_model,
    torch_dtype=torch_dtype,
    controlnet=[controlnet_depth, controlnet_seg, controlnet_edge],
)
pipe = pipe.to(device_name)

# Apply the preset's LoRA weights and fuse them into the pipeline.
pipe.load_lora_weights(model.local_path)
pipe.fuse_lora()

# Replace the default scheduler with UniPC, reusing the existing scheduler config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# HF
# controlnet_depth = ControlNetModel.from_pretrained(
#     "lllyasviel/control_v11f1p_sd15_depth", torch_dtype=torch_dtype
# ).to(device_name)
# controlnet_seg = ControlNetModel.from_pretrained(
#     "lllyasviel/control_v11p_sd15_seg", torch_dtype=torch_dtype
# ).to(device_name)

# pipe = StableDiffusionControlNetPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5", controlnet=[controlnet_depth, controlnet_seg], torch_dtype=torch_dtype,
#     use_auth_token=True
# ).to(device_name)

# from diffusers import StableDiffusion3Pipeline

# pipe = StableDiffusion3Pipeline.from_pretrained(
#     "stabilityai/stable-diffusion-3.5-large-turbo", torch_dtype=torch_dtype
# ).to(device_name)

In [None]:
import cv2
import torch
from PIL import Image
from torchvision import transforms

# Conditioning images, keyed by the role each one plays in the pipelines.
image_paths = dict(
    real="./images/mapa_REALISTIC.png",
    depth="./images/mapa_DEPTH_FIX.png",
    seg="./images/mapa_ADE20K_FIX.png",
    edge="./images/mapa_EDGE.png",
)

# Image processing function
def load_image(image_path, size=(512, 512)):
    """Open an image file and return it as an RGB PIL image of the given size.

    Args:
        image_path: Location of the image file to open.
        size: Target ``(width, height)`` in pixels; defaults to 512x512.

    Returns:
        A new RGB PIL image resized to exactly ``size`` with Lanczos
        resampling. The original aspect ratio is not preserved.
    """
    # The context manager releases the underlying file handle as soon as
    # convert() has produced an independent in-memory copy.
    with Image.open(image_path) as source:
        return source.convert("RGB").resize(size, Image.LANCZOS)

# Open and resize every file listed in image_paths, keeping the same keys.
images = dict(zip(image_paths, map(load_image, image_paths.values())))

# Turn each PIL image into a tensor and prepend a batch dimension.
transform = transforms.ToTensor()
image_tensors = {role: transform(picture)[None] for role, picture in images.items()}

# Convenience names for the individual conditioning tensors.
tile_tensor, depth_tensor, seg_tensor, edge_tensor = (
    image_tensors[role] for role in ("real", "depth", "seg", "edge")
)

In [None]:
# Text conditioning shared by the generation cells below.
prompt = (
    "Ultra-realistic aerial view of a modern city, captured from a "
    "high-resolution satellite or drone. "
    "Skyscrapers, residential areas, roads, and parks are detailed and proportional, "
    "with natural lighting and depth. "
    "No distortions, warped buildings, or AI artifacts. "
    "Clear atmosphere with soft haze near the horizon."
)
negative_prompt = (
    "Distorted buildings, warped roads, AI artifacts, "
    "unnatural lighting, low detail, fisheye effect."
)
# Number of images to generate.
num_images = 1

# CONTROLNET3 + SD3

In [None]:
from diffusers.models import SD3ControlNetModel, SD3MultiControlNetModel
from diffusers import StableDiffusion3ControlNetPipeline

# load pipeline
# One SD3 ControlNet per conditioning signal: canny edges, depth and tile.
controlnet_canny = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Canny", torch_dtype=torch_dtype)
controlnet_depth = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Depth", torch_dtype=torch_dtype)
controlnet_tile = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Tile", torch_dtype=torch_dtype)

# NOTE: this rebinds `pipe` (and `controlnet_depth`), replacing whatever
# pipeline those names referred to before this cell ran.
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    controlnet=[controlnet_canny, controlnet_depth, controlnet_tile],
    torch_dtype=torch_dtype
)

# Move to the selected device first
pipe.to(device_name)

# xformers memory-efficient attention is only usable on CUDA, so skip it on
# the CPU fallback instead of failing.
if torch.cuda.is_available():
    pipe.enable_xformers_memory_efficient_attention()

# SD3 pipelines denoise with `pipe.transformer`; they have no `unet`
# attribute, so compile the transformer (AFTER moving to the device).
pipe.transformer = torch.compile(pipe.transformer)

In [None]:
# Generate with the multi-ControlNet pipeline and keep the first image.
# Control images are passed in the order edge, depth, tile.
image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    control_image=[edge_tensor, depth_tensor, tile_tensor],
    controlnet_conditioning_scale=[1.0, 1.0, 1.0],
    guidance_scale=7.5,
    num_inference_steps=50,
).images[0]

In [None]:
# Display the generated image as the cell output.
image

# CONTROLNET

In [None]:
# Multi-ControlNet call; control images are passed in the order depth, segmentation, edge.
# NOTE(review): read top to bottom, `pipe` was last rebound to the SD3 ControlNet
# pipeline, which is called with `control_image=` rather than `image=`. This call
# appears to target the SD 1.5 pipeline built with [depth, seg, edge] ControlNets —
# re-create that pipeline before running this cell, or confirm which `pipe` is intended.
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=[depth_tensor, seg_tensor, edge_tensor],
    num_inference_steps=25,
    strength=0.00001,  # NOTE(review): `strength` is normally an img2img argument — confirm it has any effect here
    guidance_scale=0.00001,  # NOTE(review): near-zero guidance presumably disables classifier-free guidance — confirm this is intended
    controlnet_conditioning_scale=[1.0, 1.0, 1.0],
)

In [None]:
# Display the first image of the pipeline output.
output.images[0]

# IMG 2 IMG

In [None]:
# Plain SD 1.5 image-to-image pipeline (no ControlNet). It is placed on the
# device chosen at the top of the notebook so that the float32 CPU fallback
# keeps working; a hard-coded "cuda" would fail on machines without a GPU.
# NOTE: this rebinds `pipe`.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch_dtype
).to(device_name)

In [None]:
# Image-to-image pass over the realistic reference render.
# `base_image` was never defined in this notebook, so the call raised a
# NameError; the RGB image loaded under the "real" key is used instead.
# NOTE(review): presumably the intended input — confirm.
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=images["real"],
    strength=0.2,  # Controls how much the image should change (0 = minor edits, 1 = full generation)
    guidance_scale=1,  # Higher values enforce the prompt more
    num_inference_steps=50,
)

In [None]:
# Display the first image of the image-to-image output.
output.images[0]

## TESTE SHAKKER

In [None]:
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, DPMSolverMultistepScheduler
from diffusers.models import AutoencoderKL
from PIL import Image
import numpy as np

# Load Base Model (Stable Diffusion 1.5 or Realistic Vision V2.0)
base_model = "SG161222/Realistic_Vision_V2.0"

# Load ControlNet Models for Depth, Edge, and ADE20K Segmentation
controlnet_depth = ControlNetModel.from_pretrained("lllyasviel/control_v11f1p_sd15_depth", torch_dtype=torch.float16)
controlnet_edge = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
controlnet_seg = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_seg", torch_dtype=torch.float16)  # ADE20K

# Combine ControlNets; this order (depth, edge, seg) is the order in which
# control images must be passed when the pipeline is called.
controlnets = [controlnet_depth, controlnet_edge, controlnet_seg]
# Separate VAE checkpoint used in place of the one bundled with the base model.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)

# Load Pipeline with ControlNets
# NOTE: this cell is self-contained (float16 on "cuda") and rebinds `pipe`.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    base_model,
    controlnet=controlnets,
    vae=vae,
    torch_dtype=torch.float16
).to("cuda")

# Use DPM++ 2M Karras Sampler for better results.
# DPMSolverMultistepScheduler only applies the Karras sigma schedule when
# use_karras_sigmas=True; without it the default schedule is used.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)

# Load Depth, Edge, and ADE20K Segmentation Maps as 768x576 RGB images
depth_map = Image.open("/kaggle/input/teste-diffusers/images/mapa_DEPTH_FIX.png").convert("RGB").resize((768, 576))
edge_map = Image.open("/kaggle/input/teste-diffusers/images/mapa_EDGE.png").convert("RGB").resize((768, 576))
ade20k_map = Image.open("/kaggle/input/teste-diffusers/images/mapa_ADE20K_FIX.png").convert("RGB").resize((768, 576))  # ADE20K

# Convert images to numpy arrays scaled by 1/255.
# NOTE(review): the generation cell passes the PIL maps, not these arrays —
# confirm whether they are still needed.
depth_array = np.array(depth_map) / 255.0
edge_array = np.array(edge_map) / 255.0
ade20k_array = np.array(ade20k_map) / 255.0

In [None]:
# Text conditioning for this run.
prompt = (
    "Ultra-realistic aerial view of a modern city, captured from a "
    "high-resolution satellite or drone. "
    "Skyscrapers, residential areas, roads, and parks are detailed and proportional, "
    "with natural lighting and depth. "
    "No distortions, warped buildings, or AI artifacts. "
    "Clear atmosphere with soft haze near the horizon."
)
negative_prompt = (
    "Distorted buildings, warped roads, AI artifacts, "
    "unnatural lighting, low detail, fisheye effect."
)

# Run the multi-ControlNet pipeline. Control images and their conditioning
# scales are listed in the order depth, edge, ADE20K segmentation.
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=[depth_map, edge_map, ade20k_map],
    controlnet_conditioning_scale=[1.0, 0.9, 0.8],
    guidance_scale=7.5,
    num_inference_steps=50,
)

# Display the first generated image (nothing is written to disk here).
output.images[0]