In [1]:
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from Depth_Anything_V2.depth_anything_v2.dpt import DepthAnythingV2

import torch
import time
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import copy
import json
import numpy as np
import os
from tqdm.auto import tqdm
import random

In [2]:
def draw_points(img, points, connections):
    """Draw a labelled graph on ``img`` in place and return it.

    Edges are drawn first as green lines, then every node is drawn on top as a
    white circle with a red outline and its label centred inside.

    Args:
        img: PIL image to draw on. It is modified in place.
        points: sequence of ``(x, y, label)`` triples; ``label`` is the text
            rendered inside the node.
        connections: iterable of ``(i, j)`` index pairs into ``points``; one
            line is drawn per pair.

    Returns:
        The same ``img`` object that was passed in.
    """
    circle_radius = 20
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("arial.ttf", size=20)
    except OSError:
        # Arial is not installed on every machine; fall back to Pillow's
        # built-in font instead of aborting the whole run.
        font = ImageFont.load_default()

    # Edges first so that the node circles are painted over the line ends.
    for i, j in connections:
        x1, y1, _ = points[i]
        x2, y2, _ = points[j]
        draw.line((x1, y1, x2, y2), fill="green", width=9)

    for x, y, label in points:
        draw.ellipse(
            (x - circle_radius, y - circle_radius, x + circle_radius, y + circle_radius),
            outline="red", fill="white", width=4)
        # Centre the label using its rendered bounding box; the extra -4 is a
        # manual vertical nudge.
        bbox = font.getbbox(label)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        draw.text((x - text_width/2, y - text_height/2-4), label, fill='black', font=font)
    return img

def draw_json(img, jd):
    """Return a copy of ``img`` with the quadrilaterals described by ``jd`` drawn on it.

    Args:
        img: a PIL image, or a path (``str`` / ``os.PathLike``) to an image
            file. An image passed in is deep-copied, so the caller's object is
            never modified.
        jd: the parsed JSON description, or a path to a UTF-8 JSON file holding
            it. It must have a ``'houses'`` mapping of mappings; element ``[1]``
            of every innermost value is read as a list of up to four points,
            which are labelled ``A``-``D`` and joined into a closed loop.

    Returns:
        The annotated PIL image.
    """
    if isinstance(img, (str, os.PathLike)):
        img = Image.open(img)
    else:
        # Work on a copy: draw_points paints directly onto the image it gets.
        img = copy.deepcopy(img)

    if isinstance(jd, (str, os.PathLike)):
        with open(jd, "r", encoding="utf-8") as f:
            jd = json.load(f)

    corner_labels = ['A', 'B', 'C', 'D']
    connections = [[0, 1], [1, 2], [2, 3], [3, 0]]
    for house in jd['houses'].values():
        for entry in house.values():
            # Append the corner label to each point's coordinates.
            points = [[*p, a] for p, a in zip(entry[1], corner_labels)]
            img = draw_points(img, points, connections)
    return img

In [3]:
# Build the Stable Diffusion XL + depth ControlNet pipeline.
#
# The components are intentionally not moved to CUDA by hand:
# enable_model_cpu_offload() manages device placement itself, so an explicit
# .to('cuda') beforehand only loads all weights onto the GPU to have them
# moved straight back to the CPU.

controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-depth-sdxl-1.0",
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()

In [4]:
# Initialise the Depth Anything V2 model (depth-map estimator).
# The model is used for inference only, so it is switched to eval mode.

dmodel = DepthAnythingV2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024]).cuda().eval()
# NOTE(review): weights_only=False unpickles arbitrary objects; if this
# checkpoint is a plain state dict, weights_only=True is the safer setting.
dmodel.load_state_dict(torch.load('depth_anything_v2_vitl.pth', map_location='cuda', weights_only=False))

In [5]:
# Load the ordered list of views.
# `city_path` is a placeholder and has to be set to the source data directory.
city_path = ...
with open("sorted_views.json", encoding="utf-8") as f:
    v_list = json.load(f)

In [6]:
# Output directories (placeholders, to be filled in before running).
pgi = ... # Directory for the generated images
pgd = ... # Directory for the generated depth maps
pgv = ... # Directory for the preview strips with the drawn graphs

# Black 1024x1024 RGB canvas on which the graphs are drawn.
black = Image.fromarray(np.zeros((1024, 1024, 3), dtype=np.uint8))

# 518x518 is used both as the depth-model input size and as the preview tile size.
res = transforms.Resize((518, 518))

# Preprocessing for the depth model. Depth Anything V2 normalises its input
# with the ImageNet statistics, so the same values are used here.
transform = transforms.Compose([
    res,
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [7]:
# Generation prompts: one shared negative prompt and a pool of positive
# prompts from which one is drawn at random for every view.
negative_prompt = ["low quality, bad quality, sketches, collage"]

prompt = [
    # Modern new-build districts
    "ultra-realistic 4k visualization of a modern Russian residential complex, new high-rise buildings, construction site, glass and concrete, construction, urban environment, overcast skies",
    "4k photo of recently built Russian apartment blocks, realistic lighting, detailed facades, modern urban planning, pedestrian streets, cars, parking, Russia, construction",
    "realistic rendering of Russian cityscape with new multi-storey buildings, residential area, contemporary architecture, trees, roads, realism, construction",
    # Soviet-era panel housing
    "photo-realistic visualization of Soviet-style panel buildings, Khrushchyovka, worn facades, Russia, overcast day, gloomy mood, trees without leaves, construction",
    "realistic 4k photo of 1980s Russian apartment block, peeling paint, balconies with laundry, satellite dishes, courtyard with garages, gray sky, construction",
    "Soviet district in Russia, high-rise panel buildings, yard with playground, broken benches, realistic urban atmosphere, cinematic lighting, construction",
    # Run-down outskirts
    "realistic photo of Russian ghetto district, high-rise buildings, graffiti, dark overcast atmosphere, wet pavement, realism, HDR, construction",
    "depressing urban landscape in Russia, tall concrete blocks, cloudy day, cold colors, HDR detail, street wires, industrial look, construction",
    "realistic Russian city outskirts, massive residential buildings, abandoned playground, snowy sidewalk, dull lighting, construction",
    # Seasons and time of day
    "winter evening in Russian residential area, snow-covered panel buildings, warm lights in windows, realistic depth, 4k, construction",
    "Russian city block in autumn, yellow leaves, wet roads, realistic overcast lighting, puddles, realism, depth map match, construction",
    "spring morning in Moscow residential neighborhood, apartment blocks, trees in bloom, realistic photo-style render, construction",
    # Photographic / rendering styles
    "realistic architectural visualization, depth-based view, urban Russia, cinematic color grading, detailed high-rise housing, construction",
    "DSLR photo of typical Russian residential district, high contrast, lens flare, slightly foggy, HDR details, construction",
    "photo of Russian apartment buildings from drone, aerial perspective, realistic depth, shadows, texture details, construction",
]

In [8]:
# Main generation loop. For every view it:
#   1. draws the view's graph on a black canvas,
#   2. generates an image conditioned on the view's depth map,
#   3. re-estimates a depth map from the generated image,
#   4. saves a preview strip (image | depth | graph), the depth map and the image.

# ControlNet conditioning strength. It is fixed: a random value from
# [0.5, 0.6, 0.7] used to be sampled here but was never passed to the pipeline.
CONTROLNET_SCALE = 0.7

for v in tqdm(v_list[:17476]):
    ig_name = v + '_gi.jpg'
    # The generated image is written last, so its presence marks a finished
    # view. A direct existence check avoids listing the whole output directory
    # on every iteration.
    if os.path.exists(os.path.join(pgi, ig_name)):
        continue

    json_path = os.path.join(city_path, v + '.json')
    with open(json_path, "r", encoding="utf-8") as f:
        json_info = json.load(f)

    # Image with the control points drawn on it
    info_img = draw_json(black, json_info)
    info_img = res(info_img)

    # Image generation
    depth_path = os.path.join(city_path, v + '_depth.jpg')
    with Image.open(depth_path) as depth, torch.no_grad():
        image_gen = pipe([random.choice(prompt)], negative_prompt=negative_prompt,
                         image=depth, guidance_scale=13, num_images_per_prompt=1,
                         num_inference_steps=25,
                         controlnet_conditioning_scale=CONTROLNET_SCALE).images[0]
    torch.cuda.empty_cache()

    # Depth-map estimation for the generated image
    input_tensor = transform(image_gen).unsqueeze(0)
    with torch.no_grad():
        gen_depth = dmodel(input_tensor.cuda()).cpu()
    torch.cuda.empty_cache()

    # Min-max normalisation to 0..255. The range (max - min) is the divisor,
    # clamped so that a constant depth map cannot cause a division by zero.
    # NOTE(review): the repeat/permute below assumes gen_depth is (1, H, W) - confirm.
    depth_min = gen_depth.min()
    depth_span = (gen_depth.max() - depth_min).clamp_min(1e-8)
    gen_depth_norm = ((gen_depth - depth_min) / depth_span * 255).to(torch.uint8).repeat(3, 1, 1).permute(1, 2, 0).numpy()
    depth_pil = Image.fromarray(gen_depth_norm)

    # Preview strip: generated image | depth map | graph, side by side
    img_gen_res = res(image_gen)
    cat_list = [np.array(img_gen_res), gen_depth_norm, np.array(info_img)]
    cat_img = np.concatenate(cat_list, axis=1)
    pv = Image.fromarray(cat_img)

    # Saving. The generated image goes last because it is the "done" marker
    # checked at the top of the loop.
    pv.save(os.path.join(pgv, v + '_gv.jpg'))
    depth_pil.save(os.path.join(pgd, v + '_gd.jpg'))
    image_gen.save(os.path.join(pgi, ig_name))