# Download
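Download the following archives and place them, with the file names listed below unchanged, in this folder (the one containing this notebook). The unzipping step opens them by these exact names.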
## CamVid
archive.zip  >  https://www.kaggle.com/datasets/carlolepelaars/camvid
## Kitti
data_semantics_pixel.zip  >  https://www.cvlibs.net/datasets/kitti/eval_semseg.php?benchmark=semantics2015
## Cityscapes
leftImg8bit_trainvaltest.zip , gtFine_trainvaltest.zip  >  https://www.cityscapes-dataset.com/downloads/
## WildDash2
wd_public_v2p0.zip  >  https://www.wilddash.cc/download

# Imports

In [None]:
import zipfile
import os
import shutil
import os
from PIL import Image
import cv2
import random
from pathlib import Path
import numpy as np

# Unzipping

In [2]:
# Extract every downloaded archive into its dataset folder, then delete the
# archive(s). Each entry is (archives, destination); all archives of an entry
# are extracted before any of them is removed.
extraction_plan = [
    # CamVid
    (("./archive.zip",), "./"),
    # Kitti
    (("./data_semantics_pixel.zip",), "./Kitti/"),
    # Cityscapes (images and annotations share one destination)
    (("./leftImg8bit_trainvaltest.zip", "./gtFine_trainvaltest.zip"), "./Cityscapes/"),
    # WildDash2
    (("./wd_public_v2p0.zip",), "./WildDash2/"),
]

for archives, destination in extraction_plan:
    for archive in archives:
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            zip_ref.extractall(destination)
    for archive in archives:
        os.remove(archive)

# Moving around, deleting and remapping files

In [3]:
def move_files(src, dst):
    """Move every entry found directly inside ``src`` into ``dst``.

    ``dst`` is created (including parents) when it does not exist yet.
    Entries are moved with ``shutil.move``, so sub-directories of ``src``
    are moved as whole directories. ``src`` itself is left in place.
    """
    if not os.path.exists(dst):
        os.makedirs(dst)
    for entry in os.listdir(src):
        shutil.move(os.path.join(src, entry), dst)
        
def move_files_recursive(src, dst):
    """Flatten the directory tree under ``src`` into the single folder ``dst``.

    Files at any depth below ``src`` are moved directly into ``dst`` (the
    sub-directory structure is not reproduced). Each sub-directory is removed
    once it has been emptied; ``src`` itself is kept. ``dst`` is created when
    it does not exist yet.
    """
    if not os.path.exists(dst):
        os.makedirs(dst)

    for entry in os.listdir(src):
        entry_path = os.path.join(src, entry)

        if os.path.isfile(entry_path):
            shutil.move(entry_path, os.path.join(dst, entry))
            continue

        if not os.path.isdir(entry_path):
            # Neither a regular file nor a directory: leave it alone.
            continue

        # Recurse with the same destination so nested files land flat in dst.
        move_files_recursive(entry_path, dst)
        if not os.listdir(entry_path):
            os.rmdir(entry_path)
                
def recolor_images(input_folder, output_folder, color_mapping):
    """Remap pixel colours of every image in ``input_folder``.

    Each file whose name ends in .png/.jpg/.jpeg (case-insensitive) is opened,
    converted to RGB, and every pixel is looked up in ``color_mapping``
    (a dict of ``(r, g, b) -> (r, g, b)``). Pixels without an entry keep their
    colour. The result is saved under the same file name in ``output_folder``,
    which is created if needed. Files that fail are reported and skipped.
    """
    os.makedirs(output_folder, exist_ok=True)
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for filename in os.listdir(input_folder):
        if not filename.lower().endswith(image_suffixes):
            continue

        input_path = os.path.join(input_folder, filename)
        try:
            with Image.open(input_path) as source:
                rgb = source.convert("RGB")

                # Unmapped colours fall through unchanged.
                remapped = [color_mapping.get(px, px) for px in rgb.getdata()]

                recolored = Image.new("RGB", rgb.size)
                recolored.putdata(remapped)

                output_path = os.path.join(output_folder, filename)
                recolored.save(output_path)
                print(f"Processed and saved: {output_path}")
        except Exception as e:
            print(f"Could not process {input_path}: {e}")


## Kitti

In [4]:
# Move the Kitti training images and RGB masks into flat images/ and masks/ folders.
for kitti_src, kitti_dst in (
    ('./Kitti/training/image_2', './Kitti/images/'),      # images
    ('./Kitti/training/semantic_rgb', './Kitti/masks/'),  # masks
):
    move_files(kitti_src, kitti_dst)

# remove leftovers
for leftover in ('./Kitti/training/', './Kitti/testing/'):
    shutil.rmtree(leftover)

## Cityscapes

In [5]:
# Flatten the train/val splits into one images folder and one masks folder.
sources = ['./Cityscapes/leftImg8bit/train', './Cityscapes/leftImg8bit/val']
sources_masks = ['./Cityscapes/gtFine/train', './Cityscapes/gtFine/val']
for split_dirs, flat_dir in (
    (sources, './Cityscapes/images/'),       # images
    (sources_masks, './Cityscapes/masks/'),  # masks
):
    for source in split_dirs:
        move_files_recursive(source, flat_dir)

# remove leftovers
for emptied_tree in ('./Cityscapes/leftImg8bit/', './Cityscapes/gtFine/'):
    shutil.rmtree(emptied_tree)
for extra_file in ('./Cityscapes/license.txt', './Cityscapes/README'):
    os.remove(extra_file)

# Keep only the colour-coded masks; every other file in masks/ is deleted.
masks_dir = "./Cityscapes/masks/"
for file in os.listdir(masks_dir):
    file_path = os.path.join(masks_dir, file)
    if file.endswith("gtFine_color.png") or not os.path.isfile(file_path):
        continue
    os.remove(file_path)

## WildDash2

In [6]:
# Run pano2sem.py (invoked by relative path, so it must sit next to this notebook)
# on the WildDash2 panoptic annotations; its output goes to ./WildDash2/masks_before/.
# Presumably these are per-image semantic label-id masks (the files are later
# seen as *_labelIds.png) — confirm against pano2sem.py.
!python pano2sem.py --json_path ./WildDash2/panoptic.json --outp_dir_sem ./WildDash2/masks_before/


  0%|          | 0/4256 [00:00<?, ?it/s]
  0%|          | 1/4256 [00:00<11:25,  6.21it/s]
  0%|          | 2/4256 [00:00<09:47,  7.24it/s]
  0%|          | 3/4256 [00:00<08:48,  8.04it/s]
  0%|          | 4/4256 [00:00<09:28,  7.48it/s]
For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  instances[ids == id0] = category_id * 1000 + num_things

  0%|          | 6/4256 [00:00<11:01,  6.43it/s]
  0%|          | 7/4256 [00:01<10:20,  6.85it/s]
  0%|          | 8/4256 [00:01<11:00,  6.43it/s]
For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  instances[ids == id0] = category_id * 1000 + num_things

For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired result (the cast overflows).
  instances[ids == id0] = category_id * 1000 + num_things
For the old behavior, usually:
    np.array(value).astype(dtype)
will give the desired resu

In [7]:
# Colour lookup for the WildDash2 masks: keys are grey pixels (n, n, n) as found
# in ./WildDash2/masks_before (presumably n is the WildDash2 label id — confirm),
# values are the RGB colours used in the unified masks.
# NOTE(review): there is no entry for (53, 53, 53); recolor_images leaves
# unmapped pixels unchanged, so such pixels stay (53, 53, 53) — confirm this is intended.
color_mapping_wilddash = {(0, 0, 0):(0,0,0),(1, 1, 1):(0,0,0),(2, 2, 2):(0,0,0),(3, 3, 3):(0,0,0),(4, 4, 4):(0,0,0),(5, 5, 5):(111, 74,  0),(6, 6, 6):(81,  0, 81),(7, 7, 7):(128, 64,128),(8, 8, 8):(244, 35,232),(9, 9, 9):(250,170,160 ),(10, 10, 10):(230,150,140 ),(11, 11, 11):(70,70,70 ),(12, 12, 12):(102,102,156 ),(13, 13, 13):(190,153,153 ),(14, 14, 14):(180,165,180 ),(15, 15, 15):(150,100,100 ),(16, 16, 16):(150,120, 90 ),(17, 17, 17):(153,153,153),(18, 18, 18):(153,153,153 ),(19, 19, 19):( 250,170, 30),(20, 20, 20):( 220,220,  0),(21, 21, 21):(107,142,35),(22, 22, 22):(152,251,152 ),(23, 23, 23):(70,130,180),(24, 24, 24):(220, 20, 60 ),(25, 25, 25):(255,  0,  0),(26, 26, 26):(0,  0,142 ),(27, 27, 27):( 0,  0, 70),(28, 28, 28):(0, 60,100 ),(29, 29, 29):(  0,  0, 90),(30, 30, 30):( 0,  0,110),(31, 31, 31):(0, 80,100 ),(32, 32, 32):(0,  0,230 ),(33, 33, 33):(119, 11, 32 ),(34, 34, 34):(0,  0, 142 ),(35, 35, 35):( 0,  0,142),(36, 36, 36):( 0,  0,  0 ),(37, 37, 37):(0,  0,  0 ),(38, 38, 38):(128, 64,128 ),(39, 39, 39):( 0,  0,  0 ),(40, 40, 40):( 0,  0,  0 ),(41, 41, 41):(128, 64,128 ),(42, 42, 42):(  0,  0,  0),(43, 43, 43):(128, 64,128 ),(44, 44, 44):( 0,  0,  0 ),(45, 45, 45):(153,153,153 ),(46, 46, 46):(153,153,153 ),(47, 47, 47):( 255,  0,  0),(48, 48, 48):(255,  0,  0 ),(49, 49, 49):(255,  0,  0 ),(50, 50, 50):(111, 74,  0 ),(51, 51, 51):( 111, 74,  0),(52, 52, 52):(244, 35,232),(54, 54, 54):( 220,  220,  0 ),(55, 55, 55):( 0,  0,  0 ),(56, 56, 56):( 102,102,156),(57, 57, 57):(111, 74,  0 ),(58, 58, 58):( 0,  0,230),(59, 59, 59):( 0,  0,  0 ),(60, 60, 60):( 107,  142,35 ),(61, 61, 61):( 128, 64,128),(62, 62, 62):(111, 74,  0 ),(63, 63, 63):(111, 74,  0 ),(64, 64, 64):(128, 64,128 ),(65, 65, 65):(244, 35,232 ),(66, 66, 66):(0, 0,  0 ),(67, 67, 67):(0,0,0),(68, 68, 68):(81,  0, 81 ),(69, 69, 69):(81,0,81),(70, 70, 70):(244, 35,232),(71, 71, 71):(0, 0,  0 ),(72, 72, 72):( 0,  0,  0 ),(73, 73, 73):(81,  0, 81 ),(74, 74, 74):( 128, 64,128),(75, 75, 75):(128, 64,128 
),(76, 76, 76):(128, 64,128 ),(77, 77, 77):(244, 35,232 ),(78, 78, 78):( 152,251,152),(79, 79, 79):(128, 64,128 ),(80, 80, 80):( 244, 35,232)}
# Write the recoloured masks to ./WildDash2/masks (file names are kept).
recolor_images("./WildDash2/masks_before", "./WildDash2/masks", color_mapping_wilddash)

Processed and saved: ./WildDash2/masks\ae0000_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0001_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0002_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0003_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0004_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0005_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0006_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0007_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0008_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0009_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0010_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0011_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0012_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0013_100000_labelIds.png
Processed and saved: ./WildDash2/masks\ae0014_100000_labelIds.png
Processed 

In [8]:
# WildDash2
# remove leftovers: first the folders that are no longer needed ...
for leftover_dir in (
    './WildDash2/random_split',
    './WildDash2/anonymized',
    './WildDash2/panoptic',
    './WildDash2/masks_before',
):
    shutil.rmtree(leftover_dir)

# ... then the metadata / text files shipped with the archive.
for leftover_file in (
    './WildDash2/authors.txt',
    './WildDash2/license.txt',
    './WildDash2/readme.txt',
    './WildDash2/wilddash2-eval-meta.json',
    './WildDash2/wilddash2-meta.json',
    './WildDash2/panoptic.json',
):
    os.remove(leftover_file)

## CamVid

In [9]:
# Merge the train/val/test splits into one images folder and one
# (not yet recoloured) masks folder. `sources` / `sources_masks` are reused
# by the CamVid clean-up cell further down.
sources = ['./CamVid/train', './CamVid/val', './CamVid/test']
sources_masks = ['./CamVid/train_labels', './CamVid/val_labels', './CamVid/test_labels']
for split_dirs, merged_dir in (
    (sources, './CamVid/images/'),              # images
    (sources_masks, './CamVid/masks_before/'),  # masks
):
    for source in split_dirs:
        move_files(source, merged_dir)

In [10]:
# Colour lookup for the CamVid masks: keys are RGB colours as they appear in the
# original CamVid label images (presumably the classes listed in class_dict.csv —
# confirm), values are the RGB colours used in the unified masks. Colours not
# listed here are left unchanged by recolor_images.
color_mapping_camvid = {(0,0,0):(0,0,0),(128,0,0):(70, 70, 70),(0,128,0):(152,251,152),(128,128,0):(107,142, 35),(0,0,128):(111, 74,  0),(128,0,128):(111, 74,  0),(0,128,128):(111, 74,  0),(128,128,128):(70,130,180),(64,0,0):(0,  0,  0),(192,0,0):(111,74,0),(64,128,0):(81,  0, 81),(192,128,0):(0,0,0),(64,0,128):( 0,  0,142),(192,0,128):(70,70,70),(64,128,128):(107,142, 35),(192,128,128):(220,220,  0),(0,64,0):(111, 74,  0),(128,64,0):(0,  0,  0),(0,192,0):(0,  0,  0),(128,192,0):(0,0,230),(0,64,128):(220, 20, 60),(128,64,128):(128,64,128),(0,192,128):(111, 74,  0),(128,192,128):(111, 74,  0),(64,64,0):(220, 20, 60),(192,64,0):(111,74,0),(64,192,0):(70, 70, 70),(192,192,0):(107,142, 35),(64,64,128):(70, 70, 70),(192,64,128):(0, 80,100),(64,192,128):(244, 35,232),(192,192,128):(153,153,153),(0,0,64):(220,220,  0),(128,0,64):(0,0,0),(0,128,64):(70, 70, 70),(128,128,64):(0,0,0),(0,0,192):(244, 35,232),(128,0,192):(128,64,128),(0,128,192):(220, 20, 60),(128,128,192):(244, 35,232),(64,0,64):(70, 70, 70),(192,0,64):(244, 35,232),(64,128,64):(111, 74,  0),(192,128,64):(220, 20, 60),(64,0,192):(0,0,0),(192,0,192):(0,  0,230),(64,128,192):(0,  0, 70),(192,128,192):( 0, 60,100),(0,64,64):(250,170, 30),(128,64,64):(111, 74,  0)}
# Write the recoloured masks to ./CamVid/masks (file names are kept).
recolor_images("./CamVid/masks_before", "./CamVid/masks", color_mapping_camvid)

Processed and saved: ./CamVid/masks\0001TP_006690_L.png
Processed and saved: ./CamVid/masks\0001TP_006720_L.png
Processed and saved: ./CamVid/masks\0001TP_006750_L.png
Processed and saved: ./CamVid/masks\0001TP_006780_L.png
Processed and saved: ./CamVid/masks\0001TP_006810_L.png
Processed and saved: ./CamVid/masks\0001TP_006840_L.png
Processed and saved: ./CamVid/masks\0001TP_006870_L.png
Processed and saved: ./CamVid/masks\0001TP_006900_L.png
Processed and saved: ./CamVid/masks\0001TP_006930_L.png
Processed and saved: ./CamVid/masks\0001TP_006960_L.png
Processed and saved: ./CamVid/masks\0001TP_006990_L.png
Processed and saved: ./CamVid/masks\0001TP_007020_L.png
Processed and saved: ./CamVid/masks\0001TP_007050_L.png
Processed and saved: ./CamVid/masks\0001TP_007080_L.png
Processed and saved: ./CamVid/masks\0001TP_007110_L.png
Processed and saved: ./CamVid/masks\0001TP_007140_L.png
Processed and saved: ./CamVid/masks\0001TP_007170_L.png
Processed and saved: ./CamVid/masks\0001TP_00720

In [11]:
# remove leftovers
# The split folders were emptied by the move step; os.rmdir only succeeds on
# empty directories, so this fails loudly if anything was left behind.
for source in sources + sources_masks:
    os.rmdir(source)
os.remove('./CamVid/class_dict.csv')
shutil.rmtree('./CamVid/masks_before')

# Unifying dataset

In [12]:
def resize_and_rename_images(folder_path, output_folder, interpolation_choice, size=(512, 512)):
    """Resize every image in ``folder_path`` and save it as PNG in ``output_folder``.

    Dataset-specific name parts are stripped so that an image and its mask end
    up with the same file name, and the extension is always changed to ``.png``.

    Args:
        folder_path: folder whose .jpg/.jpeg/.png files are processed.
        output_folder: destination folder, created if it does not exist.
        interpolation_choice: ``"linear"`` or ``"nearest"``; any other value
            prints a message and returns without processing anything.
        size: target size passed to ``cv2.resize``.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    interpolation_map = {"linear": cv2.INTER_LINEAR, "nearest": cv2.INTER_NEAREST}
    interpolation = interpolation_map.get(interpolation_choice)
    if interpolation is None:
        print("Invalid interpolation choice. Choose 'linear' or 'nearest'.")
        return

    # (text to strip, replacement), applied in this order to every file name.
    name_cleanups = (
        ("_L.png", ".png"),     # camvid
        ("_leftImg8bit", ""),   # cityscapes
        ("_gtFine_color", ""),  # cityscapes
        ("_labelIds", ""),      # wd
    )

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(file_path):
            continue
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        clean_filename = filename
        for old, new in name_cleanups:
            clean_filename = clean_filename.replace(old, new)

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Failed to read: {file_path}")
            continue

        resized_image = cv2.resize(image, size, interpolation=interpolation)

        output_filename = os.path.splitext(clean_filename)[0] + ".png"
        output_path = os.path.join(output_folder, output_filename)

        cv2.imwrite(output_path, resized_image)
        print(f"Processed and saved: {output_path}")

In [13]:

datasets = ["./CamVid/", "./Cityscapes/", "./WildDash2/", "./Kitti/"]
size = (512, 512)

# (sub-folder, output folder, interpolation). All images are processed before
# any masks. Masks use nearest-neighbour so resizing does not blend colours.
resize_passes = (
    ("images/", "./images_out/", "linear"),
    ("masks/", "./masks_out/", "nearest"),
)
for subfolder, out_dir, interpolation_name in resize_passes:
    for dataset in datasets:
        resize_and_rename_images(dataset+subfolder, out_dir, interpolation_choice=interpolation_name, size=size)

Processed and saved: ./images_out/0001TP_006690.png
Processed and saved: ./images_out/0001TP_006720.png
Processed and saved: ./images_out/0001TP_006750.png
Processed and saved: ./images_out/0001TP_006780.png
Processed and saved: ./images_out/0001TP_006810.png
Processed and saved: ./images_out/0001TP_006840.png
Processed and saved: ./images_out/0001TP_006870.png
Processed and saved: ./images_out/0001TP_006900.png
Processed and saved: ./images_out/0001TP_006930.png
Processed and saved: ./images_out/0001TP_006960.png
Processed and saved: ./images_out/0001TP_006990.png
Processed and saved: ./images_out/0001TP_007020.png
Processed and saved: ./images_out/0001TP_007050.png
Processed and saved: ./images_out/0001TP_007080.png
Processed and saved: ./images_out/0001TP_007110.png
Processed and saved: ./images_out/0001TP_007140.png
Processed and saved: ./images_out/0001TP_007170.png
Processed and saved: ./images_out/0001TP_007200.png
Processed and saved: ./images_out/0001TP_007230.png
Processed an

# Generate depth maps using MiDaS

In [16]:
# Install timm 0.6.7 (presumably the version MiDaS expects — confirm) and clone MiDaS.
!pip install timm==0.6.7
!git clone https://github.com/isl-org/MiDaS.git
# Copy the resized images into an images_out folder inside the MiDaS checkout,
# so MiDaS can be run from within its own directory.
!cd MiDaS/ && mkdir images_out
!cp ./images_out/* ./MiDaS/images_out



Cloning into 'MiDaS'...


In [None]:
# Download the dpt_beit_large_512 checkpoint from the MiDaS v3_1 release into
# the working directory, then move it into the MiDaS weights folder.
!pip install wget
import wget
url = 'https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt'
downloaded_file = wget.download(url)
# NOTE(review): the saved output of this cell shows `mv` reporting
# "cannot remove `./MiDaS/weights/': Permission denied" — verify that the
# checkpoint really ended up at ./MiDaS/weights/dpt_beit_large_512.pt.
!mv ./dpt_beit_large_512.pt ./MiDaS/weights/dpt_beit_large_512.pt

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: wget
  Building wheel for wget (setup.py): started
  Building wheel for wget (setup.py): finished with status 'done'
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9710 sha256=f7e225e033cef239a38b5549f911fff1ddbdfd8b3338dced60b70e4f587e7996
  Stored in directory: c:\users\roscamitrut\appdata\local\pip\cache\wheels\04\5f\3e\46cc37c5d698415694d83f607f833f83f0149e49b3af9d0f38
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


mv: cannot remove `./MiDaS/weights/': Permission denied


In [20]:
# Run MiDaS from inside its checkout: input is MiDaS/images_out/, output goes to
# MiDaS/generated_depths/, using the dpt_beit_large_512 model with --grayscale.
!cd MiDaS && python run.py --input_path ./images_out/ --output_path ./generated_depths/ --model_type dpt_beit_large_512 --grayscale

Initialize
Device: cuda
Model loaded, number of parameters = 345M
Start processing
  Processing ./images_out\000000_10.png (1/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000001_10.png (2/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000002_10.png (3/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000003_10.png (4/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000004_10.png (5/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000005_10.png (6/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000006_10.png (7/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000007_10.png (8/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\000008_10.png (9/8632)
    Input resized to

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


  Processing ./images_out\frankfurt_000001_038844.png (3407/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\frankfurt_000001_039895.png (3408/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\frankfurt_000001_040575.png (3409/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\frankfurt_000001_040732.png (3410/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\frankfurt_000001_041074.png (3411/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\frankfurt_000001_041354.png (3412/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\frankfurt_000001_041517.png (3413/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./images_out\frankfurt_000001_041664.png (3414/8632)
    Input resized to 512x512 before entering the encoder
  Processing ./i

In [21]:
# Pull the generated depth maps out of the MiDaS checkout and delete the
# copy of the input images that was placed there.
!mv ./MiDaS/generated_depths/ ./generated_depths
!rm -r ./MiDaS/images_out/

In [22]:
# Move every file in ./generated_depths/ that is not a PNG into
# ./midas_other_format/, leaving only the PNG depth maps behind.
other_format_dir = "./midas_other_format/"
if not os.path.exists(other_format_dir):
    os.makedirs(other_format_dir)

depths_dir = "./generated_depths/"
for file in os.listdir(depths_dir):
    file_path = os.path.join(depths_dir, file)
    if file.endswith(".png") or not os.path.isfile(file_path):
        continue
    os.rename(file_path, other_format_dir+file)

In [23]:
# Rename the MiDaS output folder so that ./generated_depths is free again;
# grayscale_16bit_to_8bit reads ./generated_depths_int16 and writes
# ./generated_depths by default.
!mv ./generated_depths ./generated_depths_int16

In [52]:
def grayscale_16bit_to_8bit(input_folder="./generated_depths_int16", output_folder="./generated_depths"):
    """Convert 16-bit grayscale images to 8-bit, keeping the file names.

    Every file in ``input_folder`` is opened with PIL. Images whose mode is
    ``"I;16"`` are divided by 256, cast to ``uint8`` and saved as mode ``"L"``
    in ``output_folder`` (created if needed). Images in any other mode are
    skipped, and files that raise are reported and skipped. When
    ``input_folder`` is not a directory an error is printed and nothing is
    created.
    """
    if not os.path.isdir(input_folder):
        print(f"Error: '{input_folder}' is not a valid directory.")
        return

    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        try:
            with Image.open(input_path) as img:
                if img.mode != "I;16":
                    print(f"Skipping: {filename} (Mode: {img.mode})")
                    continue

                # Dividing by 256 maps 0..65535 to 0..255; the uint8 cast truncates.
                pixels_16 = np.array(img, dtype=np.uint16)
                pixels_8 = (pixels_16 / 256).astype(np.uint8)

                Image.fromarray(pixels_8, mode="L").save(output_path)

                print(f"Converted: {filename} → {output_path}")
        except Exception as e:
            print(f"Error processing '{filename}': {e}")
            
# Use the defaults: read ./generated_depths_int16, write ./generated_depths.
grayscale_16bit_to_8bit()

Converted: 000000_10-dpt_beit_large_512.png → ./generated_depths\000000_10-dpt_beit_large_512.png
Converted: 000001_10-dpt_beit_large_512.png → ./generated_depths\000001_10-dpt_beit_large_512.png
Converted: 000002_10-dpt_beit_large_512.png → ./generated_depths\000002_10-dpt_beit_large_512.png
Converted: 000003_10-dpt_beit_large_512.png → ./generated_depths\000003_10-dpt_beit_large_512.png
Converted: 000004_10-dpt_beit_large_512.png → ./generated_depths\000004_10-dpt_beit_large_512.png
Converted: 000005_10-dpt_beit_large_512.png → ./generated_depths\000005_10-dpt_beit_large_512.png
Converted: 000006_10-dpt_beit_large_512.png → ./generated_depths\000006_10-dpt_beit_large_512.png
Converted: 000007_10-dpt_beit_large_512.png → ./generated_depths\000007_10-dpt_beit_large_512.png
Converted: 000008_10-dpt_beit_large_512.png → ./generated_depths\000008_10-dpt_beit_large_512.png
Converted: 000009_10-dpt_beit_large_512.png → ./generated_depths\000009_10-dpt_beit_large_512.png
Converted: 000010_10

In [53]:
# Drop the "-dpt_beit_large_512" model tag that MiDaS appended to each depth
# map, so depth files get the same names as the files in images_out / masks_out.
depth_dir = "./generated_depths/"
model_suffix = "-dpt_beit_large_512.png"
for file in os.listdir(depth_dir):
    file_path = os.path.join(depth_dir, file)
    if not (os.path.isfile(file_path) and file.endswith(model_suffix)):
        continue
    clean_filename = file.replace(model_suffix, ".png")
    os.rename(file_path, depth_dir+clean_filename)

# Create Prompt

In [2]:
def get_files_in_directory(directory):
    """Return the set of names of regular files directly inside ``directory``.

    Sub-directories are not included and not descended into. If the directory
    cannot be listed, the error is printed and an empty set is returned.
    """
    try:
        entries = os.listdir(directory)
        return {name for name in entries if os.path.isfile(os.path.join(directory, name))}
    except Exception as e:
        print(f"Error accessing directory {directory}: {e}")
        return set()

def compare_folders(folder1, folder2, folder3):
    """Return the sorted list of file names present in all three folders.

    If any of the folders is not a directory, an error naming the first
    offending folder is printed and an empty list is returned.
    """
    folders = (folder1, folder2, folder3)
    for folder in folders:
        if not os.path.isdir(folder):
            print(f"Error: {folder} is not a valid directory")
            return []

    # One set of file names per folder, gathered in argument order.
    per_folder = [get_files_in_directory(folder) for folder in folders]

    # Names that occur in every folder, returned in sorted order.
    common_files = set.intersection(*per_folder)
    return sorted(common_files)

In [4]:
# Sorted names of the files that exist as image, mask and depth map alike.
common_files = compare_folders("./images_out/", "./masks_out/", "./generated_depths/")

In [5]:
# Write one JSON object per line to prompt.json, one line per common file:
# depth map ("source2"), mask ("source"), image ("target") and an empty prompt.
with open('prompt.json', 'w') as f:
    f.writelines(
        f'{{"source2": "generated_depths/{item}","source": "masks_out/{item}", "target": "images_out/{item}", "prompt": ""}}\n'
        for item in common_files
    )

In [5]:
# Shuffle the lines of prompt.json in place.
# Both files are opened in `with` blocks so the handles are closed (and the
# shuffled content flushed to disk) deterministically, rather than whenever
# the anonymous file objects happen to be garbage-collected.
with open('prompt.json') as f:
    lines = f.readlines()
random.shuffle(lines)
with open('prompt.json', 'w') as f:
    f.writelines(lines)

In [6]:
# Split prompt.json: the first x lines go to prompt_10.json, the remaining
# lines are written back to prompt.json.
x = 275
with open("./prompt.json",'r') as f1:
    data = f1.readlines()

held_out, remaining = data[:x], data[x:]

with open("./prompt.json",'w') as f1:
    f1.writelines(remaining)
with open("./prompt_10.json",'w') as f2:
    f2.writelines(held_out)

# Zip it up

In [5]:
# Create masks_out.zip, generated_depths.zip and images_out.zip in the current
# folder. Each archive contains its folder as the top-level entry
# (root_dir="./", base_dir=<folder name>).
shutil.make_archive("./masks_out", 'zip', "./", 'masks_out')
shutil.make_archive("./generated_depths", 'zip', "./", 'generated_depths')
shutil.make_archive("./images_out", 'zip', "./", 'images_out')

'c:\\Users\\RoscaMitrut\\Desktop\\dataset\\images_out.zip'

# Done!
By now, you should have 5 outputs:<br>

-prompt.json<br>
-prompt_10.json<br>
-images_out.zip<br>
-masks_out.zip<br>
-generated_depths.zip<br>

# Merge masks and depth maps into 1 RGBA image

In [None]:
def combine_images(rgb_folder, grayscale_folder, output_folder):
    """Merge same-named images from two folders into RGBA PNGs.

    For every file name present in both folders (names ending in
    png/jpg/jpeg, case-insensitive), the image from ``rgb_folder`` supplies
    the R, G and B channels and the image from ``grayscale_folder``, converted
    to mode ``"L"``, supplies the alpha channel. Pairs whose sizes differ are
    reported and skipped. Results are saved in PNG format under the same file
    name in ``output_folder``, which is created if needed.
    """
    os.makedirs(output_folder, exist_ok=True)

    extensions = ('png', 'jpg', 'jpeg')

    def image_names(folder):
        # Names in `folder` that end in one of the image extensions.
        return {name for name in os.listdir(folder) if name.lower().endswith(extensions)}

    common_files = image_names(rgb_folder) & image_names(grayscale_folder)

    for filename in common_files:
        rgb_image = Image.open(os.path.join(rgb_folder, filename)).convert("RGB")
        alpha_image = Image.open(os.path.join(grayscale_folder, filename)).convert("L")

        if rgb_image.size != alpha_image.size:
            print(f"Skipping {filename}: Dimension mismatch")
            continue

        red, green, blue = rgb_image.split()
        rgba_image = Image.merge("RGBA", (red, green, blue, alpha_image))

        output_path = os.path.join(output_folder, filename)
        rgba_image.save(output_path, format="PNG")  # Save as PNG to preserve alpha channel
        print(f"Saved: {output_path}")

# Use each unified mask as RGB and the matching depth map as the alpha channel.
combine_images('./masks_out', './generated_depths', './rgba_images')

# Create rgba_images.zip with the rgba_images folder as its top-level entry.
shutil.make_archive("./rgba_images", 'zip', "./", 'rgba_images')