## Preprocessing notebook

In [1]:
# Importing the libraries
import cv2
import os
import glob
from tqdm import tqdm

In [2]:
# Dataset splits; every path dictionary below is keyed by these names
_SPLITS = ("train", "val", "test")

# Source images to be resized
input_paths = {name: f"Cityscapes/leftImg8bit/{name}" for name in _SPLITS}
# Source annotations (searched for both *_labelIds.png and *_color.png files)
label_paths = {name: f"Cityscapes/gtFine/{name}" for name in _SPLITS}
# Destination for resized images
output_image_paths = {name: f"Cityscapes/resized/leftImg8bit/{name}" for name in _SPLITS}
# Destination for resized label-ID masks
output_label_id_paths = {name: f"Cityscapes/resized/gtFine/{name}/labelIds" for name in _SPLITS}
# Destination for resized color masks
output_color_paths = {name: f"Cityscapes/resized/gtFine/{name}/color" for name in _SPLITS}


In [3]:
# Resolution every image and mask is resized to.
# The tuple is passed to cv2.resize, which takes its size as (width, height).
TARGET_WIDTH, TARGET_HEIGHT = 512, 256
target_size = (TARGET_WIDTH, TARGET_HEIGHT)

In [4]:
# Helper function to create output directories recursively
def create_output_path(file, base_input_path, base_output_path):
    """Map an input file to the mirrored location under an output root.

    The path of ``file`` relative to ``base_input_path`` is re-rooted under
    ``base_output_path``, and the directory that will contain the output file
    is created if it does not exist yet.

    Args:
        file: Path of the input file.
        base_input_path: Root directory that ``file`` is made relative to.
        base_output_path: Root directory the relative path is appended to.

    Returns:
        The output file path. The file itself is not created, only its
        parent directory.
    """
    relative_path = os.path.relpath(file, base_input_path)
    output_file = os.path.join(base_output_path, relative_path)
    output_dir = os.path.dirname(output_file)
    # dirname is "" when the output file sits directly in the current
    # directory; os.makedirs("") raises FileNotFoundError, so skip it then.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    return output_file

In [5]:
# Resize input images
def resize_images(split):
    """Resize every input image of one dataset split and save the results.

    Recursively collects all ``*.png`` files under ``input_paths[split]``,
    resizes each to ``target_size`` with bilinear interpolation, and writes it
    under ``output_image_paths[split]`` at the same relative path.

    Args:
        split: Key into ``input_paths`` and ``output_image_paths``.

    Raises:
        KeyError: If ``split`` is not a key of the path dictionaries.
        IOError: If an image cannot be decoded or the resized image cannot
            be written.
    """
    print(f"\n🔹 Resizing {split} input images...")
    # Sorted so the processing order is reproducible; glob order is arbitrary
    input_files = sorted(glob.glob(os.path.join(input_paths[split], '**', '*.png'), recursive=True))
    for file in tqdm(input_files, desc=f"Resizing {split.capitalize()} Images", unit="file"):
        img = cv2.imread(file)
        # cv2.imread reports failure by returning None rather than raising
        if img is None:
            raise IOError(f"Could not read image: {file}")
        img_resized = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
        output_file = create_output_path(file, input_paths[split], output_image_paths[split])
        # cv2.imwrite returns False when the file could not be written
        if not cv2.imwrite(output_file, img_resized):
            raise IOError(f"Could not write image: {output_file}")

In [6]:
# Resize label IDs
def resize_labels(split):
    """Resize every label-ID mask of one dataset split and save the results.

    Recursively collects all ``*_labelIds.png`` files under
    ``label_paths[split]``, resizes each to ``target_size`` and writes it under
    ``output_label_id_paths[split]`` at the same relative path.

    Masks are loaded with ``cv2.IMREAD_UNCHANGED`` and resized with
    nearest-neighbor interpolation so that every output pixel keeps a value
    that was present in the input, with no blended IDs.

    Args:
        split: Key into ``label_paths`` and ``output_label_id_paths``.

    Raises:
        KeyError: If ``split`` is not a key of the path dictionaries.
        IOError: If a mask cannot be decoded or the resized mask cannot be
            written.
    """
    print(f"\n🔹 Resizing {split} ground truth label IDs...")
    # Sorted so the processing order is reproducible; glob order is arbitrary
    label_files = sorted(glob.glob(os.path.join(label_paths[split], '**', '*_labelIds.png'), recursive=True))
    for file in tqdm(label_files, desc=f"Resizing {split.capitalize()} Label IDs", unit="file"):
        mask = cv2.imread(file, cv2.IMREAD_UNCHANGED)
        # cv2.imread reports failure by returning None rather than raising
        if mask is None:
            raise IOError(f"Could not read label mask: {file}")
        mask_resized = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)  # Nearest-neighbor for IDs
        output_file = create_output_path(file, label_paths[split], output_label_id_paths[split])
        # cv2.imwrite returns False when the file could not be written
        if not cv2.imwrite(output_file, mask_resized):
            raise IOError(f"Could not write label mask: {output_file}")

In [7]:
# Resize color masks
def resize_color_masks(split):
    """Resize every color mask of one dataset split and save the results.

    Recursively collects all ``*_color.png`` files under ``label_paths[split]``,
    resizes each to ``target_size`` and writes it under
    ``output_color_paths[split]`` at the same relative path.

    Nearest-neighbor interpolation is used, as for the label-ID masks:
    bilinear interpolation would blend neighbouring colors along region
    boundaries and produce colors that do not occur in the original mask.

    Args:
        split: Key into ``label_paths`` and ``output_color_paths``.

    Raises:
        KeyError: If ``split`` is not a key of the path dictionaries.
        IOError: If a mask cannot be decoded or the resized mask cannot be
            written.
    """
    print(f"\n🔹 Resizing {split} ground truth color masks...")
    # Sorted so the processing order is reproducible; glob order is arbitrary
    color_files = sorted(glob.glob(os.path.join(label_paths[split], '**', '*_color.png'), recursive=True))
    for file in tqdm(color_files, desc=f"Resizing {split.capitalize()} Color Masks", unit="file"):
        mask = cv2.imread(file)
        # cv2.imread reports failure by returning None rather than raising
        if mask is None:
            raise IOError(f"Could not read color mask: {file}")
        # Nearest-neighbor keeps every output pixel an exact color from the input
        mask_resized = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)
        output_file = create_output_path(file, label_paths[split], output_color_paths[split])
        # cv2.imwrite returns False when the file could not be written
        if not cv2.imwrite(output_file, mask_resized):
            raise IOError(f"Could not write color mask: {output_file}")

In [8]:
# Run the three resizing stages (images, label IDs, color masks) for each split in turn
for split in ("train", "val", "test"):
    for resize_stage in (resize_images, resize_labels, resize_color_masks):
        resize_stage(split)


🔹 Resizing train input images...


Resizing Train Images: 100%|██████████| 2975/2975 [06:47<00:00,  7.29file/s]



🔹 Resizing train ground truth label IDs...


Resizing Train Label IDs: 100%|██████████| 2975/2975 [00:38<00:00, 76.37file/s]



🔹 Resizing train ground truth color masks...


Resizing Train Color Masks: 100%|██████████| 2975/2975 [02:38<00:00, 18.75file/s]



🔹 Resizing val input images...


Resizing Val Images: 100%|██████████| 500/500 [01:31<00:00,  5.46file/s]



🔹 Resizing val ground truth label IDs...


Resizing Val Label IDs: 100%|██████████| 500/500 [00:09<00:00, 50.14file/s]



🔹 Resizing val ground truth color masks...


Resizing Val Color Masks: 100%|██████████| 500/500 [00:32<00:00, 15.38file/s]



🔹 Resizing test input images...


Resizing Test Images: 100%|██████████| 1525/1525 [04:24<00:00,  5.76file/s]



🔹 Resizing test ground truth label IDs...


Resizing Test Label IDs: 100%|██████████| 1525/1525 [00:19<00:00, 77.56file/s] 



🔹 Resizing test ground truth color masks...


Resizing Test Color Masks: 100%|██████████| 1525/1525 [01:08<00:00, 22.26file/s]
