In [7]:
import h5py
import numpy as np
import cv2
import os

# File and output directories
file_path = 'nyu_depth_v2_labeled.mat'
output_dir = 'output_data'
os.makedirs(output_dir, exist_ok=True)

# Export every RGB frame and its depth map from the .mat file as PNG images.
with h5py.File(file_path, 'r') as f:
    # Access RGB images and depth maps
    rgb_images = np.array(f['images'])  # Shape: (1449, 3, 640, 480)
    depth_maps = np.array(f['depths'])  # Shape: (1449, 640, 480)

    print("RGB images shape:", rgb_images.shape)
    print("Depth maps shape:", depth_maps.shape)

    num_images = rgb_images.shape[0]
    for i in range(num_images):
        # (3, 640, 480) -> (480, 640, 3), i.e. (height, width, channels)
        rgb_image = np.transpose(rgb_images[i], (2, 1, 0))

        # cv2.imwrite interprets 3-channel data as BGR, so the RGB frame must be
        # converted first (the segmentation export does the same); otherwise red
        # and blue are swapped in the PNG. The values are written as 8-bit
        # directly instead of going through a /255 -> *255 float round-trip,
        # which is redundant and truncates on the cast back to uint8.
        # NOTE(review): assumes the 'images' dataset is stored in RGB order - confirm.
        bgr_image = cv2.cvtColor(
            np.ascontiguousarray(rgb_image, dtype=np.uint8), cv2.COLOR_RGB2BGR
        )

        # (640, 480) -> (480, 640) so the depth map lines up with the RGB frame
        depth_map = np.transpose(depth_maps[i], (1, 0))

        # Save the RGB image as PNG; imwrite reports failure via its return value
        rgb_path = os.path.join(output_dir, f'rgb_image_{i:04d}.png')
        if not cv2.imwrite(rgb_path, bgr_image):
            raise IOError(f"Failed to write {rgb_path}")

        # Save the depth map as a .npy file
        #depth_path = os.path.join(output_dir, f'depth_map_{i:04d}.npy')
        #np.save(depth_path, depth_map)

        # Save the depth map as a grayscale PNG, min-max scaled per image to
        # [0, 255]. The scaling is relative to each frame, so absolute depth
        # values are not recoverable from the PNG.
        depth_min = np.min(depth_map)
        depth_range = np.max(depth_map) - depth_min
        if depth_range > 0:
            depth_norm = (255 * (depth_map - depth_min) / depth_range).astype(np.uint8)
        else:
            # Constant (or all-NaN) map: avoid dividing by zero, emit a black image
            depth_norm = np.zeros(depth_map.shape, dtype=np.uint8)
        depth_img_path = os.path.join(output_dir, f'depth_image_{i:04d}.png')
        if not cv2.imwrite(depth_img_path, depth_norm):
            raise IOError(f"Failed to write {depth_img_path}")

        print(f"Saved image {i+1}/{num_images}")


RGB images shape: (1449, 3, 640, 480)
Depth maps shape: (1449, 640, 480)
Saved image 1/1449
Saved image 2/1449
Saved image 3/1449
Saved image 4/1449
Saved image 5/1449
Saved image 6/1449
Saved image 7/1449
Saved image 8/1449
Saved image 9/1449
Saved image 10/1449
Saved image 11/1449
Saved image 12/1449
Saved image 13/1449
Saved image 14/1449
Saved image 15/1449
Saved image 16/1449
Saved image 17/1449
Saved image 18/1449
Saved image 19/1449
Saved image 20/1449
Saved image 21/1449
Saved image 22/1449
Saved image 23/1449
Saved image 24/1449
Saved image 25/1449
Saved image 26/1449
Saved image 27/1449
Saved image 28/1449
Saved image 29/1449
Saved image 30/1449
Saved image 31/1449
Saved image 32/1449
Saved image 33/1449
Saved image 34/1449
Saved image 35/1449
Saved image 36/1449
Saved image 37/1449
Saved image 38/1449
Saved image 39/1449
Saved image 40/1449
Saved image 41/1449
Saved image 42/1449
Saved image 43/1449
Saved image 44/1449
Saved image 45/1449
Saved image 46/1449
Saved image 47/1

In [11]:
import h5py
import numpy as np
import cv2
import os

# Destination folder for the exported PNGs, and the .mat file they come from
output_dir = 'output_data'
file_path = 'nyu_depth_v2_labeled.mat'

# Create the destination folder up front; a pre-existing folder is fine
os.makedirs(output_dir, exist_ok=True)

def generate_colormap(num_classes, seed=42):
    """Generate a reproducible pseudo-random color table for label ids.

    Args:
        num_classes: Number of colors (rows) to generate.
        seed: Seed for the private random generator. The default of 42
            reproduces the colors of the previous implementation.

    Returns:
        A ``(num_classes, 3)`` ``uint8`` array with one RGB triple per row.
        Each channel is drawn from ``[0, 255)``. Colors are random, so two
        labels are not strictly guaranteed to receive distinct colors.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.randint, without reseeding the global NumPy RNG
    # that other code in the notebook may rely on.
    rng = np.random.RandomState(seed)
    return rng.randint(0, 255, size=(num_classes, 3), dtype=np.uint8)

# Export every segmentation mask as a color-coded PNG.
with h5py.File(file_path, 'r') as f:
    # Only export when the file actually contains a 'labels' dataset
    if 'labels' in f:
        segmentation_masks = np.array(f['labels'])  # Shape: (1449, 640, 480)
        print("Segmentation masks shape:", segmentation_masks.shape)

        # One color per distinct label id found anywhere in the dataset
        unique_labels = np.unique(segmentation_masks)
        colormap = generate_colormap(len(unique_labels))
        # Kept for interactive inspection in later cells; the loop below uses a
        # vectorized lookup instead of iterating over this dict.
        label_to_color = {label: colormap[i] for i, label in enumerate(unique_labels)}

        num_masks = segmentation_masks.shape[0]
        for i in range(num_masks):
            # Transpose (640, 480) -> (480, 640) to get (height, width)
            mask = segmentation_masks[i].T

            # np.unique returns sorted values and every pixel's label is one of
            # them, so searchsorted gives each pixel the row index of its label
            # in `colormap`. A single fancy-index then paints the whole mask,
            # replacing one boolean-mask pass per label per image.
            rgb_mask = colormap[np.searchsorted(unique_labels, mask)]

            # Save the RGB mask (OpenCV expects BGR channel order on write)
            rgb_path = os.path.join(output_dir, f'rgb_segmentation_mask_{i:04d}.png')
            if not cv2.imwrite(rgb_path, cv2.cvtColor(rgb_mask, cv2.COLOR_RGB2BGR)):
                raise IOError(f"Failed to write {rgb_path}")

            print(f"Saved RGB segmentation mask {i+1}/{num_masks}")
    else:
        print("No segmentation masks found in the dataset.")


Segmentation masks shape: (1449, 640, 480)
Saved RGB segmentation mask 1/1449
Saved RGB segmentation mask 2/1449
Saved RGB segmentation mask 3/1449
Saved RGB segmentation mask 4/1449
Saved RGB segmentation mask 5/1449
Saved RGB segmentation mask 6/1449
Saved RGB segmentation mask 7/1449
Saved RGB segmentation mask 8/1449
Saved RGB segmentation mask 9/1449
Saved RGB segmentation mask 10/1449
Saved RGB segmentation mask 11/1449
Saved RGB segmentation mask 12/1449
Saved RGB segmentation mask 13/1449
Saved RGB segmentation mask 14/1449
Saved RGB segmentation mask 15/1449
Saved RGB segmentation mask 16/1449
Saved RGB segmentation mask 17/1449
Saved RGB segmentation mask 18/1449
Saved RGB segmentation mask 19/1449
Saved RGB segmentation mask 20/1449
Saved RGB segmentation mask 21/1449
Saved RGB segmentation mask 22/1449
Saved RGB segmentation mask 23/1449
Saved RGB segmentation mask 24/1449
Saved RGB segmentation mask 25/1449
Saved RGB segmentation mask 26/1449
Saved RGB segmentation mask 27

In [12]:
# Inspect the color table returned by generate_colormap in the segmentation
# cell (one RGB row per distinct label id).
colormap

array([[102, 220, 225],
       [ 95, 179,  61],
       [234, 203,  92],
       ...,
       [184,  79, 237],
       [118, 218, 189],
       [ 51, 193, 250]], dtype=uint8)

In [17]:
# Inspect the distinct label ids that np.unique found across all segmentation masks.
unique_labels

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [16]:
# Inspect the label id -> RGB color mapping built in the segmentation cell.
label_to_color

{np.uint16(0): array([102, 220, 225], dtype=uint8),
 np.uint16(1): array([ 95, 179,  61], dtype=uint8),
 np.uint16(2): array([234, 203,  92], dtype=uint8),
 np.uint16(3): array([  3,  98, 243], dtype=uint8),
 np.uint16(4): array([ 14, 149, 245], dtype=uint8),
 np.uint16(5): array([ 46, 106, 244], dtype=uint8),
 np.uint16(6): array([ 99, 187,  71], dtype=uint8),
 np.uint16(7): array([212, 153, 199], dtype=uint8),
 np.uint16(8): array([188, 174,  65], dtype=uint8),
 np.uint16(9): array([153,  20,  44], dtype=uint8),
 np.uint16(10): array([203, 152, 102], dtype=uint8),
 np.uint16(11): array([214, 240,  39], dtype=uint8),
 np.uint16(12): array([121,  24,  34], dtype=uint8),
 np.uint16(13): array([114, 210,  65], dtype=uint8),
 np.uint16(14): array([239,  39, 214], dtype=uint8),
 np.uint16(15): array([244, 151,  25], dtype=uint8),
 np.uint16(16): array([ 74, 145, 222], dtype=uint8),
 np.uint16(17): array([ 14, 202,  85], dtype=uint8),
 np.uint16(18): array([145, 117,  87], dtype=uint8),
 np

In [1]:
import json

# Number of exported samples. Files are numbered 0000 .. 1448, so the loop
# must stop at num_images - 1; iterating one further would emit a record that
# points at images which were never written.
num_images = 1449

# Write one JSON object per line, pairing each sample's depth image and
# segmentation mask (inputs) with its RGB image (target) and a fixed prompt.
with open("output_file.txt", "w") as file:
    for i in range(num_images):
        formatted_index = f"{i:04}"  # Zero-pad the index to 4 digits
        record = {
            "source2": f"output_data/depth_image_{formatted_index}.png",
            "source": f"output_data/rgb_segmentation_mask_{formatted_index}.png",
            "target": f"output_data/rgb_image_{formatted_index}.png",
            "prompt": "indoor room design",
        }
        # json.dumps guarantees valid JSON and consistent separators
        file.write(json.dumps(record) + "\n")
