In [None]:
import os
import typing
import cv2
import numpy as np
import tensorflow as tf

from local_paths import *
"""
local_paths defines the local paths to the required folders.
The variables it provides are listed below:

IMAGE_DATASET_128_DIR
IMAGE_DATASET_256_DIR
OUTPUT_DATASET_DIR_128
OUTPUT_DATASET_DIR_256
"""

In [None]:
def dataset_generator(image_size: typing.Tuple[int, int],
                      images_dir: typing.Union[bytes, str]):
    """Yield normalized (L, ab) LAB pairs for every readable image in a folder.

    Written to be used with ``tf.data.Dataset.from_generator``, which hands
    the ``args`` over as NumPy values: ``image_size`` then arrives as an
    integer array and ``images_dir`` as UTF-8 encoded bytes. A plain tuple
    and a ``str`` are accepted as well.

    Args:
        image_size: target ``(height, width)`` of the yielded samples.
        images_dir: directory containing the source images.

    Yields:
        ``(l_channel, ab_channel)`` float32 tensors:
        ``l_channel`` has shape ``(height, width, 1)`` with values in [0, 1],
        ``ab_channel`` has shape ``(height, width, 2)`` with values in [-1, 1).
    """
    import warnings

    # String arguments reach the generator as UTF-8 bytes. Decode them
    # properly instead of parsing the bytes repr, which breaks as soon as the
    # path contains a quote or a non-ASCII character.
    if isinstance(images_dir, bytes):
        images_dir = images_dir.decode('utf-8')
    else:
        images_dir = str(images_dir)

    # Works for tuples and for the integer array produced by tf.data.
    target_height, target_width = (int(side) for side in image_size)

    # sorted() makes the sample order reproducible; os.listdir order is arbitrary.
    for name in sorted(os.listdir(images_dir)):
        path = os.path.join(images_dir, name)

        # With default flags cv2.imread returns an 8-bit BGR array,
        # or None when the file cannot be decoded as an image.
        image = cv2.imread(path)
        if image is None:
            warnings.warn(f"Skipping unreadable image file: {path}")
            continue

        # Resize when *either* side differs from the target.
        height, width = image.shape[:2]
        if (height, width) != (target_height, target_width):
            # cv2.resize expects dsize as (width, height).
            image = cv2.resize(image, (target_width, target_height))

        # Convert while the image is still 8-bit: only then does OpenCV return
        # LAB scaled to 0..255 (L * 255 / 100, a + 128, b + 128), which is what
        # the normalization below assumes. Float input would have to be in
        # [0, 1] and would yield unscaled L in [0, 100] and a/b in [-127, 127].
        lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)

        # normalization of L-channel (0, 255) -> (0, 1), kept as (h, w, 1)
        l_channel = lab[:, :, :1] / 255.0

        # normalization of ab-channels (0, 255) -> (-1, 1)
        ab_channel = (lab[:, :, 1:] - 128.0) / 128.0

        yield tf.convert_to_tensor(l_channel), tf.convert_to_tensor(ab_channel)



In [None]:
# Spatial size of every sample in the 128x128 dataset.
IMAGE_SIZE = (128, 128)

# Lazily builds (L, ab) pairs from the 128x128 image folder.
# The string in `args` is converted to bytes before it reaches the generator.
dataset = tf.data.Dataset.from_generator(
    dataset_generator,
    args=(IMAGE_SIZE, IMAGE_DATASET_128_DIR),
    output_signature=(
        tf.TensorSpec(shape=(*IMAGE_SIZE, 1), dtype=tf.float32),  # L channel
        tf.TensorSpec(shape=(*IMAGE_SIZE, 2), dtype=tf.float32),  # ab channels
    ),
)


In [37]:
# Materialize the 128x128 dataset and write it to OUTPUT_DATASET_DIR_128.
dataset.save(path=OUTPUT_DATASET_DIR_128)

In [38]:
# Spatial size of every sample in the 256x256 dataset.
IMAGE_SIZE = (256, 256)

# Lazily builds (L, ab) pairs from the 256x256 image folder.
dataset = tf.data.Dataset.from_generator(
    dataset_generator,
    args=(IMAGE_SIZE, IMAGE_DATASET_256_DIR),
    output_signature=(
        tf.TensorSpec(shape=(*IMAGE_SIZE, 1), dtype=tf.float32),  # L channel
        tf.TensorSpec(shape=(*IMAGE_SIZE, 2), dtype=tf.float32),  # ab channels
    ),
)


In [39]:
# Materialize the 256x256 dataset and write it to OUTPUT_DATASET_DIR_256.
dataset.save(path=OUTPUT_DATASET_DIR_256)