In [14]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np

In [39]:
# Glob patterns for the dataset splits. The '*' after Datasets/ matches any
# dataset folder; Train/Valid add one directory level per class, while Test
# holds its files directly.
PARENT_PATH = './Assets/Datasets/*/'

TRAINING_PATH = f'{PARENT_PATH}Train/*/*'
VALIDATION_PATH = f'{PARENT_PATH}Valid/*/*'
TEST_PATH = f'{PARENT_PATH}Test/*'

In [40]:
# Build one dataset of file paths per split (shuffling disabled for each).
(
    images_path_tf_data_train,
    images_path_tf_data_valid,
    images_path_tf_data_test,
) = (
    tf.data.Dataset.list_files(file_pattern, shuffle=False)
    for file_pattern in (TRAINING_PATH, VALIDATION_PATH, TEST_PATH)
)

In [46]:
# Show the dataset objects themselves (element spec), one split per line.
print(
    f'data train: {images_path_tf_data_train}',
    f'data valid: {images_path_tf_data_valid}',
    f'data test: {images_path_tf_data_test}',
    sep='\n',
)

data train: <_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
data valid: <_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>
data test: <_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>


In [48]:
# Report how many file paths each split contains, one split per line.
print(
    f'number of data train: {images_path_tf_data_train.cardinality()}',
    f'number of data valid: {images_path_tf_data_valid.cardinality()}',
    f'number of data test: {images_path_tf_data_test.cardinality()}',
    sep='\n',
)

number of data train: 4589
number of data valid: 1350
number of data test: 243


In [192]:
def format_file_size(size, unit_file_size='bytes'):
    """Express a byte count in the requested unit.

    Args:
        size: Size in bytes.
        unit_file_size: Target unit, matched case-insensitively; one of
            'bytes', 'kb', 'mb' or 'gb' (each step is a factor of 1024).

    Returns:
        `size` unchanged when the unit is 'bytes'; otherwise the converted
        value rendered as a string with four decimal places.

    Raises:
        ValueError: If `unit_file_size` is not one of the supported units.
    """
    # None marks the unit that needs no conversion at all.
    divisors = {'bytes': None, 'kb': 1024, 'mb': 1024 ** 2, 'gb': 1024 ** 3}
    unit = unit_file_size.lower()

    if unit not in divisors:
        raise ValueError(f"Invalid unit. Choose from {list(divisors)}.")

    divisor = divisors[unit]
    if divisor is None:
        # Bytes are handed back as-is, without string formatting.
        return size

    size /= divisor
    return f'{size:.4f}'

# ==================================================== SHARED HELPERS ====================================================
def _print_section(title, width=60):
    """Print `title` centred on a '=' rule, framed by two full-width rules."""
    rule = '=' * width
    print(rule)
    print(title.center(width, '='))
    print(rule)


def _pick_sample_offset(files_path_data, is_random):
    """Return how many elements to skip before the one that gets displayed.

    With `is_random` a uniformly random offset in [0, len) is drawn (this
    needs a dataset whose length is known); otherwise the fixed offset 1 is
    used, i.e. the second element. A dataset shorter than offset + 1 yields
    nothing to display.
    """
    return np.random.randint(len(files_path_data)) if is_random else 1


def _print_path_breakdown(file_path):
    """Print the raw path, its components and each component's index.

    Returns:
        The string tensor produced by splitting `file_path` on `os.path.sep`.
    """
    _print_section(' PATH INFO ')
    print(f'File Path: {file_path}')
    print()

    _print_section(' SPLIT FILE PATH ')
    split_file_path = tf.strings.split(file_path, os.path.sep)
    print(f'Split File Path: {split_file_path}')
    print()

    _print_section(' INDEXED PATH ')
    # Enumerate the components directly. Going through a dict keyed by the
    # component would collapse repeated directory names into a single entry.
    for index, value in enumerate(split_file_path.numpy()):
        print(f'Index -> {index}: {value}')
    print()

    return split_file_path


def _print_file_details(file_path, split_file_path, unit_file_size):
    """Print the file name, its extension and its on-disk size."""
    _print_section(' FILE NAME ')
    file_name = split_file_path[-1].numpy().decode('utf-8')
    print(f'File Name: {file_name}')
    print()

    _print_section(' FILE EXTENSION ')
    file_extension = os.path.splitext(file_name)[1]
    print(f'File Extension: {file_extension}')
    print()

    _print_section(' FILE SIZE ')
    file_size = os.path.getsize(file_path.numpy().decode('utf-8'))
    file_size = format_file_size(file_size, unit_file_size=unit_file_size)
    print(f'File Size: {file_size} {unit_file_size}')
    print()


# ==================================================== DATA TRAIN ====================================================
def show_files_path_info(files_path_data, kind_data, is_random=False, unit_file_size='bytes'):
    """Print a breakdown of one file path from a labelled split.

    The label is taken to be the path component that directly follows
    `kind_data` (e.g. '.../Valid/J/img.jpg' with kind_data='Valid' -> 'J').

    Args:
        files_path_data: Dataset of scalar string file paths. Must be
            iterated eagerly, since `.numpy()` is called on its elements.
        kind_data: Name of the split directory to locate inside the path.
        is_random: Pick a random element instead of the second one.
        unit_file_size: Unit passed to `format_file_size` and echoed after
            the size.

    Raises:
        ValueError: If `kind_data` is not a component of the selected path.
    """
    idx = _pick_sample_offset(files_path_data, is_random)

    for file_path in files_path_data.skip(idx).take(1):
        split_file_path = _print_path_breakdown(file_path)

        _print_section(f' KIND DATA INDEX: {kind_data} ')
        matches = tf.where(tf.equal(split_file_path, kind_data))
        if matches.shape[0] == 0:
            # Fail with a readable message instead of an index error on an
            # empty tensor.
            raise ValueError(
                f'"{kind_data}" is not a component of '
                f'{file_path.numpy().decode("utf-8")!r}'
            )
        index = matches[0][0]
        print(f'Index of "{kind_data}": {index}')
        print()

        _print_section(' INDEX LABEL ')
        index_label = index + 1
        print(f'Index Label: {index_label}')
        print()

        _print_section(' LABEL ')
        print(f'Label: {split_file_path[index_label]}')
        print()

        _print_file_details(file_path, split_file_path, unit_file_size)


# ==================================================== DATA TEST ====================================================
def show_test_files_path_info(files_path_data, is_random=False, unit_file_size='bytes'):
    """Print a breakdown of one file path from an unlabelled (test) split.

    Same report as `show_files_path_info` minus the split/label sections,
    because no label is looked up in the path here.

    Args:
        files_path_data: Dataset of scalar string file paths. Must be
            iterated eagerly, since `.numpy()` is called on its elements.
        is_random: Pick a random element instead of the second one.
        unit_file_size: Unit passed to `format_file_size` and echoed after
            the size.
    """
    idx = _pick_sample_offset(files_path_data, is_random)

    for file_path in files_path_data.skip(idx).take(1):
        split_file_path = _print_path_breakdown(file_path)
        _print_file_details(file_path, split_file_path, unit_file_size)

In [193]:
# Inspect one randomly chosen validation file; its size is reported in KB.
show_files_path_info(
    images_path_tf_data_valid,
    kind_data='Valid',
    is_random=True,
    unit_file_size='KB',
)

File Path: b'.\\Assets\\Datasets\\SIBI dataset\\Valid\\J\\image_J_(1685190486.500457).jpg'

Split File Path: [b'.' b'Assets' b'Datasets' b'SIBI dataset' b'Valid' b'J'
 b'image_J_(1685190486.500457).jpg']

Index -> 0: b'.'
Index -> 1: b'Assets'
Index -> 2: b'Datasets'
Index -> 3: b'SIBI dataset'
Index -> 4: b'Valid'
Index -> 5: b'J'
Index -> 6: b'image_J_(1685190486.500457).jpg'

Index of "Valid": 4

Index Label: 5

Label: b'J'

File Name: image_J_(1685190486.500457).jpg

File Extension: .jpg

File Size: 8.9248 KB



In [200]:
# Inspect one randomly chosen test file; its size is reported in KB.
show_test_files_path_info(
    images_path_tf_data_test,
    is_random=True,
    unit_file_size='KB',
)

File Path: b'.\\Assets\\Datasets\\SIBI dataset\\Test\\image_B_(1685775440.4310818).jpg'

Split File Path: [b'.' b'Assets' b'Datasets' b'SIBI dataset' b'Test'
 b'image_B_(1685775440.4310818).jpg']

Index -> 0: b'.'
Index -> 1: b'Assets'
Index -> 2: b'Datasets'
Index -> 3: b'SIBI dataset'
Index -> 4: b'Test'
Index -> 5: b'image_B_(1685775440.4310818).jpg'

File Name: image_B_(1685775440.4310818).jpg

File Extension: .jpg

File Size: 15.0410 KB



In [None]:
def convert_path_to_img_tf_data(image_path, label_idx_from_path, target_size, is_gray=True):
    """Load an image file and pair it with the label encoded in its path.

    Args:
        image_path: Scalar string tensor holding the path of the image file.
        label_idx_from_path: Index, within the path split on `os.path.sep`,
            of the component to use as the label.
        target_size: `(height, width)` the image is resized to.
        is_gray: Decode to 1 channel when True, 3 channels otherwise.

    Returns:
        Tuple `(image, label)`: `image` is a uint8 tensor of shape
        `(height, width, channels)`, `label` is a scalar string tensor.
    """
    split_img_path = tf.strings.split(image_path, os.path.sep)
    label = split_img_path[label_idx_from_path]

    channels = 1 if is_gray else 3

    image = tf.io.read_file(image_path)
    # expand_animations=False keeps the result 3-D even for animated GIFs
    # (first frame only). Without it a GIF decodes to a 4-D tensor, which
    # contradicts the 3-D shape declared on the next line.
    image = tf.image.decode_image(image, channels=channels, expand_animations=False)
    # decode_image leaves the static shape unknown; resize needs the rank.
    image.set_shape([None, None, channels])
    image = tf.image.resize(image, size=(target_size[0], target_size[1]))
    # resize yields floats; round to nearest before the uint8 cast, because a
    # bare cast truncates and biases every interpolated pixel downwards.
    image = tf.cast(tf.round(image), tf.uint8)

    return image, label

In [None]:
# Target image size after resizing.
new_height=224
new_width=224
# Position of the class directory in the split path,
# e.g. ./Assets/Datasets/<dataset>/Train/<label>/<file> -> index 5.
label_idx_from_path=5

# Map the *training* paths to (image, label) pairs. The original referenced
# `images_path_tf_data`, a name no cell defines, so a fresh kernel would stop
# here with a NameError.
images_tf_data = images_path_tf_data_train.map(
    map_func=lambda image_path: 
        convert_path_to_img_tf_data(
            image_path=image_path, 
            label_idx_from_path=label_idx_from_path, 
            target_size=(new_height, new_width),
            is_gray=False
        ),
    num_parallel_calls=tf.data.AUTOTUNE
)

In [None]:
# Summarise the mapped dataset: its repr, then its element count.
print(
    f'info train data: {images_tf_data}',
    f'number of train data: {len(images_tf_data)}',
    sep='\n',
)

In [None]:
# Sanity-check a single decoded sample (the element at offset 200): print the
# image and label metadata, then display the image.
for image, label in images_tf_data.skip(200).take(1):
    print(f"{'Check Train Data'.center(61, '=')}")
    print(f'''    shape-image: {image.shape}
    dtype-image: {image.dtype}
    max-intensity: {tf.reduce_max(image)}
    min-intensity: {tf.reduce_min(image)}

    label: {label}
    label-shape: {label.shape}
    label-dtype: {label.dtype}'''
    )

    # imshow accepts (H, W), (H, W, 3) or (H, W, 4) but not (H, W, 1), so a
    # single-channel image has its channel axis dropped before plotting.
    display_image = tf.squeeze(image, axis=-1) if image.shape[-1] == 1 else image
    plt.imshow(display_image, cmap='gray')
plt.show()