# Creating RGB Dataset from RGB-D dataset directory for Transfer Learning

Collects the RGB images from the RGB-D dataset into a separate directory, with one subdirectory per label. The resulting directory is used to build a TensorFlow dataset for transfer learning.

## Create RGB dataset directory

In [3]:
import os
import pathlib
import shutil

In [4]:
# Root of the extracted RGB-D dataset (expected to hold one sub-directory per label).
rgbd_data_path = 'full_data/extracted/rgbd-dataset'
# Destination directory for the RGB-only copy, laid out as <rgb_out_path>/<label>/<image>.
rgb_out_path = 'RGB_dataset'
# Labels to extract. The copy loop further down iterates over `labels`, so it has
# to exist on a fresh kernel. These values match the class names recorded in the
# notebook output - TODO confirm this is the intended label set.
labels = ['apple', 'banana']

In [80]:
# Substrings that mark a file name as a non-RGB image.
non_rgb = ['mask', 'depth']
def is_rgb_im(file):
    """Tell whether `file` is an RGB image.

    `file` must expose `.suffix` and `.name` (e.g. a `pathlib.Path`).
    Returns True only when the suffix is exactly '.png' and the file name
    contains none of the substrings listed in `non_rgb`.
    """
    if file.suffix != '.png':
        return False

    # only the final path component is inspected, not the parent directories
    for marker in non_rgb:
        if marker in file.name:
            return False

    return True

In [66]:
def get_files_in_dir(path):
    """Recursively collect every regular file below `path`.

    `path` must provide `iterdir()` (e.g. a `pathlib.Path` pointing at a
    directory). Returns a list of the file entries found, with the contents
    of each sub-directory spliced in at the position where that sub-directory
    was encountered. Entries that are neither files nor directories are skipped.
    """
    collected = []
    for child in path.iterdir():
        if child.is_file():
            collected.append(child)
            continue
        if child.is_dir():
            # descend and keep whatever the sub-tree contains
            collected.extend(get_files_in_dir(child))

    return collected

In [90]:
# `labels` is expected to be an iterable of class-name strings, each naming a
# sub-directory of `rgbd_data_path`.
for label in labels:

    # make directory for label if one doesn't exist; parents=True also creates
    # `rgb_out_path` itself on a first run (a plain mkdir fails when the parent
    # is missing) and exist_ok=True keeps the cell safe to re-run
    label_out_dir = pathlib.Path(rgb_out_path) / label
    label_out_dir.mkdir(parents=True, exist_ok=True)

    # find all rgb image files for this label in the RGB-D dataset directory
    path = pathlib.Path(rgbd_data_path) / label
    rgb_imgs = [file for file in get_files_in_dir(path) if is_rgb_im(file)]

    # copy rgb image files to RGB dataset directory under their label subdirectory.
    # The source tree is flattened into a single directory, so two files with the
    # same name from different sub-directories would overwrite each other.
    for rgb_img in rgb_imgs:
        shutil.copy(rgb_img, label_out_dir)

## Create tensorflow dataset

In [1]:
import tensorflow as tf

In [5]:
# Size every image is resized to when it is loaded.
# NOTE(review): Keras documents `image_size` as (height, width), so this yields
# images 640 high by 480 wide - confirm that orientation is intended.
IM_SIZE = (640, 480)

# Build a shuffled, batched dataset from the label sub-directories of
# `rgb_out_path`; labels are inferred from directory names and one-hot encoded.
rgb_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    rgb_out_path,
    labels='inferred',
    label_mode='categorical',
    image_size=IM_SIZE,
    batch_size=32,
    shuffle=True,
)

Found 5924 files belonging to 2 classes.


In [31]:
# Show the class names the dataset inferred (it was built with labels='inferred').
rgb_dataset.class_names

['apple', 'banana']

In [32]:
# Show the structure of one dataset element: an (images, labels) pair of batched tensors.
rgb_dataset.element_spec

(TensorSpec(shape=(None, 640, 480, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))