# Creating RGB Dataset from RGB-D dataset directory for Transfer Learning

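Collects the RGB images from the RGB-D dataset into a separate directory whose subdirectories denote the class label. Each image is cropped to 200×200 pixels, and for every label one object folder is held out under `test/` while the remaining ones go under `train/`. The resulting directory is used to build a TensorFlow dataset for transfer learning.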

## Create RGB dataset directory

In [1]:
import os
import pathlib

import utils

from tqdm import tqdm

import cv2 as cv
import matplotlib.pyplot as plt

In [2]:
# Dataset locations. The defaults are the machine-specific paths this notebook was
# originally run with; set the RGBD_DATA_PATH / RGB_OUT_PATH environment variables
# to point the notebook at another machine's copies without editing this cell.
rgbd_data_path = os.environ.get(
    'RGBD_DATA_PATH',
    'C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset',
)
rgb_out_path = os.environ.get(
    'RGB_OUT_PATH',
    'C:/Users/awnya/Documents/Projects/RGBD Object Classification/RGB_dataset',
)

In [3]:
# substrings of a file name that mark an image as something other than the RGB frame
non_rgb = ['mask', 'depth']
def is_rgb_im(file):
    '''tells whether a pathlib Path is an RGB image: a .png whose name contains none of the non_rgb markers'''
    if file.suffix != '.png':
        return False

    for marker in non_rgb:
        if marker in file.name:
            return False

    return True

In [4]:
def get_files_in_dir(path):
    '''returns a flat list of every file found under the directory `path` (a pathlib Path), descending into subdirectories'''
    collected = []
    for child in path.iterdir():
        if child.is_file():
            collected.append(child)
            continue
        if child.is_dir():
            # splice the subdirectory's files in at this position
            collected.extend(get_files_in_dir(child))

    return collected

In [5]:
# index every top-level folder of the RGB-D dataset by its name, which serves as the class label
label_path = dict(
    (class_dir.name, class_dir)
    for class_dir in pathlib.Path(rgbd_data_path).iterdir()
    if class_dir.is_dir()
)
label_path

{'apple': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset/apple'),
 'ball': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset/ball'),
 'banana': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset/banana'),
 'bell_pepper': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset/bell_pepper'),
 'binder': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset/binder'),
 'bowl': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset/bowl'),
 'calculator': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/extracted/rgbd-dataset/calculator'),
 'camera': WindowsPath('C:/Users/awnya/Documents/Projects/RGBD Object Classification/full_data/e

In [6]:
# size of the crop written to the RGB dataset
crop_size_y, crop_size_x = 200, 200
# nominal frame size of the source images
full_size_y, full_size_x, = (480, 640)
# top-left corner of the centred crop for a nominal 480x640 frame
x = (full_size_x - crop_size_x)//2
y = (full_size_y - crop_size_y)//2

def process_image(bgr_image):
    '''Returns the centred crop_size_y x crop_size_x crop of an image array.

    bgr_image: array indexed as [row, column, ...], e.g. the result of cv.imread.

    The crop offsets are derived from the image's own shape rather than from the
    nominal 480x640 frame size, so the crop stays centred for any input size
    (for a 480x640 image the result is identical to using the module-level x, y).
    An image smaller than the crop along an axis is returned whole along that axis.
    '''
    img_h, img_w = bgr_image.shape[:2]

    # clamp at 0: a negative start index would wrap around and select the wrong region
    top = max((img_h - crop_size_y)//2, 0)
    left = max((img_w - crop_size_x)//2, 0)

    # crop
    processed_img = bgr_image[top:top+crop_size_y, left:left+crop_size_x]

    return processed_img

In [7]:
def _export_rgb_images(src_dirs, out_dir):
    '''Processes every RGB image found under src_dirs and writes it into out_dir.

    src_dirs: iterable of pathlib Paths; each is searched recursively for RGB images.
    out_dir: destination directory (str); created, together with any missing parents.

    Unreadable source images and failed writes are reported and skipped.
    Returns the number of images written.
    '''
    # makedirs (unlike mkdir) also creates the train/ or test/ parent on a first run
    os.makedirs(out_dir, exist_ok=True)

    n_written = 0
    for src_dir in src_dirs:
        for rgb_img in get_files_in_dir(src_dir):
            if not is_rgb_im(rgb_img):
                continue

            bgr_image = cv.imread(str(rgb_img), cv.IMREAD_UNCHANGED)
            # cv.imread reports an unreadable file by returning None instead of raising
            if bgr_image is None:
                tqdm.write(f'skipping unreadable image: {rgb_img}')
                continue

            # cv.imwrite reports failure through its boolean return value
            if cv.imwrite(f'{out_dir}/{rgb_img.name}', process_image(bgr_image)):
                n_written += 1
            else:
                tqdm.write(f'failed to write image: {out_dir}/{rgb_img.name}')

    return n_written


for label, path in tqdm(label_path.items()):

    # sorted so the held-out object does not depend on the filesystem's listing order
    subdirs = sorted(obj for obj in path.iterdir() if obj.is_dir())
    if not subdirs:
        tqdm.write(f'no object directories found for label {label!r}; skipping')
        continue

    # partition one object for testing, and rest for training
    train_dirs = subdirs[:-1]
    test_dir = subdirs[-1]

    # write processed rgb images to the train / test directory under their label subdirectory
    _export_rgb_images(train_dirs, f'{rgb_out_path}/train/{label}')
    _export_rgb_images([test_dir], f'{rgb_out_path}/test/{label}')

100%|██████████| 51/51 [1:04:59<00:00, 76.47s/it]
