In [1]:
!mkdir data/driving_coco
!mkdir data/driving_coco/images
!mkdir data/driving_coco/images/train
!mkdir data/driving_coco/images/val
!mkdir data/driving_coco/labels
!mkdir data/driving_coco/labels/train
!mkdir data/driving_coco/labels/val

In [2]:
import os
import shutil
from glob import glob

from tqdm import tqdm

In [3]:
# Source dataset folders under ./data, in the order they are processed.
folder_name_list = [
    'indian_driving_coco',
    'self_driving_cars_coco',
]

# Split sub-directories found inside every data-type directory.
set_type_list = [
    'train',
    'val',
]

# Data-type directories found inside every dataset folder.
data_type_list = [
    'images',
    'labels',
]

In [4]:
def get_class_idx_list(class_idx_list, path):
    """Return the distinct class indices seen so far plus those in one file.

    Every non-blank line of the file at ``path`` is split on whitespace and
    its first token is parsed as an integer class index.

    Args:
        class_idx_list: Class indices collected from earlier files. It is
            only read; the caller's list is not modified.
        path: Path of the label file to scan.

    Returns:
        A new, sorted list containing every distinct index from
        ``class_idx_list`` and from the file.

    Raises:
        ValueError: If the first token of a non-blank line is not an integer.
    """
    class_indices = set(class_idx_list)
    with open(path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. an extra newline at the end of the file)
                # carry no class index; skip them instead of crashing.
                continue
            class_indices.add(int(fields[0]))
    # Sorted so the result does not depend on set iteration order.
    return sorted(class_indices)

In [5]:
def update_class_idx(max_class_idx, from_path, to_path):
    """Copy a label file while adding a fixed offset to every class index.

    Every non-blank line of ``from_path`` is split on whitespace. The first
    token is parsed as an integer class index and the next four tokens are
    carried over verbatim; any tokens after the fifth are dropped. The
    rewritten lines are joined with newlines (no trailing newline) and
    written to ``to_path``.

    Args:
        max_class_idx: Integer offset added to each class index.
        from_path: Path of the label file to read.
        to_path: Path of the label file to write (overwritten if present).

    Raises:
        IndexError: If a non-blank line has fewer than five tokens.
        ValueError: If the first token of a non-blank line is not an integer.
    """
    new_lines = []
    with open(from_path, 'r') as file:
        for line in file:
            line_info = line.split()
            if not line_info:
                # Blank lines (e.g. an extra trailing newline) hold no
                # annotation; skip them instead of raising IndexError.
                continue
            class_idx = int(line_info[0])
            x, y, w, h = line_info[1], line_info[2], line_info[3], line_info[4]
            new_lines.append(f'{class_idx + max_class_idx} {x} {y} {w} {h}')

    with open(to_path, 'w') as file:
        file.write('\n'.join(new_lines))

In [6]:
# Merge both source datasets into ./data/driving_coco.
#
# Images and the indian_driving_coco labels are copied unchanged. The
# self_driving_cars_coco labels are rewritten with their class indices shifted
# by the largest class index found in the indian_driving_coco labels, so this
# relies on 'indian_driving_coco' being listed before 'self_driving_cars_coco'
# in folder_name_list.
indian_driving_coco_class_idx_list = []
for folder_name in folder_name_list:
    for data_type in data_type_list:
        is_labels = data_type == 'labels'
        ext = '.txt' if is_labels else '.jpg'
        for set_type in set_type_list:
            glob_path = f'./data/{folder_name}/{data_type}/{set_type}/*{ext}'
            print('Running....', glob_path)
            _paths = glob(glob_path)

            # Make sure the destination directory exists so a copy cannot
            # fail just because the setup cell was skipped.
            os.makedirs(
                os.path.dirname(glob_path).replace(folder_name, 'driving_coco'),
                exist_ok=True,
            )

            shift_labels = is_labels and folder_name == 'self_driving_cars_coco'
            if shift_labels and _paths:
                if not indian_driving_coco_class_idx_list:
                    raise ValueError(
                        'No class indices were collected from indian_driving_coco '
                        'labels; cannot compute the class index offset.'
                    )
                # The offset is the same for every file, so compute it once
                # per directory instead of once per file.
                # NOTE(review): the offset is max(...), not max(...) + 1. If the
                # self_driving_cars_coco class indices start at 0, its class 0
                # lands on the highest indian_driving_coco class index. Confirm
                # those labels are 1-based, or change the offset.
                max_class_idx = max(indian_driving_coco_class_idx_list)

            for from_path in tqdm(_paths):
                to_path = from_path.replace(folder_name, 'driving_coco')

                if shift_labels:
                    # Change class_idx in labels while combining two datasets
                    update_class_idx(max_class_idx, from_path, to_path)
                    continue

                if is_labels and folder_name == 'indian_driving_coco':
                    # Track which class indices the first dataset uses.
                    indian_driving_coco_class_idx_list = get_class_idx_list(
                        indian_driving_coco_class_idx_list, from_path
                    )

                # shutil.copy avoids spawning a shell per file, handles paths
                # with spaces or shell metacharacters, and raises on failure
                # instead of silently ignoring a non-zero exit status.
                shutil.copy(from_path, to_path)

Running.... ./data/indian_driving_coco/images/train/*.jpg


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9451/9451 [00:29<00:00, 320.21it/s]


Running.... ./data/indian_driving_coco/images/val/*.jpg


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5392/5392 [00:17<00:00, 312.24it/s]


Running.... ./data/indian_driving_coco/labels/train/*.txt


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9451/9451 [00:22<00:00, 421.62it/s]


Running.... ./data/indian_driving_coco/labels/val/*.txt


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5392/5392 [00:13<00:00, 409.03it/s]


Running.... ./data/self_driving_cars_coco/images/train/*.jpg


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 18000/18000 [00:43<00:00, 412.99it/s]


Running.... ./data/self_driving_cars_coco/images/val/*.jpg


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4241/4241 [00:09<00:00, 425.25it/s]


Running.... ./data/self_driving_cars_coco/labels/train/*.txt


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 18000/18000 [00:03<00:00, 4869.92it/s]


Running.... ./data/self_driving_cars_coco/labels/val/*.txt


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4241/4241 [00:00<00:00, 4340.97it/s]


In [7]:
def create_txt_path_file(root_path, dataset_name):
    """Write a text file listing every entry of one split directory.

    Collects everything matching ``<root_path>/<dataset_name>/*``, replaces
    the substring ``data/driving_coco`` in each path with ``.``, and writes
    one path per line to ``data/driving_coco/<dataset_name>.txt`` (relative
    to the current working directory).

    Args:
        root_path: Directory that contains the split sub-directories.
        dataset_name: Name of the split sub-directory; also used as the stem
            of the output file name.
    """
    # glob returns entries in filesystem order, which varies between machines;
    # sort so the generated list is reproducible.
    file_path_list = sorted(glob(os.path.join(root_path, dataset_name, '*')))
    lines = [path.replace('data/driving_coco', '.') + '\n' for path in file_path_list]
    with open(f'data/driving_coco/{dataset_name}.txt', 'w') as file:
        file.writelines(lines)

In [8]:
# Write data/driving_coco/train.txt from the merged training images.
create_txt_path_file(root_path='data/driving_coco/images/', dataset_name='train')

In [9]:
# Write data/driving_coco/val.txt from the merged validation images.
create_txt_path_file(root_path='data/driving_coco/images/', dataset_name='val')