This notebook takes multiple directories of training data and merges them into one dataset. It was created specifically for datasets generated with the CARLA simulator.


```
📦MonoDTR_root/data/custom  
 ┣ 📂merged                 
 ┃ ┣ 📂training             
 ┃ ┃ ┣ 📂calib              
 ┃ ┃ ┣ 📂image_2            
 ┃ ┃ ┣ 📂label_2            
 ┃ ┃ ┗ 📂velodyne           
 ┃ ┣ 📜train.txt            
 ┃ ┗ 📜val.txt              
 ┣ 📂vehicle.tesla.model3_1 
 ┃ ┗ 📂kitti_object         
 ┃ ┃ ┣ 📂ImageSets          
 ┃ ┃ ┃ ┣ 📜train.txt        
 ┃ ┃ ┃ ┗ 📜val.txt          
 ┃ ┃ ┗ 📂training           
 ┃ ┃ ┃ ┣ 📂calib            
 ┃ ┃ ┃ ┣ 📂image_2          
 ┃ ┃ ┃ ┣ 📂label_2          
 ┃ ┃ ┃ ┗ 📂velodyne         
 ┣ 📂vehicle.tesla.model3_7 
 ┃ ┗ 📂kitti_object         
 ┃ ┃ ┣ 📂ImageSets          
 ┃ ┃ ┃ ┣ 📜train.txt        
 ┃ ┃ ┃ ┗ 📜val.txt          
 ┃ ┃ ┗ 📂training           
 ┃ ┃ ┃ ┣ 📂calib            
 ┃ ┃ ┃ ┣ 📂image_2          
 ┃ ┃ ┃ ┣ 📂label_2          
 ┃ ┃ ┃ ┗ 📂velodyne         
 ┣ 📂vehicle.tesla.model3_9 
 ┃ ┗ 📂kitti_object         
 ┃ ┃ ┣ 📂ImageSets          
 ┃ ┃ ┃ ┣ 📜train.txt        
 ┃ ┃ ┃ ┗ 📜val.txt          
 ┃ ┃ ┗ 📂training           
 ┃ ┃ ┃ ┣ 📂calib            
 ┃ ┃ ┃ ┣ 📂image_2          
 ┃ ┃ ┃ ┣ 📂label_2          
 ┃ ┃ ┃ ┗ 📂velodyne         
 ┗ 📜copier.ipynb           
``` 


In [1]:
import os
import shutil
import random

# Next free sample index in the merged dataset; the merge loop advances it
# after each source folder has been copied.
new_index = 0
# Fraction of the shuffled sample indices written to train.txt; the rest go
# to val.txt.
split_rate = 0.8
# Directory that is scanned for "vehicle*" source folders and in which the
# "merged" output folder is created.
base_folder = "."
# Subfolders expected inside each source "training" folder and recreated
# under the merged "training" folder.
subfolders = ["calib", "image_2", "label_2", "velodyne"]

In [2]:
# Output layout: <base_folder>/merged/training/<subfolder>.
# train.txt and val.txt are later written directly into result_base_folder.
result_base_folder = os.path.join(base_folder, "merged")
result_folder = os.path.join(result_base_folder, "training")
# exist_ok=True lets the cell be re-run without failing on existing folders.
os.makedirs(result_folder, exist_ok=True)

# One output directory per entry in `subfolders`.
for name in subfolders:
    tmp_folder = os.path.join(result_folder, name)
    os.makedirs(tmp_folder, exist_ok=True)

In [3]:
def check_subfolder_consistency(vehicle_path, required_subfolders=None):
    """Return True when all required subfolders contain the same samples.

    Files are later renamed purely by their sorted position, so the
    subfolders must agree not only on the number of files but also on the
    file names (ignoring extensions). Otherwise an image could end up paired
    with the label or point cloud of a different frame.

    Args:
        vehicle_path: Path to a "training" folder that holds the subfolders.
        required_subfolders: Names of the subfolders to compare. Defaults to
            the module-level ``subfolders`` list.

    Returns:
        False if any subfolder is missing or if the sorted file stems differ
        between subfolders; True otherwise.
    """
    names = subfolders if required_subfolders is None else required_subfolders

    reference_stems = None
    for name in names:
        folder = os.path.join(vehicle_path, name)
        try:
            entries = os.listdir(folder)
        except (FileNotFoundError, NotADirectoryError):
            # A missing subfolder means the dataset is incomplete.
            return False

        # A sorted list (not a set) keeps duplicates, so equal lists imply
        # equal file counts as well as equal sample names.
        stems = sorted(os.path.splitext(entry)[0] for entry in entries)

        if reference_stems is None:
            reference_stems = stems
        elif stems != reference_stems:
            return False

    return True

def copy_subfolder(folder_path, copy_index, result_path):
    """Copy every file of a folder into another folder under numeric names.

    The source files are processed in sorted name order. Each one is copied
    to ``result_path`` as a zero-padded six-digit index, starting at
    ``copy_index`` and keeping the original extension.

    Args:
        folder_path: Folder whose entries are copied.
        copy_index: Index given to the first copied file.
        result_path: Existing folder that receives the copies.

    Returns:
        The index following the last one used, i.e. ``copy_index`` plus the
        number of copied files.
    """
    ordered_names = sorted(os.listdir(folder_path))

    for offset, original_name in enumerate(ordered_names):
        extension = os.path.splitext(original_name)[1]
        target_name = f"{copy_index + offset:06d}{extension}"
        shutil.copyfile(
            os.path.join(folder_path, original_name),
            os.path.join(result_path, target_name),
        )

    return copy_index + len(ordered_names)

In [4]:
# Merge every "vehicle*" dataset under base_folder into result_folder,
# numbering the samples consecutively starting at new_index.
folder_size = new_index
# os.listdir returns entries in arbitrary order; sorting makes the merged
# numbering reproducible across runs and machines.
for vehicle_folder in sorted(os.listdir(base_folder)):
    if vehicle_folder.startswith("vehicle"):
        print(new_index)
        vehicle_path = os.path.join(base_folder, vehicle_folder, "kitti_object", "training")

        if check_subfolder_consistency(vehicle_path):
            # Every subfolder is copied starting at the same index so files
            # belonging to one sample share the same number.
            for name in subfolders:
                print(new_index)
                folder_size = copy_subfolder(os.path.join(vehicle_path, name), new_index, os.path.join(result_folder, name))
        else:
            # Report skipped datasets instead of dropping them silently.
            print(f"Skipping {vehicle_folder}: subfolders are inconsistent")

        print()
        print(folder_size)

        # The next dataset continues right after the last copied sample; for
        # a skipped dataset folder_size is unchanged, so the index stays put.
        new_index = folder_size
        print()
        print()
        print("--------------------------------------------------")
        print()
        print()

0
0


0
0
0

274


--------------------------------------------------


274
274
274
274
274

548


--------------------------------------------------


548
548
548
548
548

641


--------------------------------------------------


641
641
641
641
641

915


--------------------------------------------------


915
915
915
915
915

1008


--------------------------------------------------


1008
1008
1008
1008
1008

1282


--------------------------------------------------


1282

1282


--------------------------------------------------


1282
1282
1282
1282
1282

1556


--------------------------------------------------


1556
1556
1556
1556
1556

1830


--------------------------------------------------


1830
1830
1830
1830
1830

2104


--------------------------------------------------


2104
2104
2104
2104
2104

2378


--------------------------------------------------


2378
2378
2378
2378
2378

2652


--------------------------------------------------


2652

2652


------------------

In [5]:
def generate_random_order(start, end):
    """Return the integers from ``start`` up to (excluding) ``end``, shuffled.

    The order comes from ``random.shuffle``, so it depends on the state of
    the global ``random`` generator.
    """
    shuffled = [*range(start, end)]
    random.shuffle(shuffled)
    return shuffled

def write_to_file(filename, numbers):
    """Write each number on its own line, zero-padded to six digits.

    Args:
        filename: Path of the file to create or overwrite.
        numbers: Iterable of integers to write, in order.
    """
    with open(filename, 'w') as out:
        out.writelines(f"{value:06d}\n" for value in numbers)



# "szamok" is Hungarian for "numbers": the shuffled indices from 0 up to
# (excluding) new_index.
szamok = generate_random_order(0, new_index)

# The first split_rate share of the shuffled indices goes to training and
# the remainder to validation.
split_point = int(split_rate * len(szamok))
train_szamok, val_szamok = szamok[:split_point], szamok[split_point:]

for split_file, split_numbers in (("train.txt", train_szamok), ("val.txt", val_szamok)):
    write_to_file(os.path.join(result_base_folder, split_file), split_numbers)
