# Combining Roboflow Datasets to Train BigYOLO

In [1]:
import sys
sys.path.append('../src')
from data_utils import download_images_and_labels
import os
import shutil

## Download Images from Bucket

In [None]:
# Fetch the three additional datasets from the project bucket.
bucket_name = 'tree-counting-project'

# Each dataset has a bucket prefix and a local directory it is downloaded into.
prefix_219, local_dir_219 = 'Dataset 219-27-27/', '../Data/Dataset 219-27-27'
prefix_64, local_dir_64 = 'Dataset 64-18-9/', '../Data/Dataset 64-18-9'
prefix_97, local_dir_97 = 'Dataset 97-14-14/', '../Data/Dataset 97-14-14'

# Download images and labels (RUN ONCE) -- same order as before: 219, 64, 97.
for prefix, local_dir in (
    (prefix_219, local_dir_219),
    (prefix_64, local_dir_64),
    (prefix_97, local_dir_97),
):
    download_images_and_labels(bucket_name, prefix, local_dir)

## Create Directory Structure

In [9]:
# Destination directory that receives the merged dataset.
combined_dataset_path = '../Data/Combined Dataset'

# Source dataset directories to merge; they are processed in this order.
datasets = [
    '../Data/Dataset 64-18-9',
    '../Data/Dataset 97-14-14',
    '../Data/Dataset 219-27-27',
    '../Data/Dataset 348-17-15',
]

In [6]:
# Create the images/labels folders for every split of the combined dataset.
# exist_ok=True makes the cell safe to re-run.
for relative_dir in (
    'train/images',
    'train/labels',
    'valid/images',
    'valid/labels',
    'test/images',
    'test/labels',
):
    os.makedirs(os.path.join(combined_dataset_path, relative_dir), exist_ok=True)

## Copying Train, Validation and Test Data

In [14]:
def copy_files(src_folder: str, dst_folder: str, subfolder: str) -> None:
    """Copy one split's images and labels from a dataset into another.

    Copies every regular file found directly in
    ``src_folder/subfolder/images`` and ``src_folder/subfolder/labels`` into
    the matching directories under ``dst_folder``. Sub-directories inside the
    source folders are skipped.

    Args:
        src_folder: Root directory of the source dataset.
        dst_folder: Root directory of the destination dataset.
        subfolder: Split to copy (called with 'train', 'valid' and 'test').

    Raises:
        FileNotFoundError: If a source ``images`` or ``labels`` directory
            does not exist.

    Note:
        A file whose name already exists in the destination is overwritten.
    """
    for kind in ('images', 'labels'):
        src_dir = os.path.join(src_folder, subfolder, kind)
        dst_dir = os.path.join(dst_folder, subfolder, kind)

        # List first so a missing source fails before anything is created.
        file_names = os.listdir(src_dir)

        # Without an existing destination directory, shutil.copy would either
        # raise or write a single file named after the directory, which every
        # later copy would then overwrite.
        os.makedirs(dst_dir, exist_ok=True)

        for file_name in file_names:
            src_file = os.path.join(src_dir, file_name)
            if os.path.isfile(src_file):
                shutil.copy(src_file, dst_dir)

In [13]:
# Merge every split of every source dataset into the combined dataset.
for dataset in datasets:
    for split in ('train', 'valid', 'test'):
        copy_files(dataset, combined_dataset_path, split)

In [19]:
expected_test_images = 65

# NOTE(review): despite the *_test_images names, this counts the files in
# 'valid/images', and expected_test_images is not compared against the
# result in this cell -- confirm which split was meant to be checked.
valid_images_dir = os.path.join(combined_dataset_path, 'valid/images')
actual_test_images = sum(
    1
    for entry in os.listdir(valid_images_dir)
    if os.path.isfile(os.path.join(valid_images_dir, entry))
)
actual_test_images

76

# Combining Roboflow Datasets to Train RetinaNet

In [12]:
# Destination directory that receives the merged XML dataset.
combined_dataset_path = '../Data/Combined Dataset XML'

# Source XML dataset directories to merge; they are processed in this order.
xml_datasets = [
    '../Data/Datasets XML/Dataset 64-18-9',
    '../Data/Datasets XML/Dataset 97-14-14',
    '../Data/Datasets XML/Dataset 219-27-27',
    '../Data/Datasets XML/Dataset 348-17-15',
]

In [3]:
def copy_files(src_folder: str, dest_folder: str) -> None:
    """Copy the contents of ``src_folder`` into ``dest_folder``.

    Files are copied with ``shutil.copy2``; sub-directories are copied
    recursively and merged into any existing directory of the same name.
    ``dest_folder`` is created if it does not exist yet.

    Args:
        src_folder: Directory whose entries are copied.
        dest_folder: Directory that receives the entries.

    Raises:
        FileNotFoundError: If ``src_folder`` does not exist.

    Note:
        Existing destination files with the same name are overwritten.
        This definition replaces the three-argument ``copy_files`` defined
        earlier in the notebook.
    """
    # List first so a missing source fails before anything is created.
    items = os.listdir(src_folder)

    # copy2 to "dest_folder/<name>" raises FileNotFoundError when dest_folder
    # itself is missing, so make sure it exists before copying plain files.
    os.makedirs(dest_folder, exist_ok=True)

    for item in items:
        src_path = os.path.join(src_folder, item)
        dest_path = os.path.join(dest_folder, item)
        if os.path.isdir(src_path):
            shutil.copytree(src_path, dest_path, dirs_exist_ok=True)
        else:
            shutil.copy2(src_path, dest_path)


In [16]:
# Merge every split of every XML dataset into the combined XML dataset.
for dataset in xml_datasets:
    for split in ('train', 'test', 'valid'):
        copy_files(
            os.path.join(dataset, split),
            os.path.join(combined_dataset_path, split),
        )



In [18]:
# Count the regular files directly inside the combined 'test' folder.
# No extension filter is applied, so this is a file count rather than
# strictly an image count.
test_dir = os.path.join(combined_dataset_path, 'test')
actual_test_images = sum(
    1
    for entry in os.listdir(test_dir)
    if os.path.isfile(os.path.join(test_dir, entry))
)
actual_test_images

152

In [2]:
# Destination directory for the merged XML dataset built from three sources.
combined_dataset_path = '../Data/Combined Dataset XML No Aug'

# Source XML dataset directories to merge; they are processed in this order.
xml_datasets = [
    '../Data/Datasets XML/Dataset 64-18-9',
    '../Data/Datasets XML/Dataset 97-14-14',
    '../Data/Datasets XML/Dataset 219-27-27',
]

In [5]:
# Merge every split of the listed XML datasets into the combined folder.
for dataset in xml_datasets:
    for split in ('train', 'test', 'valid'):
        split_source = os.path.join(dataset, split)
        split_target = os.path.join(combined_dataset_path, split)
        copy_files(split_source, split_target)

In [6]:
# Count the regular files directly inside the combined 'test' folder.
# No extension filter is applied, so every file type is included.
test_dir = os.path.join(combined_dataset_path, 'test')
test_files = [
    entry
    for entry in os.listdir(test_dir)
    if os.path.isfile(os.path.join(test_dir, entry))
]
actual_test_images = len(test_files)
actual_test_images

100