In [1]:
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import shutil

### Move images and annotations to DOTA
To train the YOLOv8 model by Ultralytics, the data must be arranged in the folder structure used by the DOTA dataset:
```bash
├── DOTA
│   ├── images
│   │   ├── train
│   │   ├── val
│   │   ├── test
│   ├── labels
│   │   ├── train
│   │   ├── val
│   │   ├── test
```

### 1. Setting the source and destination folders


In [2]:
# Root of the raw dataset. The trailing separator is kept so the f-strings
# below can append sub-paths directly.
base_path = "B:\\multispectral-ped-detection\\data\\RAW\\kaist-cvpr15\\"

# Source image folders, one per (time of day, modality) combination.
day_visible_image_path = f'{base_path}images_renamed\\day\\visible'
day_blended_image_path = f'{base_path}images_renamed\\day\\blended'
night_visible_image_path = f'{base_path}images_renamed\\night\\visible'
night_blended_image_path = f'{base_path}images_renamed\\night\\blended'

# os.listdir returns entries in arbitrary order; sorting keeps every list
# derived from these listings in a stable order between runs.
day_visible_images = sorted(os.listdir(day_visible_image_path))
day_blended_images = sorted(os.listdir(day_blended_image_path))
night_visible_images = sorted(os.listdir(night_visible_image_path))
night_blended_images = sorted(os.listdir(night_blended_image_path))

# Annotation folders are derived from base_path (same resulting strings as
# before) so that relocating the dataset only requires editing base_path.
day_annotations_folder = f'{base_path}annotations-yolo\\day\\labels\\day'
night_annotations_folder = f'{base_path}annotations-yolo\\night\\labels\\night'

In [3]:
# Destination tree for the visible-only dataset.
visible_dota_root_folder = "B:\\multispectral-ped-detection\\data\\DOTA_CONTROL"

visible_dota_images_folder, visible_dota_annotations_folder = (
    os.path.join(visible_dota_root_folder, subfolder) for subfolder in ("images", "labels")
)

# One image folder and one label folder per split.
(
    visible_destination_images_train,
    visible_destination_images_val,
    visible_destination_images_test,
) = (os.path.join(visible_dota_images_folder, split_name) for split_name in ("train", "val", "test"))

(
    visible_destination_annotations_train,
    visible_destination_annotations_val,
    visible_destination_annotations_test,
) = (os.path.join(visible_dota_annotations_folder, split_name) for split_name in ("train", "val", "test"))

# Destination tree for the blended dataset, laid out the same way.
blended_dota_root_folder = "B:\\multispectral-ped-detection\\data\\DOTA_BLENDED"

blended_dota_images_folder, blended_dota_annotations_folder = (
    os.path.join(blended_dota_root_folder, subfolder) for subfolder in ("images", "labels")
)

(
    blended_destination_images_train,
    blended_destination_images_val,
    blended_destination_images_test,
) = (os.path.join(blended_dota_images_folder, split_name) for split_name in ("train", "val", "test"))

(
    blended_destination_annotations_train,
    blended_destination_annotations_val,
    blended_destination_annotations_test,
) = (os.path.join(blended_dota_annotations_folder, split_name) for split_name in ("train", "val", "test"))

In [4]:
# List each annotation folder once. os.listdir returns entries in arbitrary
# order, so the listings are sorted: the seeded train/val/test split that
# consumes these names is only reproducible if its input order is stable.
day_annotation_listing = sorted(os.listdir(day_annotations_folder))
night_annotation_listing = sorted(os.listdir(night_annotations_folder))

# Full paths to every annotation file.
day_annotations = [os.path.join(day_annotations_folder, annotation_name) for annotation_name in day_annotation_listing]
night_annotations = [os.path.join(night_annotations_folder, annotation_name) for annotation_name in night_annotation_listing]

# File names without extension; these stems are what gets split and later
# matched against image file names. os.path.splitext leaves a name without
# any dot intact, whereas slicing at rfind('.') would drop its last character.
day_annotation_file_names = [os.path.splitext(annotation_name)[0] for annotation_name in day_annotation_listing]
night_annotation_file_names = [os.path.splitext(annotation_name)[0] for annotation_name in night_annotation_listing]

### 2. Splitting the day and night dataset
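The day and night sets are split separately, using the same procedure for each.
First, split the dataset into 80% training and 20% testing.
Then, split that training portion again into 80% training and 20% validation, which gives roughly 64% training, 16% validation and 20% testing overall.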

In [5]:
def _split_train_val_test(file_names):
    """Split file_names into (train, val, test) lists.

    20% is held out as the test split first; 20% of the remainder then
    becomes the validation split. Both stages use random_state=42.
    """
    remainder, test_part = train_test_split(file_names, test_size=0.2, random_state=42)
    train_part, val_part = train_test_split(remainder, test_size=0.2, random_state=42)
    return train_part, val_part, test_part


(
    train_day_annotation_file_names,
    val_day_annotation_file_names,
    test_day_annotation_file_names,
) = _split_train_val_test(day_annotation_file_names)

(
    train_night_annotation_file_names,
    val_night_annotation_file_names,
    test_night_annotation_file_names,
) = _split_train_val_test(night_annotation_file_names)

### 3. Merge day and night sets into full training/validation/test

In [6]:
def _images_in_split(image_folder, image_names, split_stems):
    """Return full paths of the images whose extension-less name is in split_stems.

    image_folder -- folder the image names were listed from
    image_names  -- file names (with extension) found in image_folder
    split_stems  -- annotation file names without extension for one split

    The result keeps the order of image_names. The stems are put in a set so
    each membership test is constant-time instead of a scan over a list.
    """
    wanted_stems = set(split_stems)
    return [
        os.path.join(image_folder, image_name)
        for image_name in image_names
        if os.path.splitext(image_name)[0] in wanted_stems
    ]


def _annotation_paths(annotation_folder, split_stems):
    """Return the '<stem>.txt' path inside annotation_folder for every stem."""
    return [os.path.join(annotation_folder, f'{annotation_name}.txt') for annotation_name in split_stems]


# Each split is the day part followed by the night part.
visible_train_images = (
    _images_in_split(day_visible_image_path, day_visible_images, train_day_annotation_file_names)
    + _images_in_split(night_visible_image_path, night_visible_images, train_night_annotation_file_names)
)
visible_val_images = (
    _images_in_split(day_visible_image_path, day_visible_images, val_day_annotation_file_names)
    + _images_in_split(night_visible_image_path, night_visible_images, val_night_annotation_file_names)
)
visible_test_images = (
    _images_in_split(day_visible_image_path, day_visible_images, test_day_annotation_file_names)
    + _images_in_split(night_visible_image_path, night_visible_images, test_night_annotation_file_names)
)

blended_train_images = (
    _images_in_split(day_blended_image_path, day_blended_images, train_day_annotation_file_names)
    + _images_in_split(night_blended_image_path, night_blended_images, train_night_annotation_file_names)
)
blended_val_images = (
    _images_in_split(day_blended_image_path, day_blended_images, val_day_annotation_file_names)
    + _images_in_split(night_blended_image_path, night_blended_images, val_night_annotation_file_names)
)
blended_test_images = (
    _images_in_split(day_blended_image_path, day_blended_images, test_day_annotation_file_names)
    + _images_in_split(night_blended_image_path, night_blended_images, test_night_annotation_file_names)
)

# The same annotation files serve both the visible and the blended dataset.
train_annotations = (
    _annotation_paths(day_annotations_folder, train_day_annotation_file_names)
    + _annotation_paths(night_annotations_folder, train_night_annotation_file_names)
)
val_annotations = (
    _annotation_paths(day_annotations_folder, val_day_annotation_file_names)
    + _annotation_paths(night_annotations_folder, val_night_annotation_file_names)
)
test_annotations = (
    _annotation_paths(day_annotations_folder, test_day_annotation_file_names)
    + _annotation_paths(night_annotations_folder, test_night_annotation_file_names)
)

In [7]:
# Sanity check: for every split, the number of images should equal the
# number of annotations. Both datasets are compared against the same
# annotation lists.
split_report = [
    ("visible", "train", visible_train_images, train_annotations),
    ("visible", "val", visible_val_images, val_annotations),
    ("visible", "test", visible_test_images, test_annotations),
    ("blended", "train", blended_train_images, train_annotations),
    ("blended", "val", blended_val_images, val_annotations),
    ("blended", "test", blended_test_images, test_annotations),
]

for modality, split_name, split_images, split_annotations in split_report:
    print(f'Number of {modality} {split_name} images: {len(split_images)}, annotations: {len(split_annotations)}')

Number of visible train images: 24984, annotations: 24984
Number of visible val images: 6247, annotations: 6247
Number of visible test images: 7808, annotations: 7808
Number of blended train images: 24984, annotations: 24984
Number of blended val images: 6247, annotations: 6247
Number of blended test images: 7808, annotations: 7808


### 4. Copy the images and labels to their correct folder

In [8]:
# (source files, destination folder) pairs for the visible dataset, in the
# order they are copied: images first, then the matching annotations.
visible_copy_plan = [
    (visible_train_images, visible_destination_images_train),
    (visible_val_images, visible_destination_images_val),
    (visible_test_images, visible_destination_images_test),
    (train_annotations, visible_destination_annotations_train),
    (val_annotations, visible_destination_annotations_val),
    (test_annotations, visible_destination_annotations_test),
]

for source_files, destination_folder in visible_copy_plan:
    # shutil.copyfile does not create missing parent folders, so make sure the
    # destination exists before copying (a no-op when it is already there).
    os.makedirs(destination_folder, exist_ok=True)

    for source_file in tqdm(source_files):
        destination = os.path.join(destination_folder, os.path.basename(source_file))
        # Files already present are skipped, so the cell can be re-run to
        # resume a copy. NOTE: a file left incomplete by an interrupted run
        # is also skipped; delete it manually in that case.
        if not os.path.exists(destination):
            shutil.copyfile(source_file, destination)

100%|██████████| 24984/24984 [00:01<00:00, 12669.39it/s]
100%|██████████| 6247/6247 [00:00<00:00, 13376.87it/s]
100%|██████████| 7808/7808 [00:00<00:00, 14196.40it/s]
100%|██████████| 24984/24984 [00:01<00:00, 13957.08it/s]
100%|██████████| 6247/6247 [00:00<00:00, 16057.80it/s]
100%|██████████| 7808/7808 [00:00<00:00, 18074.13it/s]


In [9]:
# (source files, destination folder) pairs for the blended dataset, in the
# order they are copied: images first, then the matching annotations.
blended_copy_plan = [
    (blended_train_images, blended_destination_images_train),
    (blended_val_images, blended_destination_images_val),
    (blended_test_images, blended_destination_images_test),
    (train_annotations, blended_destination_annotations_train),
    (val_annotations, blended_destination_annotations_val),
    (test_annotations, blended_destination_annotations_test),
]

for source_files, destination_folder in blended_copy_plan:
    # shutil.copyfile does not create missing parent folders, so make sure the
    # destination exists before copying (a no-op when it is already there).
    os.makedirs(destination_folder, exist_ok=True)

    for source_file in tqdm(source_files):
        destination = os.path.join(destination_folder, os.path.basename(source_file))
        # Files already present are skipped, so the cell can be re-run to
        # resume a copy. NOTE: a file left incomplete by an interrupted run
        # is also skipped; delete it manually in that case.
        if not os.path.exists(destination):
            shutil.copyfile(source_file, destination)

  0%|          | 0/24984 [00:00<?, ?it/s]

100%|██████████| 24984/24984 [04:00<00:00, 103.94it/s]
100%|██████████| 6247/6247 [05:23<00:00, 19.30it/s]
100%|██████████| 7808/7808 [06:38<00:00, 19.57it/s]
100%|██████████| 24984/24984 [24:12<00:00, 17.20it/s]  
100%|██████████| 6247/6247 [04:35<00:00, 22.64it/s]
100%|██████████| 7808/7808 [05:48<00:00, 22.39it/s]
