In [1]:
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import shutil

### Move images and annotations to DOTA
To train the YOLOv8 model from Ultralytics, the data must be arranged in the same folder structure as the DOTA dataset:
```bash
├── DOTA
│   ├── images
│   │   ├── train
│   │   ├── val
│   │   ├── test
│   ├── labels
│   │   ├── train
│   │   ├── val
│   │   ├── test
```

### 1. Setting the source and destination folders


In [2]:
# Source folders: images and YOLO label files for the day and night recordings.
day_images_folder = "data/sample_dataset/RAW/Set1_V000/V000/visible/"
day_annotations_folder = "data/sample_dataset/RAW/Set1_V000/V000/yolo_annotations/labels/day"
night_images_folder = "data/sample_dataset/RAW/Set4_V000/V000/visible/"
night_annotations_folder = "data/sample_dataset/RAW/Set4_V000/V000/yolo_annotations/labels/night"

# Destination tree: DOTA/{images,labels}/{train,val,test}.
dota_root_folder = os.path.join("data", "sample_dataset", "DOTA")
dota_images_folder, dota_annotations_folder = (
    os.path.join(dota_root_folder, kind) for kind in ("images", "labels")
)

# One destination per split, for the images ...
destination_images_train, destination_images_val, destination_images_test = (
    os.path.join(dota_images_folder, split_name)
    for split_name in ("train", "val", "test")
)

# ... and for the matching label files.
destination_annotations_train, destination_annotations_val, destination_annotations_test = (
    os.path.join(dota_annotations_folder, split_name)
    for split_name in ("train", "val", "test")
)

In [3]:
def _stem(file_name):
    """Return ``file_name`` without its final extension."""
    return os.path.splitext(file_name)[0]


def _index_annotations(annotations_folder, strip_prefix=False):
    """Map the stem of the image an annotation belongs to onto the annotation file name.

    Args:
        annotations_folder: Folder whose entries are the annotation files.
        strip_prefix: When true, everything up to and including the first
            underscore of the annotation stem is dropped before it is used
            as the key (a stem without an underscore is used unchanged).

    Returns:
        dict mapping image stem -> annotation file name, built in sorted
        file-name order.
    """
    annotation_by_stem = {}
    for annotation_name in sorted(os.listdir(annotations_folder)):
        stem = _stem(annotation_name)
        if strip_prefix:
            stem = stem.split("_", 1)[-1]
        annotation_by_stem[stem] = annotation_name
    return annotation_by_stem


day_annotation_by_stem = _index_annotations(day_annotations_folder)
# Night annotation names carry a prefix before the image stem; strip it for matching.
night_annotation_by_stem = _index_annotations(night_annotations_folder, strip_prefix=True)

day_annotation_file_names = list(day_annotation_by_stem)
night_annotation_file_names = list(night_annotation_by_stem)

# Keep only images that have an annotation. os.listdir returns entries in
# arbitrary order, so sort to get the same list on every machine.
day_image_names = sorted(
    image_name for image_name in os.listdir(day_images_folder)
    if _stem(image_name) in day_annotation_by_stem
)
night_image_names = sorted(
    image_name for image_name in os.listdir(night_images_folder)
    if _stem(image_name) in night_annotation_by_stem
)

day_image_paths = [os.path.join(day_images_folder, image_name) for image_name in day_image_names]
night_image_paths = [os.path.join(night_images_folder, image_name) for image_name in night_image_names]

# Derive the annotation lists FROM the image lists so that position i of the
# annotation list is always the label of position i of the image list. The
# lists are later split by position, so independently listed folders could put
# an image and its label into different splits. Annotations without an image
# are left out, which also keeps both lists the same length.
day_annotation_paths = [
    os.path.join(day_annotations_folder, day_annotation_by_stem[_stem(image_name)])
    for image_name in day_image_names
]
night_annotation_paths = [
    os.path.join(night_annotations_folder, night_annotation_by_stem[_stem(image_name)])
    for image_name in night_image_names
]

# Night images get a "night_" prefix on their file name when referenced through this mapping.
new_night_image_path = {
    image_path: os.path.join(os.path.dirname(image_path), f'night_{os.path.basename(image_path)}')
    for image_path in night_image_paths
}

In [4]:
# Number of night images that have a matching annotation (one rename entry per image).
len(new_night_image_path)

1871

### 2. Splitting the day and night dataset
First, split each dataset into 80% training and 20% testing.
Then split that training portion again into 80% training and 20% validation, which gives roughly 64% training, 16% validation and 20% testing overall.

In [5]:
# Both stages hold out 20% and use the same fixed seed.
split_options = {"test_size": 0.2, "random_state": 42}

# Stage 1: separate the test set from everything else.
(day_train_images, day_test_images,
 day_train_annotations, day_test_annotations) = train_test_split(
    day_image_paths, day_annotation_paths, **split_options
)
(night_train_images, night_test_images,
 night_train_annotations, night_test_annotations) = train_test_split(
    night_image_paths, night_annotation_paths, **split_options
)

# Stage 2: carve the validation set out of the remaining training portion.
(day_train_images, day_val_images,
 day_train_annotations, day_val_annotations) = train_test_split(
    day_train_images, day_train_annotations, **split_options
)
(night_train_images, night_val_images,
 night_train_annotations, night_val_annotations) = train_test_split(
    night_train_images, night_train_annotations, **split_options
)

### 3. Merge day and night sets into full training/validation/test

In [6]:
# Day entries come first and night entries second in every merged list, so the
# image and annotation lists keep the same ordering.
train_images, train_annotations = (
    day_train_images + night_train_images,
    day_train_annotations + night_train_annotations,
)
val_images, val_annotations = (
    day_val_images + night_val_images,
    day_val_annotations + night_val_annotations,
)
test_images, test_annotations = (
    day_test_images + night_test_images,
    day_test_annotations + night_test_annotations,
)

In [7]:
# Report how many images and annotations ended up in each split.
for split_name, split_images, split_annotations in (
    ("train", train_images, train_annotations),
    ("val", val_images, val_annotations),
    ("test", test_images, test_annotations),
):
    print(f'Number of {split_name} images: {len(split_images)}, annotations: {len(split_annotations)}')

Number of train images: 1762, annotations: 1762
Number of val images: 442, annotations: 442
Number of test images: 552, annotations: 552


### 4. Copy the images and labels to their correct folder

In [8]:
def copy_into_folder(source_paths, destination_folder, renamed_paths=None):
    """Copy every file in ``source_paths`` into ``destination_folder``.

    Args:
        source_paths: Paths of the files to copy.
        destination_folder: Folder that receives the copies. It is created
            (including parents) when it does not exist yet.
        renamed_paths: Optional mapping from a source path to the path whose
            base name the copy should get. Sources without an entry keep
            their own base name.

    Files that already exist at the destination are left untouched, so a
    re-run only copies what is missing. Clear the destination tree first if
    the split has changed, otherwise stale files from the previous split remain.
    """
    renamed_paths = renamed_paths or {}
    # shutil.copyfile does not create missing directories.
    os.makedirs(destination_folder, exist_ok=True)
    for source_path in tqdm(source_paths):
        file_name = os.path.basename(renamed_paths.get(source_path, source_path))
        destination = os.path.join(destination_folder, file_name)
        if not os.path.exists(destination):
            shutil.copyfile(source_path, destination)


# Images: night images are stored under their "night_"-prefixed name.
copy_into_folder(train_images, destination_images_train, new_night_image_path)
copy_into_folder(val_images, destination_images_val, new_night_image_path)
copy_into_folder(test_images, destination_images_test, new_night_image_path)

# Annotations keep their original file names.
copy_into_folder(train_annotations, destination_annotations_train)
copy_into_folder(val_annotations, destination_annotations_val)
copy_into_folder(test_annotations, destination_annotations_test)

100%|██████████| 1762/1762 [00:43<00:00, 40.53it/s]
100%|██████████| 442/442 [00:09<00:00, 45.17it/s] 
100%|██████████| 552/552 [00:13<00:00, 42.38it/s] 
100%|██████████| 1762/1762 [00:50<00:00, 35.07it/s]
100%|██████████| 442/442 [00:12<00:00, 34.15it/s]
100%|██████████| 552/552 [00:15<00:00, 35.02it/s]
