In [1]:
import albumentations as A
import cv2
from glob import glob
import os
from uuid import uuid4
import random
import math
import shutil
import numpy as np

old_images_path = "./datasets/old-images"
old_bboxes_path = "./datasets/old-bboxes"

final_dataset_path = "./datasets/final"

augmentations_count = 5

  check_for_updates()


In [2]:
transform = A.Compose([
  A.HorizontalFlip(),
  A.RandomBrightnessContrast(0.2, 0.2),
  A.Rotate((10, 10)),
  A.GaussNoise(p=0.1),
  A.Blur(p=0.3),
  A.HueSaturationValue(p=0.1),
  A.Resize(640, 640, cv2.INTER_LANCZOS4, cv2.INTER_LANCZOS4)
], bbox_params=A.BboxParams(format="yolo"))

In [3]:
os.makedirs(final_dataset_path, exist_ok=True)
dataset_subsets_names = ["train", "val", "test"]
dataset_subsets_ratio = [0.7, 0.2, 0.1]

for name in dataset_subsets_names:
  os.makedirs(os.path.join(final_dataset_path, "images", name), exist_ok=True)
  os.makedirs(os.path.join(final_dataset_path, "labels", name), exist_ok=True)

missed_count = 0
processed_count = 0

subsets_images_count = {
  "train": 0,
  "val": 0,
  "test": 0
}

images_paths = glob(os.path.join(old_images_path, "**/*.PNG"))
random.shuffle(images_paths)

for image_path in images_paths:
  img = cv2.imread(image_path)
  
  bbox_path = image_path.replace("old-images", "old-bboxes").replace(".PNG", ".txt")
  
  bboxes = []
  class_labels = []
  try:
    with open(bbox_path, "r") as file:
      for line in file:
        cols = line.strip().split()
        class_labels.append(int(cols[0]))
        bboxes.append(np.array(list(map(lambda x: max(float(x), 0), cols[1:]))))
  except:
    print(f"Missed {image_path} because it has no bboxes")
    missed_count += 1
    continue
  
  subset = dataset_subsets_ratio[0]
  p = random.random()
  a = 0
  for i in range(len(dataset_subsets_ratio)):
    a += dataset_subsets_ratio[i]
    if (p < a):
      subset = dataset_subsets_names[i]
      break

  try:
    for _ in range(augmentations_count):
      aug_result = transform(image=img, bboxes=bboxes, class_labels=class_labels)
      aug_image = aug_result["image"]
      aug_bboxes = aug_result["bboxes"]
      
      uuid = uuid4().hex
      
      cv2.imwrite(os.path.join(final_dataset_path, "images", subset, f"{uuid}.PNG"), aug_image)
      
      with open(os.path.join(final_dataset_path, "labels", subset, f"{uuid}.txt"), "w") as file:
        for bbox, class_id in zip(aug_bboxes, class_labels):
          file.write(f"{class_id} {' '.join(map(str, bbox))}\n")
      subsets_images_count[subset] += 1
  except Exception as e:
    print(f"Error creating bboxes {image_path}: {e}")
    missed_count += 1
    continue
    
  processed_count += 1
  
print(f"Processed {processed_count} images, missed {missed_count} images")
print(f"Train: {subsets_images_count['train']}\nVal: {subsets_images_count['val']}\nTest: {subsets_images_count['test']}")

Missed ./datasets/old-images\p3\frame_000010.PNG because it has no bboxes
Missed ./datasets/old-images\p3\frame_000466.PNG because it has no bboxes
Missed ./datasets/old-images\p3\frame_000025.PNG because it has no bboxes
Missed ./datasets/old-images\p3\frame_000278.PNG because it has no bboxes
Error creating bboxes ./datasets/old-images\p2\frame_001195.PNG: Expected y_min for bbox [ 7.4530482e-02 -4.7683716e-07  9.9751151e-01  8.5516649e-01] to be in the range [0.0, 1.0], got -4.76837158203125e-07.
Missed ./datasets/old-images\p4\frame_000247.PNG because it has no bboxes
Missed ./datasets/old-images\p3\frame_000146.PNG because it has no bboxes
Missed ./datasets/old-images\p3\frame_000229.PNG because it has no bboxes
Missed ./datasets/old-images\p2\frame_000018.PNG because it has no bboxes
Missed ./datasets/old-images\p3\frame_000442.PNG because it has no bboxes
Missed ./datasets/old-images\p3\frame_000202.PNG because it has no bboxes
Missed ./datasets/old-images\p2\frame_000142.PNG be