In [None]:
from pathlib import Path
from matplotlib import pyplot as plt
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import shutil
import cv2 as cv

In [None]:
def equal_ranges(n, total_n):
    """Split ``range(total_n)`` into ``n`` contiguous, near-equal ranges.

    The ranges are returned in ascending order and together cover every
    index in ``[0, total_n)`` exactly once. Their lengths differ by at most
    one: when ``total_n`` is not divisible by ``n`` the leftover indices are
    handed out one each to the leading ranges rather than all being piled
    onto the last range.

    Args:
        n: Number of ranges to produce; must be a positive integer.
        total_n: Number of indices to distribute; must be at least ``n``.

    Returns:
        A list of ``n`` ``range`` objects.

    Raises:
        ValueError: If ``n`` is not positive or ``total_n`` is smaller
            than ``n``.
    """
    if n <= 0:
        raise ValueError(f"n must be a positive integer, got {n}")
    if total_n < n:
        raise ValueError(f"cannot split {total_n} items into {n} non-empty ranges")
    range_len, remainder = divmod(total_n, n)
    ranges = []
    start = 0
    for i in range(n):
        # The first `remainder` ranges each absorb one leftover index.
        stop = start + range_len + (1 if i < remainder else 0)
        ranges.append(range(start, stop))
        start = stop
    return ranges

def random_quad(rmin, rmax):
    """Sample a random quadrilateral centred roughly on the unit square.

    Eight independent values are drawn uniformly between ``rmin`` and
    ``rmax``. Each one is halved and used as the offset of one coordinate
    from the centre 0.5, so "low" coordinates are ``0.5 - r / 2`` and "high"
    coordinates are ``0.5 + r / 2``.

    Args:
        rmin: Smallest offset scale (``rmin <= rmax``).
        rmax: Largest offset scale.

    Returns:
        A ``(4, 2)`` float array of corners ordered (low, low), (high, low),
        (high, high), (low, high).
    """
    # Bounds must be passed as (low, high): NumPy documents the result of
    # uniform() as undefined when high < low and may raise for it in future.
    low = 0.5 * (1 - np.random.uniform(low=rmin, high=rmax, size=4))
    high = 0.5 * (1 + np.random.uniform(low=rmin, high=rmax, size=4))
    # Every sampled value is used exactly once, so all eight coordinates
    # are independent of each other.
    return np.array([
        [low[0], low[1]], [high[0], low[2]],
        [high[2], high[3]], [low[3], high[1]]
    ])

def normal_to_image(image_shape, points):
    """Scale normalised ``(x, y)`` points to integer pixel coordinates.

    Args:
        image_shape: Shape whose first two entries are ``(height, width)``;
            any further entries (e.g. channels) are ignored.
        points: NumPy array whose last axis holds ``(x, y)`` pairs.

    Returns:
        Integer array of the same shape as ``points``: x is multiplied by
        the width, y by the height, and the fractional part is truncated.
    """
    height, width = image_shape[:2]
    pixel_coords = points * (width, height)
    return pixel_coords.astype(int)

def random_colour(min_, max_):
    """Draw a random three-channel colour.

    Args:
        min_: Inclusive lower bound for each channel.
        max_: Exclusive upper bound for each channel.

    Returns:
        A list of three Python ints, each in ``[min_, max_)``.
    """
    channels = np.random.randint(min_, max_, size=3)
    return [int(value) for value in channels]

def generate_image(radius_range, colour_range, bg, fg, path, id_):
    """Paste a random quadrilateral of one picture onto another and save it.

    The foreground is resized to the background's size, a random quad is
    sampled, and the pixels inside the quad are taken from the foreground
    while the rest come from the background. The quad is then outlined in a
    random colour. The result is written to ``path/images/<id_>.jpeg`` and
    the quad's normalised corner coordinates to ``path/labels/<id_>.txt``.

    Args:
        radius_range: ``(rmin, rmax)`` pair forwarded to ``random_quad``.
        colour_range: ``(min, max)`` pair forwarded to ``random_colour``.
        bg: Path of the background picture.
        fg: Path of the picture pasted inside the quad.
        path: Output directory (a ``pathlib.Path``) that already contains
            ``images`` and ``labels`` subdirectories.
        id_: Identifier used as the stem of both output file names.

    Raises:
        OSError: If either input picture cannot be read or the output
            picture cannot be written.
    """
    # str() keeps this working with pathlib.Path on OpenCV builds whose
    # Python bindings only accept plain strings as file names.
    image = cv.imread(str(bg))
    if image is None:
        # imread signals failure by returning None rather than raising.
        raise OSError(f"could not read background image: {bg}")
    overlay = cv.imread(str(fg))
    if overlay is None:
        raise OSError(f"could not read foreground image: {fg}")
    # shape[1::-1] is (width, height), the order cv.resize expects.
    overlay = cv.resize(overlay, image.shape[1::-1])
    contour = random_quad(*radius_range)
    contour_image = normal_to_image(image.shape, contour)
    # Filled white quad on black: selects which pixels come from the overlay.
    mask = np.zeros(image.shape, dtype=np.uint8)
    mask = cv.drawContours(mask, [contour_image], 0, (255, 255, 255), thickness=-1)
    # The two masked parts never overlap, so the uint8 sum cannot overflow.
    image = cv.bitwise_and(image, cv.bitwise_not(mask)) + cv.bitwise_and(overlay, mask)
    image = cv.polylines(image, [contour_image], True, random_colour(*colour_range), thickness=1)
    image_file = path / "images" / f"{id_}.jpeg"
    if not cv.imwrite(str(image_file), image):
        # Fail before writing the label so no label exists without its image.
        raise OSError(f"could not write image: {image_file}")
    with open(path / "labels" / f"{id_}.txt", "w") as f:
        # One line: a leading "0" (presumably the class id of a YOLO-style
        # polygon label; confirm with the consumer) followed by the eight
        # normalised coordinates x1 y1 ... x4 y4.
        points = map(str, np.ravel(contour).tolist())
        f.write(f"0 {' '.join(points)}\n")

In [None]:
# Pool of source pictures; both backgrounds and foregrounds are drawn from it.
images = list(Path("datasets/coco2017/train/images").glob("*.jpg"))
# Output root; it is wiped and recreated with "images" and "labels" subfolders below.
dataset_path = Path("datasets/random-overlay/val")
# Total number of samples to generate (ids 0 .. dataset_num - 1).
dataset_num = 2000
# Number of id ranges the work is split into; also used as the thread-pool size.
chunks = 40

def generate_images(ids):
    """Generate one composite sample for every id in ``ids``.

    For each id a background and a foreground are picked independently at
    random from the module-level ``images`` list (so they may coincide) and
    the sample is written under ``dataset_path``.

    Args:
        ids: Iterable of sample identifiers.
    """
    radius_range = (0.2, 0.6)
    colour_range = (0, 256)
    for sample_id in ids:
        # Draw the background index first, then the foreground index.
        bg_index = np.random.randint(0, len(images))
        fg_index = np.random.randint(0, len(images))
        generate_image(
            radius_range,
            colour_range,
            images[bg_index],
            images[fg_index],
            dataset_path,
            sample_id,
        )

# Validate every input before touching the output directory: otherwise a bad
# configuration would only fail after the previous dataset had been deleted.
if not images:
    raise FileNotFoundError(
        "no source images found; check the input directory and the working directory"
    )
# Raises ValueError when dataset_num is smaller than chunks.
ranges = equal_ranges(chunks, dataset_num)

# Start from a clean output tree so stale samples never mix with new ones.
if dataset_path.exists():
    shutil.rmtree(dataset_path)
(dataset_path / "images").mkdir(parents=True)
(dataset_path / "labels").mkdir(parents=True)
with ThreadPoolExecutor(max_workers=chunks) as executor:
    futures = [executor.submit(generate_images, r) for r in ranges]
    # result() re-raises any exception from a worker so failures are not silent.
    for future in futures:
        future.result()