In [None]:
from pathlib import Path
from matplotlib import pyplot as plt
from concurrent.futures import ThreadPoolExecutor
import numpy as np
import shutil
import cv2 as cv

In [None]:
def random_shape(xy1max, xy2min):
    """Sample a random quadrilateral in normalized [0, 1] coordinates.

    A bounding box is sampled first, then one vertex is placed on each of
    its four sides.

    Args:
        xy1max: Exclusive upper bound for each coordinate of the box's first
            corner ``(x1, y1)``, drawn uniformly from ``[0, xy1max)``.
        xy2min: Lower bound of the uniform fraction (``[xy2min, 1)``) of the
            remaining space between ``(x1, y1)`` and ``(1, 1)`` at which the
            opposite corner ``(x2, y2)`` is placed.

    Returns:
        A ``(4, 2)`` float array of ``(x, y)`` vertices lying, in order, on
        the ``y = y1``, ``x = x2``, ``y = y2`` and ``x = x1`` sides of the box.
    """
    x1, y1 = np.random.uniform(high=xy1max, size=2)
    x2, y2 = np.random.uniform(low=xy2min, size=2) * (1 - x1, 1 - y1) + (x1, y1)
    # Normal(3, 1) / 6 is centred on 0.5 with std 1/6; clipping keeps the
    # fraction inside [0, 1] so every vertex stays on its side of the box.
    # The bounds are passed positionally because the ``min=``/``max=``
    # keywords only exist in NumPy >= 2.1 and raise TypeError on older versions.
    bounded_normal = np.clip(np.random.normal(loc=3, size=4) / 6, 0, 1)
    length = bounded_normal * np.array([x2 - x1, y2 - y1, x2 - x1, y2 - y1])
    points = np.array([
        [x1 + length[0], y1],
        [x2, y1 + length[1]],
        [x1 + length[2], y2],
        [x1, y1 + length[3]]
    ])
    return points

def normal_to_image(image_shape, points):
    """Scale normalized ``(x, y)`` points to integer pixel coordinates.

    Args:
        image_shape: Image shape whose first two entries are ``(height, width)``;
            any further entries (e.g. channels) are ignored.
        points: NumPy array of normalized ``(x, y)`` coordinates.

    Returns:
        Integer array of the same shape as ``points``; x is multiplied by the
        width and y by the height, then truncated by the integer cast.
    """
    height, width = image_shape[:2]
    scale = (width, height)  # points are (x, y), so width comes first
    pixel_points = points * scale
    return pixel_points.astype(int)

def random_colour(min_, max_):
    """Return a random 3-channel colour as a list of Python ints.

    Each channel is drawn independently from ``[min_, max_)`` (upper bound
    exclusive, as with ``np.random.randint``).
    """
    channels = np.random.randint(low=min_, high=max_, size=3)
    return channels.tolist()

def generate_image(background_path, dataset_path, image_name, xy1max, xy2min, crange, canny=None):
    """Draw a random quadrilateral on a background and save the image and its label.

    Writes ``<dataset_path>/images/<image_name>.jpeg`` and
    ``<dataset_path>/labels/<image_name>.txt``. The label file holds a single
    line: ``0`` followed by the eight normalized vertex coordinates
    (``x y`` pairs) separated by spaces. The leading ``0`` is presumably a
    class id — confirm against the consumer of these labels.

    Args:
        background_path: Path of the background image to load.
        dataset_path: ``pathlib.Path`` of the dataset root; its ``images`` and
            ``labels`` sub-directories must already exist.
        image_name: File stem used for both the image and the label.
        xy1max: Forwarded to ``random_shape``.
        xy2min: Forwarded to ``random_shape``.
        crange: ``(low, high)`` bounds forwarded to ``random_colour``.
        canny: Optional pair of thresholds; when given, the background is
            replaced by its ``cv.Canny`` edge map before drawing.

    Raises:
        OSError: If the background cannot be read or the image cannot be written.
    """
    # str() keeps this working on OpenCV builds whose Python bindings only
    # accept plain string filenames and reject pathlib.Path objects.
    image = cv.imread(str(background_path))
    if image is None:
        # cv.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read background image: {background_path}")
    points = random_shape(xy1max, xy2min)
    if canny:
        image = cv.Canny(image, canny[0], canny[1])
    # Closed polyline, 2 px thick, through the four vertices in pixel coordinates.
    image = cv.polylines(image, [normal_to_image(image.shape, points)], True, random_colour(crange[0], crange[1]), 2)
    image_file = dataset_path / "images" / f"{image_name}.jpeg"
    # cv.imwrite returns False on failure; do not leave a label without its image.
    if not cv.imwrite(str(image_file), image):
        raise OSError(f"Could not write image: {image_file}")
    with open(dataset_path / "labels" / f"{image_name}.txt", "w") as f:
        lines = map(str, np.ravel(points).tolist())
        f.write(f"0 {' '.join(lines)}\n")

def equal_ranges(n, total_n):
    """Split ``range(total_n)`` into ``n`` contiguous ranges of near-equal size.

    When ``total_n`` is not divisible by ``n`` the leftover items are spread
    one each over the first ``total_n % n`` ranges, so range sizes never
    differ by more than one.

    Args:
        n: Number of ranges to produce; must be at least 1.
        total_n: Number of items to split; must be at least ``n`` so that no
            range is empty.

    Returns:
        A list of ``n`` ``range`` objects that together cover
        ``0 .. total_n - 1`` in order without overlap.

    Raises:
        ValueError: If ``n < 1`` or ``total_n < n``.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")
    if total_n < n:
        raise ValueError(f"cannot split {total_n} items into {n} non-empty ranges")
    range_len, remainder = divmod(total_n, n)
    ranges = []
    start = 0
    for i in range(n):
        # The first `remainder` ranges take one extra item each.
        stop = start + range_len + (1 if i < remainder else 0)
        ranges.append(range(start, stop))
        start = stop
    return ranges

In [None]:
# Candidate background images: every *.jpg directly inside this folder (non-recursive).
backgrounds = list(Path("datasets/coco2017/train/images").glob("*.jpg"))
# Output root; it is wiped and recreated with images/ and labels/ sub-folders below.
dataset_path = Path("datasets/canny-random-quad/train")
# Total number of samples to generate (ids 0 .. dataset_num - 1).
dataset_num = 10000
# Number of id ranges the work is split into; also used as the thread-pool size.
chunks = 40

def generate_images(ids):
    """Generate one sample per id in ``ids``, each on a randomly chosen background.

    Reads the module-level ``backgrounds`` list and ``dataset_path``; the id
    (as a string) becomes the file stem of the image and its label.
    """
    for image_id in ids:
        # Pick by random index rather than np.random.choice, which was observed to be slow here.
        pick = np.random.randint(0, len(backgrounds))
        generate_image(
            backgrounds[pick],
            dataset_path,
            str(image_id),
            xy1max=0.6,
            xy2min=0.3,
            crange=(255, 256),
            canny=(100, 200),
        )

# Validate every input BEFORE the destructive rmtree below: with no backgrounds
# (or an invalid chunk configuration) the run would crash only after the
# previous dataset had already been deleted.
if not backgrounds:
    raise FileNotFoundError(
        f"No background images found; refusing to regenerate {dataset_path}"
    )
ranges = equal_ranges(chunks, dataset_num)

# Start from a clean output directory so stale samples never mix with new ones.
if dataset_path.exists():
    shutil.rmtree(dataset_path)
(dataset_path / "images").mkdir(parents=True)
(dataset_path / "labels").mkdir(parents=True)
with ThreadPoolExecutor(max_workers=chunks) as executor:
    futures = [executor.submit(generate_images, r) for r in ranges]
    # result() re-raises any exception raised inside a worker, which would
    # otherwise be silently dropped.
    for future in futures:
        future.result()