# Data Synthesis: MNIST to Object Detection

Let's synthesize an object-detection dataset from MNIST, since digits in the real world rarely appear in isolation.

- Input: Images of digits
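- Output: An image of scattered digits, plus one bounding box per digit (`label`, `x_rel`, `y_rel`, `w_rel`, `h_rel`), where `x_rel`/`y_rel` locate the box's top-left corner and all four values are fractions of the image width/height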

In [1]:
import random
import os

import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image

2024-04-23 09:56:22.118385: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-23 09:56:22.118867: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-23 09:56:22.121782: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-23 09:56:22.161900: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Make sure both output folders exist before anything is written to them.
for subdir in ('images', 'labels'):
    os.makedirs(os.path.join('generated', 'mnist-detection', subdir), exist_ok=True)

def save_example(filename, image, label_rows=None):
    """Save an image and its bounding-box labels to disk.

    The image is written to ``generated/mnist-detection/images/<filename>.png``
    and the labels to ``generated/mnist-detection/labels/<filename>.txt``, one
    tab-separated row per object.

    Args:
        filename: (string) - base name (without extension) shared by both files
        image: (np.ndarray) - the image to save; singleton axes are squeezed away
            and the result is stored as an 8-bit grayscale (PIL mode 'L') PNG
        label_rows: (iterable[tuple[label, x_rel, y_rel, w_rel, h_rel]]) - the
            label rows to save; ``None`` (the default) writes an empty label file

    Returns:
        None
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if label_rows is None:
        label_rows = ()

    images_dir = os.path.join('generated', 'mnist-detection', 'images')
    labels_dir = os.path.join('generated', 'mnist-detection', 'labels')

    # Create the target folders here so the function does not rely on an
    # earlier cell having been run.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # save image file
    img = Image.fromarray(image.squeeze(), mode='L')
    with open(os.path.join(images_dir, f'{filename}.png'), 'wb') as f:
        img.save(f)

    # save label file
    with open(os.path.join(labels_dir, f'{filename}.txt'), 'w') as f:
        for label, x_rel, y_rel, w_rel, h_rel in label_rows:
            f.write(f'{label}\t{x_rel}\t{y_rel}\t{w_rel}\t{h_rel}\n')

In [3]:
# Seed Python's RNG so that digit placement is reproducible on Restart & Run All.
SEED = 42
random.seed(SEED)

# Number of digits requested per generated image; BATCH_SIZE is derived from it
# because each image is composed from one batch of MNIST digits.
MIN_NUM_OBJECTS = 5

# Size in pixels of a single MNIST digit.
OBJ_HEIGHT = 28
OBJ_WIDTH = 28

# Size in pixels of the generated canvas (8 x 8 digit-sized cells).
IMG_HEIGHT = OBJ_HEIGHT * 8
IMG_WIDTH = OBJ_WIDTH * 8

# Border in pixels kept free of digits on every side of the canvas.
MARGIN_Y = OBJ_HEIGHT
MARGIN_X = OBJ_WIDTH

# Number of image/label pairs to generate.
NUM_EXAMPLES = 10

BATCH_SIZE = MIN_NUM_OBJECTS

In [4]:
# Load the MNIST training split; as_supervised=True makes each element an
# (image, label) pair, and with_info=True also returns the dataset metadata.
ds, info = tfds.load(
    'mnist',
    split='train',
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)

In [5]:
batches = ds.batch(BATCH_SIZE)

# Zero-pad file names to a common width so they sort in generation order.
filename_width = len(str(NUM_EXAMPLES))

# Largest top-left coordinates that still keep the whole digit inside the
# canvas with the margin left free on the far side.
max_y_pos = IMG_HEIGHT - MARGIN_Y - OBJ_HEIGHT
max_x_pos = IMG_WIDTH - MARGIN_X - OBJ_WIDTH
if max_y_pos < MARGIN_Y or max_x_pos < MARGIN_X:
    raise ValueError('Image is too small for the configured object size and margins')

# `batches` is already batched, so consecutive batches are consecutive groups of
# digits: batch i becomes example i. Iterating once (instead of re-creating the
# pipeline and skipping for every example) keeps the work linear, and
# as_numpy_iterator() hands back plain NumPy arrays for the pixel arithmetic.
for i, (features, labels) in enumerate(batches.take(NUM_EXAMPLES).as_numpy_iterator()):
    image = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.uint8)
    label_rows = []
    for digit, label in zip(features, labels):
        # pick a uniformly random top-left corner inside the margins
        # (randint is inclusive on both ends)
        y_pos = random.randint(MARGIN_Y, max_y_pos)
        x_pos = random.randint(MARGIN_X, max_x_pos)

        # paste the pixels at that position, using bit-wise OR to allow for
        # overlapping objects; `region` is a view, so this updates `image`
        region = image[y_pos:y_pos+OBJ_HEIGHT, x_pos:x_pos+OBJ_WIDTH]
        region |= digit

        # bounding box: top-left corner and size as fractions of the image size
        y_rel = y_pos / IMG_HEIGHT
        x_rel = x_pos / IMG_WIDTH
        w_rel = OBJ_WIDTH / IMG_WIDTH
        h_rel = OBJ_HEIGHT / IMG_HEIGHT
        # store the label as a plain Python int
        label_rows.append((int(label), x_rel, y_rel, w_rel, h_rel))
    save_example(f'{i+1:0{filename_width}}', image, label_rows)

2024-04-23 09:56:24.171364: W tensorflow/core/kernels/data/cache_dataset_ops.cc:858] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.
2024-04-23 09:56:24.198183: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-04-23 09:56:24.252387: W tensorflow/core/kernels/data/cache_dataset_ops.cc:858] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.