-
Notifications
You must be signed in to change notification settings - Fork 2
/
data_gen.py
325 lines (283 loc) · 12 KB
/
data_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
"""
@author: Ming Ming Zhang, mmzhangist@gmail.com
Data Generator
"""
# influenced by
# https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/model.py
import time
import numpy as np
import concurrent.futures
import anchors
def data_generator(dataset, config, shuffle=True, subset='train'):
    """
    Generates the input data asynchronously (multi-CPU).
    Parameters
    ----------
    dataset : class
        Described in utils.Dataset().
    config : class
        A custom configuration, see config.Config().
    shuffle : boolean, optional
        Whether to shuffle the dataset after each epoch. The default is True.
    subset : string, optional
        The dataset is for 'train' or 'val'. The default is 'train', i.e.,
        augmentation or more_scales techniques may be applied to the dataset,
        whereas 'val' has none of those techniques; see config.Config().
    Yields
    ------
    data : dictionary with keys
        * 'images' : numpy array, [batch_size, height, width, 3]
            A batch of input images with the same shape.
        * 'gt_anchor_indicators' : numpy array, [batch_size, num_anchors]
            Ground-truth anchors indicate negative -1, neutral 0 and positive 1
            anchors, generated by anchors.anchors_targets().
        * 'gt_anchor_class_ids' : numpy array, [batch_size, num_anchors, num_object_classes]
            Ground-truth anchor class ids, generated by anchors.anchors_targets().
            Note that rows with all zeros indicate negative anchors, i.e.,
            background.
        * 'gt_anchor_offsets' : numpy array, [batch_size, num_anchors, 4]
            Ground-truth anchor offsets, where 4 is center coordinates
            (y, x, h, w) and generated by anchors.anchors_targets().
    """
    assert subset in ['train', 'val']
    # augmentation and more_scales techniques will not apply to val dataset
    if subset == 'val':
        augmenters = None
        more_scales = False
    else:
        augmenters = config.augmenters
        more_scales = config.more_scales
    image_ids = np.copy(dataset.image_ids)
    # repeat image_ids at least 2 times to have enough images in every step
    idxes = np.tile(
        np.arange(len(image_ids)),
        max(2, int(np.ceil(config.batch_size_global / len(image_ids))))
        )
    num_steps_per_epoch = int(np.ceil(
        len(image_ids) / config.batch_size_global))
    counts = 0
    # generate anchors, [num_anchors, 4] where 4 is (y1, x1, y2, x2)
    anchors_fpn = anchors.anchors_from_fpn(
        config.scales,
        config.ratios,
        config.fmap_sizes,
        config.fmap_strides,
        config.denser)
    anchors_fpn = np.concatenate(anchors_fpn, axis=0)
    num_anchors = anchors_fpn.shape[0]
    # generator, loop forever
    while True:
        if counts == num_steps_per_epoch:
            counts = 0
            if shuffle:
                np.random.shuffle(image_ids)
        for i in range(num_steps_per_epoch):
            batch_images = np.zeros(
                (config.batch_size_global,) + config.image_shape,
                np.float32)
            batch_anchor_indicators = np.zeros(
                (config.batch_size_global, num_anchors),
                np.int32)
            batch_anchor_class_ids = np.zeros(
                (config.batch_size_global,
                 num_anchors,
                 config.num_object_classes),
                np.int32)
            batch_anchor_offsets = np.zeros(
                (config.batch_size_global, num_anchors, 4),
                np.float32)
            # image_ids_i, image ids for the i-th step
            idxes_i = idxes[
                i*config.batch_size_global:(i+1)*config.batch_size_global]
            image_ids_i = image_ids[idxes_i]
            def f(image_id, b):
                """
                Generates data for an image of a batch.
                Parameters
                ----------
                image_id : integer
                    The image id from dataset.
                b : integer
                    The b-th image of the batch will be processed.
                Returns
                -------
                None. Writes into the enclosing batch_* arrays at row b.
                """
                # more_scales technique: randomly pick one of two resize modes
                # with probability 0.5 each
                if more_scales:
                    resize_modes = ['pad_square', 'crop']
                    seed = np.random.uniform()
                    if seed > 0.5:
                        resize_mode = resize_modes[0]
                    else:
                        resize_mode = resize_modes[1]
                else:
                    resize_mode = config.resize_mode
                # load data
                image, boxes, class_ids, cache = dataset.load_data(
                    image_id,
                    config.shortest_side,
                    config.longest_side,
                    config.upscale_factor,
                    resize_mode,
                    augmenters,
                    config.max_num_crops)
                # identify images that have no object due to cropping
                if not np.any(class_ids > 0):
                    print('\nError in processing image %s' \
                          % dataset.get_image_info(image_id)['id'])
                # generate anchors targets
                an_indicators, an_class_ids, an_offsets = \
                    anchors.anchors_targets(
                        anchors_fpn,
                        boxes,
                        class_ids,
                        config.num_object_classes)
                # add to the batch
                # standardize images
                if config.channels_mean is not None \
                    and config.channels_std is not None:
                    batch_images[b] = (
                        image - config.channels_mean
                        ) / config.channels_std
                elif config.channels_mean is not None:
                    batch_images[b] = image - config.channels_mean
                elif config.channels_std is not None:
                    batch_images[b] = image / config.channels_std
                else:
                    batch_images[b] = image / 255.0
                batch_anchor_indicators[b] = an_indicators
                batch_anchor_class_ids[b] = an_class_ids
                # standardize offsets
                if config.offsets_mean is not None and \
                    config.offsets_std is not None:
                    batch_anchor_offsets[b] = (
                        an_offsets - config.offsets_mean
                        ) / config.offsets_std
                else:
                    batch_anchor_offsets[b] = an_offsets
            # generate data asynchronously (multi-CPU) for a batch of images
            with concurrent.futures.ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(f, image_ids_i[b], b)
                    for b in range(config.batch_size_global)]
                # BUGFIX: the original discarded the futures, so exceptions
                # raised inside f were silently swallowed and the generator
                # yielded zero-filled batch slots; result() re-raises them here
                for future in futures:
                    future.result()
            data = {
                'images': batch_images,
                'gt_anchor_indicators': batch_anchor_indicators,
                'gt_anchor_class_ids': batch_anchor_class_ids,
                'gt_anchor_offsets': batch_anchor_offsets
                }
            yield data
            counts += 1
def get_ds_info(dataset, config):
    """
    Get the information and stats of a dataset asynchronously (multi-CPU).
    Parameters
    ----------
    dataset : class
        Described in utils.Dataset().
    config : class
        A custom configuration, see config.Config().
    Returns
    -------
    meta : list
        The meta data about each image in the dataset, each is a dictionary
        with keys 'id' and 'info' where 'info' includes original image shape,
        number of boxes in original image, resized image shape, number of boxes
        in resized image, window in resized image and crop from original image.
    stats : tuple
        The stats for all images in the dataset, collecting image sizes, mean
        pixel per channel, box sizes, number of objects, number of positive,
        neutral and negative anchors, and offsets for positive anchors. Each
        element is a list aligned with dataset.image_ids order.
    """
    # anchors, [num_anchors, 4]
    anchors_fpn = anchors.anchors_from_fpn(
        config.scales,
        config.ratios,
        config.fmap_sizes,
        config.fmap_strides,
        config.denser)
    anchors_fpn = np.concatenate(anchors_fpn, axis=0)
    def f(image_id):
        """
        Gets meta and stats for an image.
        Parameters
        ----------
        image_id : integer
            The image id.
        Returns
        -------
        tuple
            (meta_entry, image_size, rgb_mean, box_size, num_obj,
             num_pos, num_nu, num_neg, pos_offsets) for this image.
        """
        image_name = dataset.get_image_info(image_id)['id']
        # original image and corresponding boxes and class_ids
        image, boxes, class_ids, cache = dataset.load_data(
            image_id,
            mode='none')
        # identify images that have no object
        if not np.any(class_ids > 0):
            print('\nError in processing image %s' % image_name)
        image_size = image.shape[:2]
        rgb_mean = np.mean(image, axis=(0,1))
        h = boxes[:,2] - boxes[:,0]
        w = boxes[:,3] - boxes[:,1]
        box_size = np.stack([h, w], axis=1)
        num_obj = len(class_ids)
        # resized images and corresponding boxes and class ids
        image, boxes, class_ids, cache = dataset.load_data(
            image_id,
            config.shortest_side,
            config.longest_side,
            config.upscale_factor,
            config.resize_mode,
            augmenters=None,
            max_num_crops=1)
        # identify images that have no object
        if not np.any(class_ids > 0):
            print('\nError in processing image %s' % image_name)
        # meta
        masks, window, scale, padding, crop, image_shape, image1_shape = cache
        if crop is None:
            crop = (0,0,0,0)
        info = (
            list(image_shape) + # original image shape, size=3
            [box_size.shape[0]] + # number of boxes in original image, size=1
            list(image1_shape) + # resized image shape, size=3
            [masks.shape[-1]] + # number of boxes in resized image, size=1
            list(window) + # window in resized image, size=4
            [scale] + # resized scale factor, size=1
            list(crop) # crop from original image, size=4
            )
        meta_entry = {'id':image_name, 'info':info}
        # offsets
        an_indicators, an_class_ids, an_offsets = anchors.anchors_targets(
            anchors_fpn,
            boxes,
            class_ids,
            config.num_object_classes)
        pos_anchor_idxes = np.where(an_indicators==1)[0]
        return (
            meta_entry,
            image_size,
            rgb_mean,
            box_size,
            num_obj,
            pos_anchor_idxes.shape[0],
            np.where(an_indicators==0)[0].shape[0],
            np.where(an_indicators==-1)[0].shape[0],
            an_offsets[pos_anchor_idxes])
    # generate meta and stats for the dataset asynchronously (multi-CPU)
    t1 = time.time()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # BUGFIX: the original never consumed the map() iterator, so worker
        # exceptions were silently lost; and workers appended to shared lists
        # concurrently, so entries for the same image could land at different
        # indices across the lists. list() forces iteration (propagating any
        # exception) and map() preserves input order, keeping stats aligned.
        results = list(executor.map(f, dataset.image_ids))
    t2 = time.time()
    print('time: %fs\n' %(t2-t1))
    # assemble per-image results sequentially, preserving image order
    meta = []
    image_sizes, rgb_images, box_sizes, num_objects = [], [], [], []
    num_pos_anchors, num_nu_anchors, num_neg_anchors, offsets = [], [], [], []
    for (meta_entry, image_size, rgb_mean, box_size, num_obj,
         n_pos, n_nu, n_neg, pos_offsets) in results:
        meta.append(meta_entry)
        image_sizes.append(image_size)
        rgb_images.append(rgb_mean)
        box_sizes.append(box_size)
        num_objects.append(num_obj)
        num_pos_anchors.append(n_pos)
        num_nu_anchors.append(n_nu)
        num_neg_anchors.append(n_neg)
        offsets.append(pos_offsets)
    stats = (
        image_sizes,
        rgb_images,
        box_sizes,
        num_objects,
        num_pos_anchors,
        num_nu_anchors,
        num_neg_anchors,
        offsets
        )
    return meta, stats