# Crop.py

'''
Uses the annotation files generated by panda_decode.py and the images from the PANDA
dataset to build a COCO dataset with cropped images.

For now the input dataset image location is hardcoded

When making a new output dataset, set up the folder structure beforehand!
- new_dataset
|  - images
|  |  - train
|  |  - val
|  - annotations

Also, some programs are hardcoded to expect the coco2017 format-- to easily fix this,
make symlinks in the proper format after the dataset has been output, for example
# cd new_dataset/images
# ln -s train train2017
'''

import cv2
import json
import copy
import random
import plotille
import collections
from math import sqrt
from tqdm import tqdm
from datetime import datetime

from multiprocessing import Queue, Process, Manager, Lock, set_start_method

# Bounds for the randomly drawn crop width (the crop is later used to slice
# the source image array, so these are pixel widths).
CROP_MAX_WIDTH = 1080
CROP_MIN_WIDTH = 512
# Crop height is computed as width * CROP_ASPECT.
CROP_ASPECT = 0.75
# Crops containing more than this many annotations are re-sampled.
MAX_PEOPLE = 30
# Presumably the period of "plain random" crops in random_crop (every Nth crop
# is not forced to contain a person) -- confirm that random_crop reads this
# exact name.
OCCUPIED_RANDOM_CROP_VAL = 20
# Used by crop_okay: an empty crop is rejected only when a uniform(0, 1) draw
# is <= this value.
RETRY_EMPTY_CHANCE = 0.00001

# Root folder of the generated dataset; images are written below
# <output_dataset>/images/{train,val} and JSON below <output_dataset>/annotations.
output_dataset = 'change path to outputed dataset' # change this to wherever the output dataset should be written

#img_count = 1
#anno_count = 1
#anno_stats = []


def print_stats(anno_stats):
    """Print summary statistics about detections per generated image.

    Args:
        anno_stats: Sequence with one entry per generated image holding the
            number of annotations in that image. It may be a multiprocessing
            list proxy; it is copied into a local list once so the counts
            below do not each re-iterate the proxy.

    The first line (image and detection totals) is always printed. The
    percentage lines are skipped when no images were created, instead of
    failing with a division by zero.
    """
    counts = list(anno_stats)
    n = len(counts)
    print(f'Created {n} images with {sum(counts)} total detections')

    if n == 0:
        print('No images were created; skipping percentage statistics')
        return

    empty = sum(1 for c in counts if c == 0)
    occupied = sum(1 for c in counts if c > 0)
    crowded = sum(1 for c in counts if c > 4)

    print(f'Percentage of images with 0 people:         {100*empty/n:.2f}')
    print(f'Percentage of images with 1 or more people: {100*occupied/n:.2f}')
    print(f'Percentage of images with 5 or more people: {100*crowded/n:.2f}')

def get_crop(imgw, imgh):
    """Pick a random crop rectangle for an image of size imgw x imgh.

    The width is drawn uniformly between CROP_MIN_WIDTH and CROP_MAX_WIDTH
    and the height is width * CROP_ASPECT. The top-left corner is drawn
    uniformly from the range that keeps the crop inside the image and is
    clamped to 0 when that range is negative (crop larger than the image).

    Returns:
        [x, y, w, h] with every value truncated to int.
    """
    crop_w = random.uniform(CROP_MIN_WIDTH, CROP_MAX_WIDTH)
    crop_h = crop_w * CROP_ASPECT

    # Draw both offsets first (same RNG order: width, x, y), then clamp.
    left = random.uniform(0, imgw - crop_w - 1)
    top = random.uniform(0, imgh - crop_h - 1)
    if left < 0:
        left = 0
    if top < 0:
        top = 0

    return [int(value) for value in (left, top, crop_w, crop_h)]


# Get a crop positioned around one randomly chosen annotation
def get_occupied_crop(imgw, imgh, anno_data):
    """Pick a crop rectangle built around a randomly chosen annotation.

    One annotation is selected at random from ``anno_data`` and the crop
    size/position is drawn so that, where the size limits and the image
    bounds allow it, the crop contains that annotation's whole bbox.

    Args:
        imgw: Width of the source image.
        imgh: Height of the source image.
        anno_data: Non-empty sequence of annotation dicts with a 'bbox'
            entry laid out as [x, y, w, h].

    Returns:
        [x, y, w, h] with every value truncated to int.
    """
    target = random.choice(anno_data)['bbox']
    box_left, box_top = target[0], target[1]
    box_w, box_h = target[2], target[3]
    box_right = box_left + box_w
    box_bottom = box_top + box_h

    # Smallest crop width that can accommodate the bbox in both dimensions
    min_width = max(CROP_MIN_WIDTH, box_w, box_h / CROP_ASPECT)

    crop_w = random.uniform(min_width, CROP_MAX_WIDTH)
    crop_h = crop_w * CROP_ASPECT

    # Range of top-left positions that both cover the bbox and stay in the image
    x_low = max(0, box_right - crop_w)
    y_low = max(0, box_bottom - crop_h)
    x_high = min(box_left, imgw - crop_w)
    y_high = min(box_top, imgh - crop_h)

    crop_x = max(0, random.uniform(x_low, x_high))
    crop_y = max(0, random.uniform(y_low, y_high))

    return [int(crop_x), int(crop_y), int(crop_w), int(crop_h)]


def crop_okay(n):
    """Decide whether a crop containing ``n`` annotations is acceptable.

    Any crop with a non-zero count is accepted. An empty crop is rejected
    only when a uniform(0, 1) draw is <= RETRY_EMPTY_CHANCE; the random draw
    happens only for empty crops.
    """
    if n == 0 and random.uniform(0, 1) <= RETRY_EMPTY_CHANCE:
        return False
    return True


def get_annos(crop, anno_data):
    """Collect the annotations that fall in ``crop``, shifted to crop coordinates.

    An annotation is kept when the top-left corner of its bbox lies inside the
    crop rectangle (right and bottom edges inclusive). Only that corner is
    tested, so a kept bbox may extend past the crop's right/bottom edge.

    Args:
        crop: Crop rectangle as [x, y, w, h] in source-image coordinates.
        anno_data: Iterable of annotation dicts with 'bbox' ([x, y, w, h]),
            'segmentation' (list whose first element is a flat x,y list) and
            'keypoints' (flat x, y, v triples). The input is never modified.

    Returns:
        (annos, diags): deep copies of the kept annotations with bbox origin,
        first segmentation polygon and keypoints translated by the crop
        origin, and the bbox diagonal length of each kept annotation.
    """
    crop_x, crop_y = crop[0], crop[1]
    crop_right = crop_x + crop[2]
    crop_bottom = crop_y + crop[3]

    ret = []
    diags = []

    for source in anno_data:
        corner_x, corner_y = source['bbox'][0], source['bbox'][1]
        if corner_x < crop_x or corner_y < crop_y or \
           corner_x > crop_right or corner_y > crop_bottom:
            continue

        # Copy only annotations that are actually kept; copying before the
        # filter wastes a deep copy on every rejected annotation.
        anno = copy.deepcopy(source)
        bbox = anno['bbox']
        polygon = anno['segmentation'][0]
        kps = anno['keypoints']

        diag = sqrt(bbox[2]**2 + bbox[3]**2)
        # TODO: reject small annotations?

        # Put bbox, seg(bbox), and kps into the crop's frame
        bbox[0] -= crop_x
        bbox[1] -= crop_y

        # Flat [x0, y0, x1, y1, ...]; a trailing unpaired value is left as is
        for k in range(0, len(polygon) - 1, 2):
            polygon[k] -= crop_x
            polygon[k + 1] -= crop_y

        # Flat [x, y, v, ...] triples; only x and y are translated
        for k in range(0, len(kps) - 2, 3):
            kps[k] -= crop_x
            kps[k + 1] -= crop_y

        ret.append(anno)
        diags.append(diag)
    return ret, diags


def random_crop(lock, image_data, anno_data, img_count, anno_count, anno_stats, train_num = 230, val_num = 60):
    """Sample random crops from one source image and write them to disk.

    ``train_num + val_num`` crops are generated. Every
    OCCUPIED_RANDOM_CROP_VAL-th crop (index 0, N, 2N, ...) is a plain random
    crop that is only re-sampled while it holds more than MAX_PEOPLE
    annotations; all other crops are re-sampled around a random annotation
    until they hold between 1 and MAX_PEOPLE annotations. The first
    ``train_num`` crops go to the train split, the remainder to val.

    Args:
        lock: Lock (usable as a context manager) guarding ``img_count``,
            ``anno_count`` and ``anno_stats``.
        image_data: Source image record with 'file_name', 'width', 'height'.
        anno_data: List of annotation dicts of this image; may be empty, in
            which case only plain random crops are produced.
        img_count: Shared counter (``.value``) handing out image ids.
        anno_count: Shared counter (``.value``) handing out annotation ids.
        anno_stats: Shared list; the annotation count of every crop is
            appended to it.
        train_num: Number of crops written to the train split.
        val_num: Number of crops written to the val split.

    Returns:
        (train_imgd, train_annos, val_imgd, val_annos, diags): image records
        and annotations per split, plus the bbox diagonals of all kept
        annotations.

    Raises:
        IOError: If the source image cannot be read.
    """
    image_path = 'change_path_here' + image_data['file_name'] #change the path to primary dataset images
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of a TypeError when the image is sliced.
        raise IOError(f'Could not read source image: {image_path}')

    imgw = image_data['width']
    imgh = image_data['height']

    train_imgd = []
    train_annos = []
    val_imgd = []
    val_annos = []

    # Diagonals of bboxes, to analyze sizes of detections
    diags = []

    total_count = train_num + val_num

    for i in range(total_count):
        # Reserve a globally unique image id
        with lock:
            local_img_count = img_count.value
            img_count.value += 1

        crop = get_crop(imgw, imgh)
        annos, d = get_annos(crop, anno_data)

        # An occupied crop can only be requested when there is something to
        # occupy it; an image without annotations falls back to random crops.
        if i % OCCUPIED_RANDOM_CROP_VAL != 0 and anno_data:
            attempts = 0
            # NOTE(review): this retry loop is unbounded; past 1000 attempts
            # it only reports the offending image on every further attempt.
            while len(annos) > MAX_PEOPLE or len(annos) == 0:
                crop = get_occupied_crop(imgw, imgh, anno_data)
                annos, d = get_annos(crop, anno_data)
                attempts += 1
                if attempts > 1000:
                    print(image_data['file_name'], crop)
        else:
            while len(annos) > MAX_PEOPLE:
                crop = get_crop(imgw, imgh)
                annos, d = get_annos(crop, anno_data)

        diags += d

        # Reserve a contiguous range of annotation ids and record the count
        with lock:
            local_count = anno_count.value
            anno_count.value += len(annos)
            anno_stats.append(len(annos))

        for anno_id, anno in enumerate(annos, start=local_count):
            anno.update({'image_id': local_img_count,
                         'id': anno_id})

        img = image[crop[1]:crop[1]+crop[3], crop[0]:crop[0]+crop[2]]
        filename = str(local_img_count).zfill(12) + '.jpg'

        imgd = {'license': 1,
                'file_name': filename,
                'coco_url': '127.0.0.1',
                'height': crop[3],
                'width': crop[2],
                'date_captured': '1970-01-01 00:00:00',
                'flickr_url': '127.0.0.1',
                'id': local_img_count}

        if i < train_num:
            split, split_imgd, split_annos = 'train', train_imgd, train_annos
        else:
            split, split_imgd, split_annos = 'val', val_imgd, val_annos

        split_imgd.append(imgd)
        split_annos.extend(annos)

        out_path = f'{output_dataset}/images/{split}/{filename}'
        try:
            written = cv2.imwrite(out_path, img)
        except Exception:
            print(f'{crop}')
            raise
        if not written:
            # cv2.imwrite reports most failures (e.g. a missing output
            # folder) through its return value rather than an exception.
            print(f'Warning: failed to write {out_path} (crop {crop})')

    return train_imgd, train_annos, val_imgd, val_annos, diags


def worker_process(lock, i, image_data, anno_dict, train_imgd, train_annos, val_imgd, val_annos, diags, img_count, anno_count, anno_stats):
    """Process one worker's share of source images and publish the results.

    For every image record in ``image_data`` the annotations are looked up in
    ``anno_dict`` by image id, ``random_crop`` is run, and each returned item
    is put on the matching output queue. A progress bar is shown at row ``i``.
    A KeyboardInterrupt during an image stops the loop early.

    Args:
        lock, img_count, anno_count, anno_stats: Shared state forwarded to
            ``random_crop``.
        i: Worker index, used for the progress bar position and label.
        image_data: Image records assigned to this worker.
        anno_dict: Mapping from image id to that image's annotations.
        train_imgd, train_annos, val_imgd, val_annos, diags: Output queues.
    """
    progress = tqdm(total = len(image_data), position = i)

    # Sample crops, write out new images
    for record in image_data:
        progress.set_description(f'Worker {i}: Processing {record["file_name"]}')

        try:
            record_annos = anno_dict[record['id']]
            new_train_imgd, new_train_annos, new_val_imgd, new_val_annos, new_diags = \
                random_crop(lock, record, record_annos, img_count, anno_count, anno_stats)
        except KeyboardInterrupt:
            print('KeyboardInterrupt caught during image loop! Ending prematurely.')
            break

        # Publish results in the same order: train images, train annotations,
        # val images, val annotations, diagonals.
        for item in new_train_imgd:
            train_imgd.put(item)
        for item in new_train_annos:
            train_annos.put(item)
        for item in new_val_imgd:
            val_imgd.put(item)
        for item in new_val_annos:
            val_annos.put(item)
        for item in new_diags:
            diags.put(item)

        progress.update()

    progress.close()


def _drain_queue(queue):
    """Return every item currently held by ``queue`` as a list, in order."""
    # A sentinel marks the end; all producers have finished before this runs.
    queue.put('end')
    return list(iter(queue.get, 'end'))


def main():
    """Build the cropped COCO dataset from the decoded PANDA annotations.

    Reads 'categories.json', 'images.json' and 'annotations.json' from the
    working directory, distributes the source images round-robin over 16
    worker processes, gathers their results and writes the train/val
    keypoint annotation files below ``output_dataset``/annotations. Finally
    prints a histogram of bbox diagonals and the per-image statistics.
    """
    # COCO metadata
    info = {'description': 'PANDA dataset sample',
            'url': '127.0.0.1',
            'version': '0.1',
            'year': 2021,
            'contributor': 'Nathan Hewitt, Armin Danesh, Ghazal Alienzhad',
            'date_created': datetime.today().strftime('%Y/%m/%d') }

    licenses = [{'url': 'https://creativecommons.org/licenses/by-nc-sa/4.0/',
                 'id': 1,
                 'name': 'Attibution-NonCommercial-ShareAlike License'}]

    with open('categories.json') as f:
        categories = json.load(f)

    # Load previously annotated PANDA
    with open('images.json') as f:
        image_data = json.load(f)
    with open('annotations.json') as f:
        anno_data = json.load(f)

    # Group annotations by the image they belong to
    anno_dict = collections.defaultdict(list)
    for a in anno_data:
        anno_dict[a['image_id']].append(a)

    # Round-robin split of the source images over the workers
    workers = 16
    worker_inputs = [[] for _ in range(workers)]
    for i, imgd in enumerate(image_data):
        worker_inputs[i % workers].append(imgd)

    # Multiprocess inputs and outputs
    # Must use managed queues; otherwise workers will not join
    with Manager() as m:
        lock = m.Lock()
        t_imgd = m.Queue()
        t_annos = m.Queue()
        v_imgd = m.Queue()
        v_annos = m.Queue()
        d = m.Queue()
        img_count = m.Value('i', 0)
        anno_count = m.Value('i', 0)
        anno_stats = m.list([])

        procs = []
        for i, worker_images in enumerate(worker_inputs):
            # Hand each worker only the annotations of its own images, so the
            # whole annotation set is not serialized once per process.
            worker_annos = {imgd['id']: anno_dict[imgd['id']]
                            for imgd in worker_images}
            args = (lock, i, worker_images, worker_annos, t_imgd, t_annos,
                    v_imgd, v_annos, d, img_count, anno_count, anno_stats)

            p = Process(target=worker_process, args=args)
            procs.append(p)
            p.start()

        for p in procs:
            p.join()

        # Copy everything out of the managed objects before the manager
        # process is shut down at the end of this block.
        train_imgd = _drain_queue(t_imgd)
        train_annos = _drain_queue(t_annos)
        val_imgd = _drain_queue(v_imgd)
        val_annos = _drain_queue(v_annos)
        diags = _drain_queue(d)
        stats = list(anno_stats)

    # Build COCO jsons
    train_data = {'info': info,
                  'licenses': licenses,
                  'images': train_imgd,
                  'annotations': train_annos,
                  'categories': categories}

    val_data = {'info': info,
                'licenses': licenses,
                'images': val_imgd,
                'annotations': val_annos,
                'categories': categories}

    with open(f'{output_dataset}/annotations/person_keypoints_train.json', 'w') as f:
        json.dump(train_data, f)
    with open(f'{output_dataset}/annotations/person_keypoints_val.json', 'w') as f:
        json.dump(val_data, f)

    print('Bbox diagonal stats:')
    if diags:
        print(plotille.hist(diags, bins=30))
    else:
        # Nothing to plot; avoid handing an empty series to the histogram.
        print('(no detections)')
    print_stats(stats)


# Script entry point: select the 'spawn' multiprocessing start method before
# any worker process is created, then run the pipeline.
if __name__=='__main__':
    set_start_method('spawn')
    main()