In [14]:
import argparse
import cv2
import os
import numpy as np
import pandas as pd
from pycocotools.coco import COCO

def get_coco_file_path(coco_dir, dataset):
    """Build the path of the COCO instances file for one dataset split.

    Args:
        coco_dir: Directory that contains the ``annotations`` folder.
        dataset: Split name placed between ``instances_`` and ``.json``.

    Returns:
        ``<coco_dir>/annotations/instances_<dataset>.json`` joined with the
        platform's path separator.
    """
    json_name = "".join(["instances_", dataset, ".json"])
    return os.path.join(coco_dir, "annotations", json_name)
    

def coco2csv(coco_file, coco_dir, dataset, csv_file):
    """Flatten a COCO instances JSON file into a bounding-box CSV.

    One CSV row is written per annotation with the columns
    ``image_id, x, y, w, h``. ``image_id`` is the image's ``file_name``
    prefixed with ``coco_dir``/``dataset`` and with a trailing ``.jpg``
    removed; ``x, y, w, h`` are the COCO ``bbox`` values truncated to
    integers (the box stays in x, y, width, height form). Images without any
    annotation are kept as a single row whose box fields are empty.

    Args:
        coco_file: Path of the COCO JSON file, passed to ``COCO``.
        coco_dir: Path component joined in front of every image file name.
        dataset: Second path component joined in front of every file name.
        csv_file: Destination path (or buffer) handed to ``DataFrame.to_csv``.

    Returns:
        None. The CSV is written and a short summary is printed.
    """
    bbox_cols = ['x', 'y', 'w', 'h']
    jpg_suffix = '.jpg'

    # read COCO json file
    coco = COCO(coco_file)

    # get all images
    image_ids = coco.getImgIds()
    image_records = coco.loadImgs(image_ids)
    print('found', len(image_records),'images')

    # get annotations (annotations exist only for images with objects)
    all_annotations = []
    for image in image_records:
        ann_ids = coco.getAnnIds(imgIds=image['id'], iscrowd=None)
        all_annotations.extend(coco.loadAnns(ann_ids))

    # Naming the columns up front keeps the frame well-formed even when the
    # file contains no annotations at all (previously a KeyError on 'bbox').
    annotations = pd.DataFrame(all_annotations, columns=['image_id', 'bbox'])
    print('found', annotations.shape[0],'annotations')

    # split bbox into one column per coordinate (still x, y, w, h)
    boxes = pd.DataFrame(annotations['bbox'].tolist(),
                         index=annotations.index, columns=bbox_cols)
    # Truncate to int first (COCO boxes may be fractional), then switch to the
    # nullable integer dtype so that the missing values introduced by the
    # right merge below do not turn every coordinate into a float.
    boxes = boxes.astype(int).astype('Int64')
    annotations = pd.concat([annotations[['image_id']], boxes], axis=1)

    # add image names
    images = pd.DataFrame(image_records, columns=['id', 'file_name'])
    images = images.rename(columns={'id': 'image_id'})

    # right merge: every image appears, including those without annotations
    merged = pd.merge(annotations, images, on=['image_id'], how='right')

    # select only columns required for the csv format, and fix image path
    annotations_csv = merged[['file_name'] + bbox_cols].copy()

    def _image_key(file_name):
        # Prefix the path, then drop only a trailing ".jpg" extension.
        path = os.path.join(coco_dir, dataset, file_name)
        if path.endswith(jpg_suffix):
            return path[:-len(jpg_suffix)]
        return path

    annotations_csv['file_name'] = annotations_csv['file_name'].fillna('').map(_image_key)

    # write annotations to file
    colnames = ['image_id'] + bbox_cols
    annotations_csv.columns = colnames
    # missing box values are written as empty fields (to_csv default na_rep)
    annotations_csv.to_csv(path_or_buf=csv_file, index=False, header=True, columns=colnames)
    print('exported', annotations_csv.shape[0],'annotations')
    print(annotations_csv.head())

def main():
    """Export each hard-coded COCO annotation file to its CSV counterpart."""
    # (input COCO json, output csv) pairs, converted in this order
    conversions = (
        ("coco_tile_train.json", "tile_train.csv"),
        ("coco_tile_val.json", "tile_val.csv"),
        ("coco_pseudo_train.json", "pseudo_train.csv"),
    )
    for coco_file, csv_file in conversions:
        # coco_dir and dataset are passed as empty strings
        coco2csv(coco_file, "", "", csv_file)


# Script entry point. Inside a notebook kernel __name__ is "__main__" too,
# so executing this cell runs all conversions immediately.
if __name__== "__main__":
    main()

loading annotations into memory...
Done (t=0.91s)
creating index...
index created!
found 2919 images
found 131359 annotations
exported 131359 annotations
    image_id   x    y    w    h
0  00333207f   0  654   37  111
1  00333207f   0  817  135   98
2  00333207f   0  192   22   81
3  00333207f   4  342   63   38
4  00333207f  82  334   82   81
loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
found 454 images
found 16434 annotations
exported 16434 annotations
    image_id    x    y   w   h
0  00ea5e5ee  327    3  35  49
1  00ea5e5ee  928    0  90  38
2  00ea5e5ee  952   38  69  44
3  00ea5e5ee  994   82  30  54
4  00ea5e5ee  921  174  72  28
loading annotations into memory...
Done (t=0.25s)
creating index...
index created!
found 1233 images
found 50454 annotations
exported 50454 annotations
                                            image_id    x    y    w    h
0  2_3_f74ede365_0ff60b2bc_b0d4a70e0_4b4f6de9b_07...    0  264   74  126
1  2_3_f74ede365_0f

In [12]:
# Shell escape: report used/available disk space per mounted filesystem.
!df -h

Filesystem      Size  Used Avail Use% Mounted on
overlay         388G  341G   47G  88% /
tmpfs            64M     0   64M   0% /dev
tmpfs            32G     0   32G   0% /sys/fs/cgroup
shm             4.0G  1.6G  2.5G  39% /dev/shm
/dev/sda1       388G  341G   47G  88% /tmp/.X11-unix
tmpfs            32G   12K   32G   1% /proc/driver/nvidia
tmpfs           6.4G   25M  6.4G   1% /run/nvidia-persistenced/socket
udev             32G     0   32G   0% /dev/nvidia0
tmpfs            32G     0   32G   0% /proc/acpi
tmpfs            32G     0   32G   0% /proc/scsi
tmpfs            32G     0   32G   0% /sys/firmware
