In [1]:
import matplotlib.pyplot as plt
import numpy as np
import rasterio
import rasterio.plot
import math
import matplotlib.patches as patches
import cv2 as cv
import pandas as pd
import json

In [2]:
# Dataset locations: image tiles (going by the name; not used in the cells shown
# here), the directory holding annotations.csv, and the directory the COCO JSON
# files are written to. Each path keeps its trailing slash because later cells
# build file paths by plain string concatenation.
image_dir, anno_dir, coco_dir = (
    '/efs/data/jpb/data/tiles/Dagahaley/images/',
    '/efs/data/jpb/data/tiles/Dagahaley/annotations/',
    '/efs/data/jpb/data/tiles/Dagahaley/cocoannotations/',
)

In [3]:
# Load the bounding-box table; later cells read its filename, xmin, ymin,
# width and height columns.
annos = pd.read_csv(filepath_or_buffer=anno_dir + 'annotations.csv')

In [5]:
# Preview the first five annotation rows.
annos.head(n=5)

Unnamed: 0.1,Unnamed: 0,filename,xmin,ymin,width,height
0,0,dagahaley-x900y6000.png,273,281,9,10
1,1,dagahaley-x900y6000.png,290,245,9,9
2,2,dagahaley-x900y6300.png,47,284,9,9
3,3,dagahaley-x900y6300.png,101,275,13,14
4,4,dagahaley-x900y6300.png,135,252,10,11


In [6]:
# Sorted array of the distinct image file names that have at least one annotation.
filenames = np.unique(annos.loc[:, 'filename'])

In [27]:
# Shuffle with a fixed seed so the train/val split is reproducible after a
# kernel restart. Sorting first makes the cell idempotent: re-running it gives
# the same order instead of shuffling the result of the previous shuffle.
SPLIT_SEED = 42
filenames = np.random.RandomState(SPLIT_SEED).permutation(np.sort(filenames))

In [29]:
# 80/20 split of the file list: the leading 80% (rounded down) is used for
# training and the remainder for validation.
train_filenames, val_filenames = np.split(filenames, [int(0.8*len(filenames))])

In [None]:
# Image dimensions recorded on every COCO image entry (same size for all files).
width = 300
height = 300

In [31]:
def create_coco_file(filenames, width, height, outname):
    """Write a COCO-format annotation JSON file for the given images.

    Bounding boxes are read from the notebook-level ``annos`` DataFrame
    (columns ``filename``, ``xmin``, ``ymin``, ``width``, ``height``) and the
    result is saved to ``coco_dir + outname + '.json'``.

    Parameters
    ----------
    filenames : iterable of str
        Image file names to include. Image ids are assigned 1, 2, ... in
        first-seen order; a repeated name is listed only once.
    width, height : int
        Dimensions recorded on every image entry (all images share them).
    outname : str
        Output file name without the ``.json`` extension.

    Returns
    -------
    None
        The function is called for its side effect of writing the file.
    """
    box_columns = ['xmin', 'ymin', 'width', 'height']

    # Collect the [xmin, ymin, width, height] boxes of every requested file.
    # .tolist() yields plain Python ints, which json can serialise directly.
    boxes_by_file = {}
    for fname in filenames:
        rows = annos.loc[annos['filename'] == fname, box_columns]
        boxes_by_file[str(fname)] = rows.astype(int).values.tolist()

    # To prepare this in COCO format, first enumerate all the files.
    file_ids = {}
    images = []
    for file_id, fname in enumerate(boxes_by_file, start=1):
        file_ids[fname] = file_id
        # int() keeps NumPy integer sizes JSON-serialisable.
        images.append({'file_name': fname, 'coco_url': '', 'flickr_url': '', 'date_captured': '',
                       'height': int(height), 'width': int(width), 'license': 1, 'id': file_id})

    # Now make a record for each bounding box.
    annotations = []
    object_id = 1
    for fname, boxes in boxes_by_file.items():
        for bbox in boxes:
            annotations.append({'id': object_id,
                                'image_id': file_ids[fname],
                                'area': bbox[2]*bbox[3],
                                'bbox': bbox,
                                'category_id': 1,
                                'iscrowd': 0,
                                'segmentation': []})
            # Every annotation needs its own id; without advancing the
            # counter all records would share id 1.
            object_id += 1

    # Other metadata that makes up the COCO annotation format
    license = [{'id': 1,
                'name': 'DigitalGlobe/NextView/UNOSAT',
                'url': ''}]

    datainfo = {'contributor': 'UNOSAT/UN Global Pulse',
                'date_created': '2018/06/20',
                'description': 'UNOSAT Refugee Settlements Dataset',
                'url': 'http://unitar.org/unosat/',
                'version': '1.0',
                'year': 2018}

    categories = [{'id': 1, 'name': 'structure', 'supercategory': 'structure'}]

    # Now save to a new annotation file
    dataset = {'info': datainfo,
               'licenses': license,
               'categories': categories,
               'images': images,
               'annotations': annotations
              }

    with open(coco_dir + '{}.json'.format(outname), 'w') as outfile:
        json.dump(dataset, outfile)

In [32]:
# Write the training split to dagahaley_train.json inside coco_dir.
create_coco_file(filenames=train_filenames, width=width, height=height, outname='dagahaley_train')

In [33]:
# Write the validation split to dagahaley_val.json inside coco_dir.
create_coco_file(filenames=val_filenames, width=width, height=height, outname='dagahaley_val')