# Reformat VIA Annotations for retinanet

In [1]:
# -*- coding: utf-8 -*-
"""
Created on Wed May  8 15:38:09 2019

Convenience script that converts all bounding polygons to minimum rectangular bounding boxes in a VIA file, and then outputs a csv format which aligns with dataloader

VIA Image format: We convert
{"name":"polygon","all_points_x":[22,13,256,289,271,22],"all_points_y":[881,921,1045,1025,983,881]}
to
{"name":"rect","x":710,"y":405,"width":78,"height":137}

@author: Daniel Wu
"""

import sys
import pandas as pd
import json
import os
import numpy as np

print('Warning: the current version of this script ignores class ids, and just assigns all to class 1')



In [12]:
#Treats first command line option as filepath, unless unspecified
def reformat_via(filepath, outpath):
    
    labels = pd.read_csv(filepath)
    base_dir = os.path.abspath(os.path.dirname(filepath))

    img_names = []
    class_ids = []
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []

    #Go through row by row
    for i in range(labels.shape[0]):

        box = json.loads(labels.iloc[i].region_shape_attributes)

        #Skip empty boxes
        if "name" not in box.keys():
            continue

        if box["name"] == "polygon":
            max_x = max(box['all_points_x'])
            min_x = min(box['all_points_x'])
            max_y = max(box['all_points_y'])
            min_y = min(box['all_points_y'])

            box = {"name":"rect","x":min_x,"y":min_y,"width":max_x - min_x,"height":max_y - min_y}

        xmins.append(box['x'])
        ymins.append(box['y'])
        xmaxs.append(box['x'] + box['width'])
        ymaxs.append(box['y'] + box['height'])

        img_names.append(os.path.join(base_dir, labels.iloc[i]['#filename']))
        class_ids.append('sperm')

    print(f'There were {len(img_names)} total annotations.')
    annots = np.vstack((img_names, xmins, ymins, xmaxs, ymaxs, class_ids)).T
    new_labels = pd.DataFrame(annots, columns = ['image_name', 'xmin', 'ymin', 'xmax', 'ymax',  'class_id'])

    #Save and export
    outpath = os.path.join(os.path.dirname(filepath), outpath)
    new_labels.to_csv(outpath, index=False, header=False)

In [13]:
reformat_via('datasets/TESE_retinanet/via_regions_all.csv', 'annot_all.csv')
reformat_via('datasets/TESE_retinanet/via_regions_train.csv', 'annot_train.csv')
reformat_via('datasets/TESE_retinanet/via_regions_val.csv', 'annot_val.csv')
reformat_via('datasets/TESE_retinanet/via_regions_test.csv', 'annot_test.csv')

There were 1377 total annotations.
There were 1094 total annotations.
There were 141 total annotations.
There were 142 total annotations.


In [14]:
df = pd.read_csv('datasets/TESE_retinanet/annot_all.csv')
df

Unnamed: 0,/home/users/danjwu/sperm/datasets/TESE_retinanet/01172018-OC-TESE_597.jpg,355,152,425,295,sperm
0,/home/users/danjwu/sperm/datasets/TESE_retinan...,3,329,116,404,sperm
1,/home/users/danjwu/sperm/datasets/TESE_retinan...,237,165,384,221,sperm
2,/home/users/danjwu/sperm/datasets/TESE_retinan...,245,255,334,324,sperm
3,/home/users/danjwu/sperm/datasets/TESE_retinan...,404,244,478,337,sperm
4,/home/users/danjwu/sperm/datasets/TESE_retinan...,230,130,339,193,sperm
5,/home/users/danjwu/sperm/datasets/TESE_retinan...,404,238,504,281,sperm
6,/home/users/danjwu/sperm/datasets/TESE_retinan...,395,277,529,322,sperm
7,/home/users/danjwu/sperm/datasets/TESE_retinan...,46,336,128,391,sperm
8,/home/users/danjwu/sperm/datasets/TESE_retinan...,273,202,377,254,sperm
9,/home/users/danjwu/sperm/datasets/TESE_retinan...,547,353,621,403,sperm


In [None]:
#Remember to write class annotations map