In [33]:
import uuid
import json

# Running ID counters shared across calls: get_rois() and get_cells() declare
# these names as globals and increment them, so ROI and cell IDs keep counting
# up for as long as the kernel lives. Re-run this cell to restart both at 1.
roiIndex, cellIndex = 1, 1

def get_annotation(filename):
    """Load an annotation JSON file and return its ``regions`` entry.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The value stored under the top-level ``"regions"`` key of the document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no ``"regions"`` key.
    """
    # JSON text is UTF-8 by specification, so do not rely on the platform's
    # default locale encoding. The ``with`` block closes the file on exit, so
    # no explicit close() call is needed.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)
    return data['regions']
    
def get_rois(regions,tagGroup,formatAnnotationTagLookup):
    """Build the ROI entries for every region whose first tag starts with ``tagGroup``.

    Only the first tag of each region is examined. Regions with a missing or
    empty ``tags`` list are skipped instead of raising.

    Note: matching is a plain prefix test, so a group named ``'NAME1'`` would
    also pick up tags such as ``'NAME10_1'``. Callers must choose group names
    that are not prefixes of one another.

    Args:
        regions: Iterable of region dicts, each providing ``'tags'`` (list of
            str) and ``'boundingBox'`` with ``top``/``left``/``height``/``width``.
        tagGroup: Tag prefix selecting the regions that belong to this group.
        formatAnnotationTagLookup: Mapping from raw tag to the tag that should
            be emitted; tags not present in the mapping are emitted unchanged.

    Returns:
        A list of ROI dicts in region order. ``index`` restarts at 0 on every
        call, whereas ``roiId`` comes from the module-level ``roiIndex``
        counter and therefore continues across calls.

    Raises:
        KeyError: If a matching region has no ``'boundingBox'`` or the box is
            missing one of its four fields.
    """
    global roiIndex
    rois = []
    for region in regions:
        tags = region.get('tags')
        if not tags:
            # Untagged region: there is no first tag to match, so skip it
            # rather than failing on tags[0].
            continue

        tag = tags[0]
        if not tag.startswith(tagGroup):
            continue

        # NAME and ADDRESS fields hold free text; everything else is digits.
        if tag.startswith(("NAME", "ADDRESS")):
            extractionValue = "BLOCK_ALPHANUMERIC_CLASSIFICATION"
        else:
            extractionValue = "NUMERIC_CLASSIFICATION"

        box = region['boundingBox']
        # Each coordinate is truncated to int on its own before adding, so
        # bottom/right are sums of the truncated parts.
        top = int(box['top'])
        left = int(box['left'])

        rois.append({
            "annotationTags": formatAnnotationTagLookup.get(tag, tag),
            "extractionMethod": extractionValue,
            "roiId": str(roiIndex),
            # Position of this ROI within the current group (0-based).
            "index": len(rois),
            "rect": {
                "top": top,
                "left": left,
                "bottom": top + int(box['height']),
                "right": left + int(box['width'])
            }
        })
        roiIndex = roiIndex + 1

    return rois

def get_cells(regions,tagGroups,formatLookup,formatAnnotationTagLookup):
    """Assemble one layout cell per tag group.

    Args:
        regions: Region dicts, passed through to ``get_rois`` for each group.
        tagGroups: Ordered tag groups; one cell is produced per entry.
        formatLookup: Mapping from group name to the ``format.value`` to emit;
            groups not present fall back to their own name.
        formatAnnotationTagLookup: Tag rename table forwarded to ``get_rois``.

    Returns:
        A list of cell dicts in ``tagGroups`` order. ``render.index`` is the
        1-based position within this call; ``cellId`` is taken from the
        module-level ``cellIndex`` counter, which is advanced once per cell.
    """
    global cellIndex
    cells = []
    for position, tagGroup in enumerate(tagGroups, start=1):
        groupName = str(tagGroup)
        cell = {
            "cellId": str(cellIndex),
            "rois": get_rois(regions, tagGroup, formatAnnotationTagLookup),
            "render": {"index": position},
            "format": {
                "name": groupName,
                "value": formatLookup.get(groupName, groupName),
            },
            "validate": {"regExp": ""},
        }
        cells.append(cell)
        # Advance the shared counter only after the cell was built successfully.
        cellIndex += 1
    return cells

def get_layout(cells, name="Personal Details Sheet Form", version="1.0"):
    """Wrap a list of cells in the top-level layout document.

    Args:
        cells: The cell dicts to embed (stored by reference, not copied).
        name: Layout name to emit. Defaults to the personal-details form name
            so existing one-argument calls produce the same document as before.
        version: Layout version string to emit.

    Returns:
        A dict of the form
        ``{"layout": {"version": ..., "name": ..., "cells": cells}}``.
    """
    return {
        "layout": {
            "version": version,
            "name": name,
            "cells": cells
        }
    }

def pp_json(json_thing, sort=True, indents=4):
    """Pretty-print a JSON document to stdout.

    Args:
        json_thing: Either JSON text (an exact ``str``), which is parsed first,
            or an already-decoded JSON-serialisable object.
        sort: Whether to sort object keys in the output.
        indents: Indentation width handed to ``json.dumps``.

    Returns:
        None; the formatted text is printed.
    """
    # Exact-type check on purpose: only plain str is treated as JSON text.
    payload = json.loads(json_thing) if type(json_thing) is str else json_thing
    print(json.dumps(payload, sort_keys=sort, indent=indents))

In [34]:
# Load the annotated regions from the raw annotation export; the path is
# resolved relative to the notebook's working directory.
regions = get_annotation(filename="PERSONAL_DETAILS_vottraw.json")

In [35]:
# Tag groups to export, in render order; get_cells() emits one cell per entry.
tagGroups = ['IDENTIFIERID1', 'NAME1', 'ADDRESS1', 'PINCODE1', 'CONTACTNO1']

# Group name -> value written to the cell's "format.value" field.
# Groups missing from this table fall back to their own name.
formatLookup = {
    'IDENTIFIERID1' : 'IDENTIFIERID',
    'NAME1' : 'NAME',
    'ADDRESS1' : 'ADDRESS',
    'PINCODE1' : 'PINCODE',
    'CONTACTNO1' : 'CONTACTNO'
}

# Raw annotation tag -> tag written to the ROI's "annotationTags" field.
# Only tags whose suffix is spelled out (TEN, ELE, ...) need an entry; any tag
# missing from this table is emitted unchanged. Values must not carry stray
# whitespace, since they are copied verbatim into the layout.
formatAnnotationTagLookup = {
    'IDENTIFIERID1_TEN' : 'IDENTIFIERID1_10',
    'IDENTIFIERID1_ELE' : 'IDENTIFIERID1_11',
    'IDENTIFIERID1_TWL' : 'IDENTIFIERID1_12',
    'NAME1_TEN' : 'NAME1_10',
    'NAME1_ELE' : 'NAME1_11',
    'NAME1_TWL' : 'NAME1_12',
    'NAME1_THRT' : 'NAME1_13',
    'NAME1_FORT' : 'NAME1_14',
    'NAME1_FIFT' : 'NAME1_15',
    'ADDRESS1_TEN': 'ADDRESS1_10',
    'ADDRESS1_ELE': 'ADDRESS1_11',
    'ADDRESS1_TWL': 'ADDRESS1_12',
    'ADDRESS1_THRT': 'ADDRESS1_13',
    'ADDRESS1_FORT': 'ADDRESS1_14',
    'ADDRESS1_FIFT': 'ADDRESS1_15',
    'ADDRESS1_SIXT': 'ADDRESS1_16',
    'ADDRESS1_SVNT': 'ADDRESS1_17',
    'ADDRESS1_EGTN': 'ADDRESS1_18',
    'ADDRESS1_NITN': 'ADDRESS1_19',
    'ADDRESS1_TWTY': 'ADDRESS1_20',
    'ADDRESS1_TWTYONE': 'ADDRESS1_21',
    'ADDRESS1_TWTYTWO': 'ADDRESS1_22',
    'ADDRESS1_TWTYTHREE': 'ADDRESS1_23',
    'ADDRESS1_TWTYFOUR': 'ADDRESS1_24',
    'ADDRESS1_TWTYFIVE': 'ADDRESS1_25',
    'ADDRESS1_TWTYSIX': 'ADDRESS1_26',
    'ADDRESS1_TWTYSVN': 'ADDRESS1_27',
    'ADDRESS1_TWTYEGT': 'ADDRESS1_28',
    'ADDRESS1_TWTYNINE': 'ADDRESS1_29',
    'ADDRESS1_THRTY': 'ADDRESS1_30',
    'ADDRESS1_THRTYONE': 'ADDRESS1_31',
    'ADDRESS1_THRTYTWO': 'ADDRESS1_32',
    'ADDRESS1_THRTYTHREE': 'ADDRESS1_33',
    'ADDRESS1_THRTYFOUR': 'ADDRESS1_34',
    'ADDRESS1_THRTYFIVE': 'ADDRESS1_35',
    'ADDRESS1_THRTYSIX': 'ADDRESS1_36',
    'ADDRESS1_THRTYSVN': 'ADDRESS1_37',
    'ADDRESS1_THRTYEGT': 'ADDRESS1_38',
    'ADDRESS1_THRTYNINE': 'ADDRESS1_39',
    'ADDRESS1_FORTY': 'ADDRESS1_40',
    'ADDRESS1_FORTYONE': 'ADDRESS1_41',
    'ADDRESS1_FORTYTWO': 'ADDRESS1_42',
    'ADDRESS1_FORTYTHREE': 'ADDRESS1_43',
    'ADDRESS1_FORTYFOUR': 'ADDRESS1_44',
    'ADDRESS1_FORTYFIVE': 'ADDRESS1_45',
    'ADDRESS1_FORTYSIX': 'ADDRESS1_46',
    'ADDRESS1_FORTYSVN': 'ADDRESS1_47',
    'ADDRESS1_FORTYEGT': 'ADDRESS1_48',
    'ADDRESS1_FORTYNINE': 'ADDRESS1_49',
    'ADDRESS1_FIFTY': 'ADDRESS1_50',
    'ADDRESS1_FIFTYONE': 'ADDRESS1_51',
    'CONTACTNO1_TEN': 'CONTACTNO1_10',
}


In [36]:
# get_cells() and get_rois() number their output from the module-level
# counters. Restart both here so that re-running this cell regenerates the
# same cellId / roiId values instead of continuing from the previous run.
roiIndex = 1
cellIndex = 1
cells = get_cells(regions,tagGroups,formatLookup,formatAnnotationTagLookup)

In [37]:
# Render the final layout document; key sorting is disabled so the fields
# appear in the order in which the layout was assembled.
pp_json(get_layout(cells), sort=False)

{
    "layout": {
        "version": "1.0",
        "name": "Personal Details Sheet Form",
        "cells": [
            {
                "cellId": "1",
                "rois": [
                    {
                        "annotationTags": "IDENTIFIERID1_1",
                        "extractionMethod": "NUMERIC_CLASSIFICATION",
                        "roiId": "1",
                        "index": 0,
                        "rect": {
                            "top": 66,
                            "left": 34,
                            "bottom": 90,
                            "right": 54
                        }
                    },
                    {
                        "annotationTags": "IDENTIFIERID1_2",
                        "extractionMethod": "NUMERIC_CLASSIFICATION",
                        "roiId": "2",
                        "index": 1,
                        "rect": {
                            "top": 66,
                            "left": 56,
        