In [41]:
import uuid
import json

# Running ID counters kept at module level: get_rois() advances roiIndex and
# get_cells() advances cellIndex, so IDs keep growing until this cell is re-run.
roiIndex, cellIndex = 1, 1

def get_annotation(filename):
    """Return the ``regions`` entry of a JSON annotation file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever is stored under the document's top-level ``"regions"`` key
        (get_rois() iterates over it as a sequence of region dicts).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no ``"regions"`` key.
    """
    # Pin the encoding so decoding does not depend on the platform's locale
    # default; JSON interchange text is UTF-8.
    with open(filename, encoding="utf-8") as f:
        # The context manager closes the file on exit, so no explicit close().
        return json.load(f)['regions']
    
def get_rois(regions,tagGroup,formatAnnotationTagLookup):
    """Build the ROI entries for regions whose first tag starts with ``tagGroup``.

    Args:
        regions: Iterable of region dicts. Each is read for ``tags`` (a list
            whose first element is the tag name) and ``boundingBox`` (with
            ``top``, ``left``, ``height`` and ``width``).
        tagGroup: Prefix that a region's first tag must start with for the
            region to be included.
        formatAnnotationTagLookup: Mapping from a raw tag to the tag to emit;
            tags missing from it are emitted unchanged.

    Returns:
        A list of ROI dicts in region order. ``index`` restarts at 0 on every
        call, while ``roiId`` is taken from the module-level ``roiIndex``
        counter and therefore keeps increasing across calls.

    Side effects:
        Advances the global ``roiIndex`` by one for each ROI produced.
    """
    global roiIndex
    rois = []
    for region in regions:
        tags = region.get('tags')
        # A region without any tag cannot belong to a group; skip it instead
        # of failing with IndexError on tags[0].
        if not tags:
            continue
        tag = tags[0]
        if not tag.startswith(tagGroup):
            continue

        # Tags containing "NAME" are treated as alphanumeric fields; every
        # other tag is treated as numeric.
        if "NAME" in tag:
            extractionValue = "BLOCK_ALPHANUMERIC_CLASSIFICATION"
        else:
            extractionValue = "NUMERIC_CLASSIFICATION"

        box = region['boundingBox']
        # Each coordinate is truncated to int before the sums are taken, so
        # bottom/right are int(top)+int(height) and int(left)+int(width).
        top = int(box['top'])
        left = int(box['left'])

        rois.append({
            # Unknown tags fall back to the raw tag name.
            "annotationTags": formatAnnotationTagLookup.get(tag, tag),
            "extractionMethod": extractionValue,
            "roiId": str(roiIndex),
            # Position of this ROI within the current group (0-based).
            "index": len(rois),
            "rect": {
                "top": top,
                "left": left,
                "bottom": top + int(box['height']),
                "right": left + int(box['width']),
            },
        })
        roiIndex += 1

    return rois

def get_cells(regions,tagGroups,formatLookup,formatAnnotationTagLookup):
    """Build one layout cell per tag group.

    Args:
        regions: Region dicts, passed through to get_rois() for every group.
        tagGroups: Iterable of tag-group names; one cell is produced for each,
            in iteration order.
        formatLookup: Mapping from group name to the label stored in
            ``format.value``; groups missing from it use their own name.
        formatAnnotationTagLookup: Passed through to get_rois().

    Returns:
        A list of cell dicts. ``render.index`` restarts at 1 on every call,
        while ``cellId`` is taken from the module-level ``cellIndex`` counter
        and therefore keeps increasing across calls.

    Side effects:
        Advances the global ``cellIndex`` by one per group (and, through
        get_rois(), the global ``roiIndex``).
    """
    global cellIndex
    cells_data = []
    for renderIndex, tagGroup in enumerate(tagGroups, start=1):
        groupName = str(tagGroup)
        # Fall back to the raw group name when no label is configured.
        formatValue = formatLookup.get(groupName, groupName)

        cells_data.append({
            "cellId": str(cellIndex),
            "rois": get_rois(regions, tagGroup, formatAnnotationTagLookup),
            "render": {
                "index": renderIndex
            },
            "format": {
                "name": groupName,
                "value": formatValue
            },
            "validate": {
                "regExp": ""
            }
        })
        cellIndex += 1
    return cells_data


def get_layout(cells):
    """Wrap ``cells`` in the top-level layout document.

    Args:
        cells: The value to store under ``layout.cells`` (stored as-is, not
            copied).

    Returns:
        A dict with a single ``"layout"`` key holding the fixed version and
        name together with ``cells``.
    """
    layout = {
        "version": "1.0",
        "name": "BENEFICIARY DETAILS FROM",
        "cells": cells,
    }
    return {"layout": layout}

def pp_json(json_thing, sort=True, indents=4):
    """Pretty-print a JSON document to stdout.

    Args:
        json_thing: Either a string of JSON text (parsed first, then
            re-serialised) or any object ``json.dumps`` can serialise.
        sort: Passed to ``json.dumps`` as ``sort_keys``.
        indents: Passed to ``json.dumps`` as ``indent``.

    Returns:
        None; the formatted text is printed.

    Raises:
        json.JSONDecodeError: If a string argument is not valid JSON.
    """
    # isinstance (rather than an exact type comparison) also treats str
    # subclasses as JSON text instead of dumping them as a quoted string.
    if isinstance(json_thing, str):
        json_thing = json.loads(json_thing)
    print(json.dumps(json_thing, sort_keys=sort, indent=indents))

In [42]:
# Load the annotated regions. The path is relative, so it is resolved against
# the notebook's current working directory.
regions = get_annotation("beneficiary_details_vottraw.json")

In [43]:
# Tag groups in the order their cells are emitted. get_rois() matches each name
# as a prefix of a region's first tag.
tagGroups = [
    'CHILDID',
    'CHILDFIRSTNAME',
    'CHILDLASTNAME',
    'MOTHERFIRSTNAME',
    'MOTHERLASTNAME',
    'FATHERFIRSTNAME',
    'FATHERLASTNAME',
    'CHILDADHAARID',
    'DOB',
    'CASTE',
    'RELIGION',
    'GENDER',
    'DISABILITY',
]

# Label written to each cell's format.value, keyed by tag group. get_cells()
# falls back to the group name itself for any group not listed here.
formatLookup = dict(
    CHILDID='CHILDID',
    CHILDFIRSTNAME='CHILD FIRST NAME',
    CHILDLASTNAME='CHILD LAST NAME',
    MOTHERFIRSTNAME='MOTHER FIRST NAME',
    MOTHERLASTNAME='MOTHER LAST NAME',
    FATHERFIRSTNAME='FATHER FIRST NAME',
    FATHERLASTNAME='FATHER LAST NAME',
    CHILDADHAARID='ADHAAR CARD ID',
    DOB='DOB',
    CASTE='CASTE',
    RELIGION='RELIGION',
    GENDER='GENDER',
    DISABILITY='DISABILITY',
)

# Rewrites annotation tags whose position suffix is spelled out (TEN ... TWNTY)
# into the numeric form (10 ... 20), e.g. 'CHILDIDTEN' -> 'CHILDID10'.
# get_rois() emits any tag not present here unchanged.
# Every name/ID group covers positions 10-20 except CHILDADHAARID, which stops
# at 12.
formatAnnotationTagLookup = {
    group + suffix: group + str(position)
    for group, lastPosition in (
        ('CHILDID', 20),
        ('CHILDFIRSTNAME', 20),
        ('CHILDLASTNAME', 20),
        ('MOTHERFIRSTNAME', 20),
        ('MOTHERLASTNAME', 20),
        ('FATHERFIRSTNAME', 20),
        ('FATHERLASTNAME', 20),
        ('CHILDADHAARID', 12),
    )
    for position, suffix in enumerate(
        ('TEN', 'ELE', 'TWL', 'THRTN', 'FORTN', 'FIFTN',
         'SIXTN', 'SVNTN', 'EGTN', 'NITN', 'TWNTY'),
        start=10,
    )
    if position <= lastPosition
}


In [44]:
# Build one cell per tag group. get_cells()/get_rois() advance the global
# cellIndex/roiIndex counters, so re-running this cell without re-running the
# cell that defines them continues the ID numbering instead of restarting at 1.
cells = get_cells(regions,tagGroups,formatLookup,formatAnnotationTagLookup)

In [45]:
# Print the layout document; sort=False keeps keys in insertion order.
pp_json(get_layout(cells),False)

{
    "layout": {
        "version": "1.0",
        "name": "BENEFICIARY DETAILS FROM",
        "cells": [
            {
                "cellId": "1",
                "rois": [
                    {
                        "annotationTags": "CHILDID1",
                        "extractionMethod": "NUMERIC_CLASSIFICATION",
                        "roiId": "1",
                        "index": 0,
                        "rect": {
                            "top": 22,
                            "left": 148,
                            "bottom": 55,
                            "right": 171
                        }
                    },
                    {
                        "annotationTags": "CHILDID2",
                        "extractionMethod": "NUMERIC_CLASSIFICATION",
                        "roiId": "2",
                        "index": 1,
                        "rect": {
                            "top": 22,
                            "left": 172,
                      