In [7]:
# Dileep Gadiraju - Odisha ROI Transformation from VoTT Raw format
# Takes filename of VoTT raw format file and generates ROI configuration
import uuid
import json

def get_annotation(filename):
    """Load a VoTT raw-format annotation file and return its regions.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The value stored under the top-level ``"regions"`` key of the
        parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no ``"regions"`` key.
    """
    # The ``with`` block closes the file on exit, so no explicit close() is
    # needed. The encoding is pinned to UTF-8 so the result does not depend
    # on the platform's default text encoding.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)
    return data['regions']

def get_rois(regions,tagGroup):
    """Build the ROI entries for the regions that belong to one tag group.

    A region belongs to the group when its first tag starts with
    ``tagGroup``. Regions that carry no tags are skipped.

    Args:
        regions: Iterable of region dicts. Each selected region is read for
            ``'id'``, ``'tags'`` and ``'boundingBox'`` (with ``'top'``,
            ``'left'``, ``'height'`` and ``'width'``).
        tagGroup: Tag prefix that selects the regions of the group.

    Returns:
        A list of ROI dicts in the order the matching regions appear.
        ``index`` counts from 0 and ``roiId`` from 1; both restart on every
        call.
    """
    rois = []

    for region in regions:
        tags = region.get('tags')
        # An untagged region cannot belong to any group; skip it instead of
        # failing on tags[0].
        if not tags or not tags[0].startswith(tagGroup):
            continue

        box = region['boundingBox']
        top = int(box['top'])
        left = int(box['left'])
        # Number of ROIs emitted so far doubles as the zero-based index.
        index = len(rois)

        rois.append({
            "annotationId": region['id'],
            "annotationTags": tags[0],
            "extractionMethod": "NUMERIC_CLASSIFICATION",
            "roiId": index + 1,
            "index": index,
            "rect": {
                "top": top,
                "left": left,
                # Each term is truncated to int before the addition, so
                # bottom/right can be one pixel smaller than
                # int(top + height) / int(left + width). Kept as is so that
                # generated configurations stay unchanged.
                "bottom": top + int(box['height']),
                "right": left + int(box['width'])
            }
        })

    return rois

def get_cells(regions,tagGroups):
    """Create one cell description per tag group.

    Args:
        regions: Region dicts, passed unchanged to ``get_rois`` for each
            group.
        tagGroups: Sequence of tag prefixes; each one yields one cell.

    Returns:
        A list of cell dicts in ``tagGroups`` order. ``cellId`` and the
        render ``index`` both count from 1, and the format name/value is the
        tag prefix with every underscore removed.
    """
    cells = []
    for position, group in enumerate(tagGroups, start=1):
        group_rois = get_rois(regions, group)
        label = group.replace("_", "")
        cell = {
            "cellId": position,
            "rois": group_rois,
            "render": {"index": position},
            "format": {"name": label, "value": label},
            "validate": {"regExp": ""},
        }
        cells.append(cell)
    return cells

def get_layout(cells):
    """Wrap the given cells in the top-level layout document.

    Args:
        cells: The cell list to store under ``layout.cells``; it is
            referenced as is, not copied.

    Returns:
        A dict with a single ``"layout"`` key holding the version, the form
        name and the cells.
    """
    form = {
        "version": "1.0",
        "name": "Odisha SAT Exam Sheet Form",
        "cells": cells,
    }
    return {"layout": form}

def pp_json(json_thing, sort=True, indents=4):
    """Pretty-print JSON to standard output.

    Args:
        json_thing: Either JSON text (``str``, ``bytes`` or ``bytearray``),
            which is parsed first, or an already-parsed JSON-serialisable
            object.
        sort: Passed to ``json.dumps`` as ``sort_keys``.
        indents: Passed to ``json.dumps`` as ``indent``.

    Returns:
        None. The formatted JSON is printed.

    Raises:
        json.JSONDecodeError: If ``json_thing`` is text that is not valid
            JSON.
        TypeError: If the object cannot be serialised by ``json.dumps``.
    """
    # isinstance (rather than an exact type comparison) also covers str
    # subclasses, and json.loads accepts bytes/bytearray payloads too.
    if isinstance(json_thing, (str, bytes, bytearray)):
        json_thing = json.loads(json_thing)
    print(json.dumps(json_thing, sort_keys=sort, indent=indents))
    return None

# Pretty-print the generated layout with sorted keys.
# NOTE(review): `cells` is assigned in a later cell (In [10]); on a fresh
# kernel this call raises NameError unless that cell has been run first.
pp_json(get_layout(cells))

{
    "layout": {
        "cells": [
            {
                "cellId": 1,
                "format": {
                    "name": "STUDENTID",
                    "value": "STUDENTID"
                },
                "render": {
                    "index": 1
                },
                "rois": [
                    {
                        "annotationId": "eKS5GDoKy",
                        "annotationTags": "STUDENTID_1",
                        "extractionMethod": "NUMERIC_CLASSIFICATION",
                        "index": 0,
                        "rect": {
                            "bottom": 50,
                            "left": 192,
                            "right": 210,
                            "top": 30
                        },
                        "roiId": 1
                    },
                    {
                        "annotationId": "07gfo-X6k",
                        "annotationTags": "STUDENTID_2",
                        "extraction

In [8]:
# Load the annotated regions from the VoTT raw export.
regions=get_annotation("sat_odisha_vottraw.json")
# Echo the loaded regions as the cell output.
# NOTE(review): this displays the whole list; a slice such as regions[:3]
# would keep the saved output short.
regions


[{'id': 'eKS5GDoKy',
  'type': 'RECTANGLE',
  'tags': ['STUDENTID_1'],
  'boundingBox': {'height': 20.06715689132463,
   'width': 18.818058473389357,
   'left': 192.39873293067228,
   'top': 30.094832235307837},
  'points': [{'x': 192.39873293067228, 'y': 30.094832235307837},
   {'x': 211.21679140406164, 'y': 30.094832235307837},
   {'x': 211.21679140406164, 'y': 50.16198912663246},
   {'x': 192.39873293067228, 'y': 50.16198912663246}]},
 {'id': '07gfo-X6k',
  'type': 'RECTANGLE',
  'tags': ['STUDENTID_2'],
  'boundingBox': {'height': 22.313025323275863,
   'width': 23.851433234229575,
   'left': 215.7896926706308,
   'top': 29.783081896551725},
  'points': [{'x': 215.7896926706308, 'y': 29.783081896551725},
   {'x': 239.6411259048604, 'y': 29.783081896551725},
   {'x': 239.6411259048604, 'y': 52.09610721982759},
   {'x': 215.7896926706308, 'y': 52.09610721982759}]},
 {'id': 'bv3o6dbOL',
  'type': 'RECTANGLE',
  'tags': ['STUDENTID_3'],
  'boundingBox': {'height': 20.170698887645056,
 

In [9]:
# Tag prefixes, one per output cell: the student id, questions 1-20, then the
# two marks fields. The trailing underscore keeps "QUESTION1_" from also
# matching "QUESTION10_..." tags in the startswith test.
tagGroups = (
    ["STUDENTID_"]
    + ["QUESTION{}_".format(number) for number in range(1, 21)]
    + ["MAX_MARKS_", "MARKS_OBTAINED_"]
)
# ROIs of the first tag group only ("STUDENTID_").
# NOTE(review): `rois` is not referenced again in the visible cells — confirm
# whether this line is still needed.
rois=get_rois(regions,tagGroups[0])


In [10]:
# Build one cell per tag group from the loaded regions.
cells=get_cells(regions,tagGroups)

In [11]:
# Display the complete layout dict as the cell output.
get_layout(cells)

{'layout': {'version': '1.0',
  'name': 'Odisha SAT Exam Sheet Form',
  'cells': [{'cellId': 1,
    'rois': [{'annotationId': 'eKS5GDoKy',
      'annotationTags': 'STUDENTID_1',
      'extractionMethod': 'NUMERIC_CLASSIFICATION',
      'roiId': 1,
      'index': 0,
      'rect': {'top': 30, 'left': 192, 'bottom': 50, 'right': 210}},
     {'annotationId': '07gfo-X6k',
      'annotationTags': 'STUDENTID_2',
      'extractionMethod': 'NUMERIC_CLASSIFICATION',
      'roiId': 2,
      'index': 1,
      'rect': {'top': 29, 'left': 215, 'bottom': 51, 'right': 238}},
     {'annotationId': 'bv3o6dbOL',
      'annotationTags': 'STUDENTID_3',
      'extractionMethod': 'NUMERIC_CLASSIFICATION',
      'roiId': 3,
      'index': 2,
      'rect': {'top': 30, 'left': 242, 'bottom': 50, 'right': 265}},
     {'annotationId': 'bxUpSr1kN',
      'annotationTags': 'STUDENTID_4',
      'extractionMethod': 'NUMERIC_CLASSIFICATION',
      'roiId': 4,
      'index': 3,
      'rect': {'top': 30, 'left': 269, 'b