In [75]:
# Dileep Gadiraju - Odisha ROI Transformation from VoTT Raw format
# Takes filename of VoTT raw format file and generates ROI configuration
# v1.5 saral ocr version

import uuid
import json

def get_annotation(filename):
    """Load a VoTT raw-format JSON file and return its list of regions.

    Args:
        filename: Path of the VoTT raw-format JSON file to read.

    Returns:
        The value stored under the top-level 'regions' key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no 'regions' key.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding; the `with` block closes the file on exit.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)
    return data['regions']

def _tag_in_group(tag, tagGroup):
    """Return True when `tag` belongs to `tagGroup`.

    A tag belongs to a group when it equals the group name or extends it after
    an underscore, so 'ROLLNUMBER' owns 'ROLLNUMBER_1' but 'CELL_QUESTION_1'
    does not own 'CELL_QUESTION_10'. A group that is empty or already ends
    with '_' is used as a plain prefix.
    """
    if not tagGroup or tagGroup.endswith('_'):
        return tag.startswith(tagGroup)
    return tag == tagGroup or tag.startswith(tagGroup + '_')


def get_rois(regions,tagGroup):
    """Build the ROI entries for every region whose first tag is in `tagGroup`.

    Args:
        regions: Iterable of region dicts with 'id', 'tags' and 'boundingBox'
            (holding 'top', 'left', 'height', 'width') keys.
        tagGroup: Tag-group name used to select regions by their first tag.

    Returns:
        A list of ROI dicts in region order. 'index' counts from 0 and 'roiId'
        from 1 within the returned list. Regions without any tag are skipped.
    """
    rois = []
    for region in regions:
        tags = region.get('tags') or []
        if not tags:
            # An untagged region cannot belong to any group.
            continue
        tag = tags[0]
        if not _tag_in_group(tag, tagGroup):
            continue

        box = region['boundingBox']
        # Each coordinate is truncated on its own before adding, so bottom and
        # right are int(top) + int(height) and int(left) + int(width).
        top = int(box['top'])
        left = int(box['left'])
        rois.append({
            "annotationId": region['id'],
            "annotationTag": tag,
            # Tags starting with 'CELL_' get an empty extraction method.
            "extractionMethod": "" if tag.startswith('CELL_') else "NUMERIC_CLASSIFICATION",
            "roiId": len(rois) + 1,
            "index": len(rois),
            "rect": {
                "top": top,
                "left": left,
                "bottom": top + int(box['height']),
                "right": left + int(box['width'])
            }
        })

    return rois

def get_cells(regions,tagGroups,validationInfo,printLabels):
    """Assemble one cell description per tag group.

    Args:
        regions: Region dicts handed on to get_rois for each group.
        tagGroups: Ordered tag-group names; one cell is produced per entry.
        validationInfo: Mapping from group name (trailing underscores removed)
            to a dict with 'regExp', 'name', 'errorMessage' and 'source'.
        printLabels: Mapping from group name to the text rendered for the cell.

    Returns:
        A list of cell dicts. 'cellId' and the render 'index' both count from 1
        in tagGroups order. A missing label yields an empty text; a missing or
        incomplete validation entry yields four empty validation fields.
    """
    cells_data = []
    for position, tagGroup in enumerate(tagGroups, start=1):
        labelText = printLabels.get(str(tagGroup), "")

        group_name = tagGroup.rstrip('_')
        try:
            rule = validationInfo[str(group_name)]
            validRegExp = rule['regExp']
            validName = rule['name']
            validErrorMsg = rule['errorMessage']
            validSource = rule['source']
        except KeyError:
            # Either the group has no rule or the rule lacks a field: blank all.
            validRegExp = validName = validErrorMsg = validSource = ""

        cell = {
            "cellId": position,
            "rois": get_rois(regions, tagGroup),
            "render": {
                "index": position,
                "text": labelText,
                "alignment": "center"
            },
            "format": {
                "name": group_name,
                "value": tagGroup.replace("_", " ")
            },
            "validate": {
                "name": validName,
                "regExp": validRegExp,
                "errorMessage": validErrorMsg,
                "source": validSource
            }
        }
        cells_data.append(cell)
    return cells_data

def get_layout(cells,responseExcludeFields):
    """Wrap the cells in the top-level layout document.

    Args:
        cells: Cell dicts, stored as-is under 'cells'.
        responseExcludeFields: Stored as-is under 'responseExcludeFields'.

    Returns:
        A dict with a single 'layout' key holding the fixed version, name, type
        and tolerance settings together with the two arguments.
    """
    tolerance = {
        "predictionMin": 0.95,
        "roiMinWidth": 15,
        "roiMinHeight": 15
    }
    layout = {}
    layout["version"] = "1.0"
    layout["name"] = "Odisha SAT 20 Questions Exam Sheet Form"
    layout["type"] = "SAT_20_MARKSHEET"
    layout["tolerance"] = tolerance
    layout["responseExcludeFields"] = responseExcludeFields
    # An "identifiers" entry is intentionally not emitted.
    layout["cells"] = cells
    return {"layout": layout}

def pp_json(json_thing, sort=True, indents=4):
    """Pretty-print a JSON document to standard output.

    Args:
        json_thing: Either a JSON string, which is parsed first, or an object
            that json.dumps can serialise directly.
        sort: Passed to json.dumps as sort_keys.
        indents: Passed to json.dumps as indent.

    Returns:
        None.
    """
    # Only exact `str` instances are parsed; everything else is dumped as-is.
    payload = json.loads(json_thing) if type(json_thing) is str else json_thing
    print(json.dumps(payload, sort_keys=sort, indent=indents))

# Load the annotated regions from the VoTT raw-format file; the bare name on
# the last line makes the notebook display the loaded list.
regions=get_annotation("sat_up_1_vottraw.json")
regions

[{'id': 'av3CIJxCK',
  'type': 'RECTANGLE',
  'tags': ['CELL_ROLLNUMBER_0'],
  'boundingBox': {'height': 62.5789785438055,
   'width': 125.84080323688592,
   'left': 17.65527425062448,
   'top': 23.7251280044742},
  'points': [{'x': 17.65527425062448, 'y': 23.7251280044742},
   {'x': 143.4960774875104, 'y': 23.7251280044742},
   {'x': 143.4960774875104, 'y': 86.30410654827969},
   {'x': 17.65527425062448, 'y': 86.30410654827969}]},
 {'id': 'w2YWhWjDD',
  'type': 'RECTANGLE',
  'tags': ['ROLLNUMBER_1'],
  'boundingBox': {'height': 34.17052883948391,
   'width': 25.479125468359697,
   'left': 155.46748152581182,
   'top': 39.3683586162944},
  'points': [{'x': 155.46748152581182, 'y': 39.3683586162944},
   {'x': 180.9466069941715, 'y': 39.3683586162944},
   {'x': 180.9466069941715, 'y': 73.53888745577831},
   {'x': 155.46748152581182, 'y': 73.53888745577831}]},
 {'id': 'd8IE27EHC',
  'type': 'RECTANGLE',
  'tags': ['CELL_ROLLNUMBER_1'],
  'boundingBox': {'height': 61.04350201165372,
   'w

In [76]:
# Validation rules keyed by tag-group name. Each rule carries a display name,
# a regular expression, an error message and a source identifier.

# Marks patterns, anchored so that the whole value has to match. The earlier
# '[0-10]' was a character class matching only the single characters '0' and
# '1', so it could not validate marks in the range 0 to 10, and the unanchored
# '[0-5]' accepted any text that merely contained one such digit.
_MARKS_REGEXP = {
    5: '^[0-5]$',
    10: '^(10|[0-9])$',
}

validationInfo = {
    # NOTE(review): this pattern accepts 2 to 4 digits while the name and the
    # error message say 2 to 3 -- confirm which is intended before changing it.
    'ROLLNUMBER': { 'name': 'Between 2 to 3 Digits' , 'regExp': '^[1-9][0-9]{1,3}$' , 'errorMessage': 'Should be 2 to 3 Digits', 'source': 'BACKEND_SCHOOL' },
}

# One rule per subject and question. Each question is described by
# (key suffix, label used in the error message, maximum marks). The label of
# question 1 keeps its leading space and question ten its 'TEN' key suffix so
# that keys and messages stay exactly as they were.
validationInfo.update({
    '%s_QUESTION_%s' % (subject, suffix): {
        'name': 'Between 0 to %d Marks' % max_marks,
        'regExp': _MARKS_REGEXP[max_marks],
        'errorMessage': '%s QUESTION%s should range from 0 to %d Marks' % (subject, label, max_marks),
        'source': 'BACKEND_EXAM',
    }
    for subject in ('HINDI', 'ENGLISH', 'MATHS', 'EVS', 'SCIENCE')
    for suffix, label, max_marks in (
        ('1', ' 1', 5),
        ('2', '2', 5),
        ('3', '3', 5),
        ('4', '4', 10),
        ('5', '5', 10),
        ('6', '6', 5),
        ('7', '7', 5),
        ('8', '8', 5),
        ('9', '9', 5),
        ('TEN', '10', 5),
    )
})

# Names of the per-cell fields listed under 'responseExcludeFields' in the layout.
responseExcludeFields= ['rois','validate','render']

In [77]:
## Text rendered for each label cell, keyed by tag group
printLabels = {
    'CELL_ROLLNUMBER_0': 'ROLL NUMBER',
    'CELL_ROLLNUMBER_4': '',
    'CELL_LABEL_0': '',
    'CELL_OBJECTIVE': 'OBJECTIVE',
    'CELL_SUBJECTIVE': 'SUBJECTIVE',
    'CELL_SUBJECT': 'SUBJECT',
}
# The ten question header cells are labelled with their own number.
printLabels.update(('CELL_QUESTION_%d' % number, str(number)) for number in range(1, 11))

## Tag groups, in output order, from which cells and rois are built
tagGroups = (
    ["CELL_ROLLNUMBER_%d" % position for position in range(4)]
    + ["ROLLNUMBER", "CELL_ROLLNUMBER_4"]
    + ["CELL_LABEL_0", "CELL_OBJECTIVE", "CELL_SUBJECTIVE", "CELL_SUBJECT"]
    + ["CELL_QUESTION_%d" % number for number in range(1, 11)]
    # Per-subject question groups; the tenth question uses the suffix 'TEN'.
    + ["%s_QUESTION_%s" % (subject, suffix)
       for subject in ("HINDI", "ENGLISH", "MATHS", "EVS", "SCIENCE")
       for suffix in ("1", "2", "3", "4", "5", "6", "7", "8", "9", "TEN")]
)
# Build one cell per tag group from the loaded regions, then pretty-print the
# complete layout document.
cells = get_cells(regions, tagGroups, validationInfo, printLabels)
pp_json(get_layout(cells, responseExcludeFields))

{
    "layout": {
        "cells": [
            {
                "cellId": 1,
                "format": {
                    "name": "CELL_ROLLNUMBER_0",
                    "value": "CELL ROLLNUMBER 0"
                },
                "render": {
                    "alignment": "center",
                    "index": 1,
                    "text": "ROLL NUMBER"
                },
                "rois": [
                    {
                        "annotationId": "av3CIJxCK",
                        "annotationTag": "CELL_ROLLNUMBER_0",
                        "extractionMethod": "",
                        "index": 0,
                        "rect": {
                            "bottom": 85,
                            "left": 17,
                            "right": 142,
                            "top": 23
                        },
                        "roiId": 1
                    }
                ],
                "validate": {
                    "errorMessage":