**DECLARE ALL FUNCTIONS**

In [11]:
import json, re

def loadFile(path: str):
    """Parse the JSON document stored at ``path`` and return the result.

    Args:
        path: Location of the JSON file; it is opened as UTF-8 text.

    Returns:
        Whatever Python object the JSON content decodes to.
    """
    with open(path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
    
def checkIndent(previous_hts_record_INDENT: int, record_INDENT: int) -> str:

        if previous_hts_record_INDENT == record_INDENT:
             return 'EQUAL to previous indent'
        elif previous_hts_record_INDENT > record_INDENT:
             return 'LESS than previous indent'
        elif previous_hts_record_INDENT < record_INDENT:
             return 'GREATER than previous indent'

def checkNextHTS(next_hts_record: dict[str, any], record: dict[str, any]) -> str | bool:

     if next_hts_record['htsno'] != '':
          return next_hts_record['htsno']
     else:
          return False

def checkIndentFollowEH(currentEH: dict[str, any], previousEH: dict[str, any]) -> str:
    """Classify a follow-up EH record's indent against the record before it.

    ``indent`` is stored as a string in the HTS JSON (e.g. ``'5'``), so both
    values are converted to ``int`` first. A plain string comparison is
    lexicographic and would rank ``'10'`` below ``'9'``.

    Args:
        currentEH: The current record; its ``'indent'`` key is read.
        previousEH: The preceding record; its ``'indent'`` key is read.

    Returns:
        ``'EQUAL'`` when both indents match, ``'DOWN'`` when the current
        indent is larger, ``'TOP'`` when it is smaller.

    Raises:
        ValueError: If either indent cannot be converted to an integer.
    """
    current_level = int(currentEH['indent'])
    previous_level = int(previousEH['indent'])

    if current_level == previous_level:
        return 'EQUAL'
    if current_level > previous_level:
        return 'DOWN'
    return 'TOP'
     
def countSuperiorRecrods(htsfile: list[dict[str, any]]):
    """Print the total record count and how many records are flagged superior.

    A record counts as superior when its ``'superior'`` value equals the
    string ``'true'``. Nothing is returned; the two counts are printed.

    Args:
        htsfile: The list of HTS records to tally.
    """
    total = len(htsfile)
    superior_total = sum(1 for entry in htsfile if entry['superior'] == 'true')

    print(f'Count of records: {total}\nCount of superior true records: {superior_total}')

**MAIN FUNCTION**

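Create the EH Record Review object, which lists every EH record (a record whose `htsno` is empty) together with its relevant info: the previous and next HTS numbers and how its indent compares with the previous record.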

In [12]:
def createEHRecordsReview(htsfile: list[dict[str, any]]) -> list[dict[str, any]]:
    """Group consecutive records with an empty ``htsno`` and summarise each group.

    Every run of one or more consecutive empty-``htsno`` (EH) records yields
    one review entry with these keys, inserted in this order:

    * ``'recordIndexes'``: indexes of all records in the run.
    * ``'previous'``: ``htsno`` of the record right before the run, or
      ``None`` when the run starts at index 0.
    * ``'next'``: the non-empty ``htsno`` found right after a record of the
      run. It is absent when no such record exists.
    * ``'TYPE'``: result of ``checkIndent`` for the first record of the run
      against the record before it, or ``None`` when the run starts at index 0.
    * ``'EH_escaling'``: only for runs longer than one record. It holds the
      result of ``checkIndentFollowEH`` for the last record of the run
      against the record before it; each follow-up record overwrites it.

    Boundary handling:

    * A run that starts at index 0 has no previous record. Python's negative
      indexing would otherwise silently compare it with the last record.
    * A run that reaches the end of the list has no next record. The lookup
      is bounds-checked instead of raising ``IndexError``.

    Args:
        htsfile: HTS records; each must provide ``'htsno'`` and ``'indent'``.

    Returns:
        One review entry per run of empty-``htsno`` records, in file order.
    """
    empty_hts_review = []
    inside_eh_run = False
    last_index = len(htsfile) - 1

    for index, record in enumerate(htsfile):

        if record['htsno'] != '':
            inside_eh_run = False
            continue

        # Only look ahead when a following record actually exists.
        if index < last_index:
            next_hts_record = checkNextHTS(htsfile[index + 1], record)
        else:
            next_hts_record = False

        if not inside_eh_run:
            # First record of a new run: open a fresh review entry.
            previous_hts_record = htsfile[index - 1] if index > 0 else None

            entry = {
                'recordIndexes': [index],
                'previous': previous_hts_record['htsno'] if previous_hts_record is not None else None
            }
            if next_hts_record:
                entry['next'] = next_hts_record
            if previous_hts_record is not None:
                entry['TYPE'] = checkIndent(previous_hts_record['indent'], record['indent'])
            else:
                entry['TYPE'] = None

            empty_hts_review.append(entry)
            inside_eh_run = True

        else:
            # Follow-up record: extend the entry opened by the start of this run.
            entry = empty_hts_review[-1]
            if next_hts_record:
                entry['next'] = next_hts_record

            entry['recordIndexes'].append(index)
            entry['EH_escaling'] = checkIndentFollowEH(record, htsfile[index - 1])

    return empty_hts_review

**MAIN FUNCTION**

Create the EH Followup Review object, which covers EH records that are followed by other EH records and shows how they are grouped.

In [13]:
def EHFollowupReview(empty_hts_review: list[dict[str, any]]) -> dict[str, any]:
    """Summarise the review entries that span more than one EH record.

    Only entries whose ``'recordIndexes'`` list holds more than one index are
    considered. Each of them is counted in ``'countFollowEH'`` and then filed
    under the bucket named by its ``'EH_escaling'`` value.

    Args:
        empty_hts_review: Review entries; ``'recordIndexes'`` is read on every
            entry and ``'EH_escaling'`` on every multi-record entry.

    Returns:
        A dict (not a list) with the keys ``'countFollowEH'``, ``'DOWN'``,
        ``'EQUAL'`` and ``'TOP'``. Each of the three buckets is a dict with a
        ``'count'`` and the list of matching ``'records'``. An entry whose
        ``'EH_escaling'`` is none of the three values is only counted in
        ``'countFollowEH'``.
    """
    escalation_types = ('DOWN', 'EQUAL', 'TOP')

    review_follow_EH_records = {'countFollowEH': 0}
    for escalation in escalation_types:
        review_follow_EH_records[escalation] = {'count': 0, 'records': []}

    for record in empty_hts_review:

        # Single-record entries have no follow-up EH record to classify.
        if len(record['recordIndexes']) <= 1:
            continue

        review_follow_EH_records['countFollowEH'] += 1

        escalation = record['EH_escaling']
        if escalation in escalation_types:
            bucket = review_follow_EH_records[escalation]
            bucket['count'] += 1
            bucket['records'].append(record)

    return review_follow_EH_records

**EXECUTION**

In [14]:
# Load the HTS records from the JSON export (relative path).
htsfile = loadFile('../../db_hts/htsdata/htsdata.json')
# Display one record with an empty 'htsno' as a sample of what is reviewed below.
htsfile[36]

{'htsno': '',
 'indent': '5',
 'description': 'For immediate slaughter:',
 'superior': 'true',
 'units': [],
 'general': '',
 'special': '',
 'other': '',
 'footnotes': None,
 'quotaQuantity': None,
 'additionalDuties': None,
 'addiitionalDuties': None}

In [15]:
# Print the total record count and the number of records with 'superior' == 'true'.
countSuperiorRecrods(htsfile)

# Build one review entry per run of consecutive empty-'htsno' records.
empty_hts_review = createEHRecordsReview(htsfile)

Count of records: 35744
Count of superior true records: 6001


In [16]:
# Display every review entry built above.
empty_hts_review

[{'recordIndexes': [1],
  'previous': '0101',
  'next': '0101.21.00',
  'TYPE': 'GREATER than previous indent'},
 {'recordIndexes': [13],
  'previous': '0102',
  'next': '0102.21.00',
  'TYPE': 'GREATER than previous indent'},
 {'recordIndexes': [15],
  'previous': '0102.21.00',
  'next': '0102.21.00.10',
  'TYPE': 'GREATER than previous indent'},
 {'recordIndexes': [18],
  'previous': '0102.21.00.20',
  'next': '0102.21.00.30',
  'TYPE': 'LESS than previous indent'},
 {'recordIndexes': [26],
  'previous': '0102.29.40',
  'next': '0102.29.40.24',
  'TYPE': 'GREATER than previous indent'},
 {'recordIndexes': [29],
  'previous': '0102.29.40.28',
  'next': '0102.29.40.34',
  'TYPE': 'LESS than previous indent'},
 {'recordIndexes': [32],
  'previous': '0102.29.40.38',
  'next': '0102.29.40.54',
  'TYPE': 'LESS than previous indent'},
 {'recordIndexes': [35, 36],
  'previous': '0102.29.40.58',
  'TYPE': 'LESS than previous indent',
  'next': '0102.29.40.62',
  'EH_escaling': 'DOWN'},
 {'rec

**CHECKING THE FOLLOW EH RECORDS (MEANING EH RECORDS THAT ARE FOLLOWED BY OTHER EH RECORDS)**

This checks how many follow-up EH groups exist, which escalation types (DOWN, EQUAL, TOP) are present, and how many groups fall under each type.

In [17]:
# Summarise the review entries that contain more than one EH record, bucketed by 'EH_escaling'.
eh_followup_review = EHFollowupReview(empty_hts_review)

# Display the summary.
eh_followup_review

{'countFollowEH': 687,
 'DOWN': {'count': 685,
  'records': [{'recordIndexes': [35, 36],
    'previous': '0102.29.40.58',
    'TYPE': 'LESS than previous indent',
    'next': '0102.29.40.62',
    'EH_escaling': 'DOWN'},
   {'recordIndexes': [53, 54],
    'previous': '0102.39.00.10',
    'TYPE': 'EQUAL to previous indent',
    'next': '0102.39.00.24',
    'EH_escaling': 'DOWN'},
   {'recordIndexes': [138, 139],
    'previous': '0201.20',
    'TYPE': 'GREATER than previous indent',
    'next': '0201.20.02.00',
    'EH_escaling': 'DOWN'},
   {'recordIndexes': [143, 144],
    'previous': '0201.20.06.00',
    'TYPE': 'LESS than previous indent',
    'next': '0201.20.10.00',
    'EH_escaling': 'DOWN'},
   {'recordIndexes': [160, 161],
    'previous': '0201.30',
    'TYPE': 'GREATER than previous indent',
    'next': '0201.30.02.00',
    'EH_escaling': 'DOWN'},
   {'recordIndexes': [165, 166],
    'previous': '0201.30.06.00',
    'TYPE': 'LESS than previous indent',
    'next': '0201.30.10.00

In [18]:
# NOTE(review): this bare expression is not the cell's last statement, so it presumably displays nothing — confirm and remove if unneeded.
eh_followup_review

# Write the follow-up review to disk as JSON indented by 4 spaces.
with open('../hts_algorithm_testing/empty_followup_records.json', 'w') as file:

    json.dump(eh_followup_review, file, indent=4)
