In [None]:
### imports

# external modules
import os
import sys
import glob
import json
import numpy as np
import tarfile as tar
import importlib

# local modules
# Make the repository top-level directory (two levels above the current
# working directory) importable, so the `automasking` package can be found.
thisdir = os.getcwd()
topdir = os.path.abspath(os.path.join(thisdir, '../../'))
# only add the path once, so that re-running this cell does not keep
# appending duplicate entries to sys.path
if topdir not in sys.path:
    sys.path.append(topdir)

import automasking.tools.automask_file_parsing
importlib.reload(automasking.tools.automask_file_parsing)
import automasking.tools.automask_operations
importlib.reload(automasking.tools.automask_operations)
from automasking.tools.automask_file_parsing import get_automask_descriptions_from_txt
from automasking.tools.automask_file_parsing import get_automask_from_description
from automasking.tools.automask_file_parsing import get_automask_from_txt
from automasking.tools.automask_file_parsing import automask_to_map
from automasking.tools.automask_file_parsing import map_to_automask
from automasking.tools.automask_file_parsing import automask_to_description
from automasking.tools.automask_file_parsing import get_automask_from_tarfile
from automasking.tools.automask_operations import simplify_automask

**Part 1: test on a single example file**

In [None]:
# find and read an example tar file

# index of the tar file to pick, and index of the member inside that tar file
tarfile_idx = 135
txtfile_idx = 10

# find a tar file
inputdir = '/eos/project-m/mlplayground/public/PixelAutomasking/PixelAutoMasking/2024'
pattern = os.path.join(inputdir, '**/*.tar.xz')
# glob returns its matches in arbitrary (file-system dependent) order;
# sort by file name (same key as in part 2, with the full path as tie-breaker)
# so that tarfile_idx always selects the same file
tarfiles = sorted(
    glob.glob(pattern, recursive=True),
    key=lambda path: (os.path.basename(path), path),
)
print(f'Found {len(tarfiles)} tar files in {inputdir}.')
tarfile = tarfiles[tarfile_idx]

# read it
with tar.open(tarfile) as f:
    # pick one of its members
    members = f.getmembers()
    print(f'Found {len(members)} elements in {tarfile}.')
    member = members[txtfile_idx]
    name = member.name
    memberf = f.extractfile(member)
    # extractfile returns None for members that are not regular files or links
    # (e.g. directories); fail with a clear message instead of an AttributeError
    if memberf is None:
        raise ValueError(f'Member {name} of {tarfile} is not a regular file.')
    # close the extracted file object once its lines are read and decoded
    with memberf:
        content = [l.decode('utf-8') for l in memberf.readlines()]

# print contents
print('Found following content:')
for line in content: print(line.strip('\n'))

In [None]:
# extract the automask descriptions from the lines of the example file,
# dropping the ones that start with 'FPix' (not implemented yet)

automask_descriptions = [
    d for d in get_automask_descriptions_from_txt(content)
    if not d.startswith('FPix')
]
for automask_description in automask_descriptions:
    print(automask_description)

In [None]:
# translate every description into ladder/module/ROC coordinates
# and print each description next to its translation

automasks = list(map(get_automask_from_description, automask_descriptions))
for d, m in zip(automask_descriptions, automasks):
    print(f'{d} -> {m}')

In [None]:
# same as above but group per layer
# (the raw text lines are passed directly; the result is printed as JSON and
# is iterated with .items() in the cells below, so it is used as a dict)
# NOTE(review): the 'FPix' filter applied to automask_descriptions above is not
# applied here - confirm how get_automask_from_txt treats FPix entries.

automasks = get_automask_from_txt(content)
print(json.dumps(automasks))

In [None]:
# build one map per key of the grouped automasks;
# the key itself is passed on as the subsystem

automask_maps = {
    subsystem: automask_to_map(masks, subsystem=subsystem)
    for subsystem, masks in automasks.items()
}

In [None]:
# round trip: convert every map back to ladder/module/ROC coordinates

automasks_check = {
    subsystem: map_to_automask(layer_map)
    for subsystem, layer_map in automask_maps.items()
}
print(automasks_check)

In [None]:
# convert the round-tripped automasks back to descriptions

descriptions_check = []
for key, val in automasks_check.items():
    # the layer number is taken from the last character of the key
    layer = int(key[-1])
    # prepend the layer number to every automask before converting it
    descriptions_check.extend(
        automask_to_description([layer] + automask) for automask in val
    )

# print originals and round-trip results for a visual comparison
print('Originals:')
for automask_description in automask_descriptions:
    print(automask_description)
print('Check:')
for d in descriptions_check:
    print(d)

**Part 2: check self-consistency on everything**

In [None]:
# collect all tar files and put them in a fixed order

# directory that is searched
inputdir = '/eos/project-m/mlplayground/public/PixelAutomasking/PixelAutoMasking/2024'

# recursive search for .tar.xz files below the input directory
pattern = os.path.join(inputdir, '**/*.tar.xz')
tarfiles = glob.glob(pattern, recursive=True)
print(f'Found {len(tarfiles)} tar files in {inputdir}.')

# order by file name (directory part ignored)
# presumably the file names encode the date - TODO confirm
tarfiles = sorted(tarfiles, key=os.path.basename)

In [None]:
# perform self-consistency check on all files:
# automasks -> map -> automasks must give back the (simplified) input

# maximum number of tar files to check (set to None to check all of them)
max_files = 10
tarfiles_to_check = tarfiles[:max_files]

# initialize some counters
num_timestamps = 0
num_automasked_rocs = 0
num_discrepancies = 0

# loop over files
for idx, tarfile in enumerate(tarfiles_to_check):
    # report progress relative to the number of files that are actually checked
    print(f'Checking tar file {idx+1} / {len(tarfiles_to_check)}...', end='\r')

    # read automasks
    automask_info = get_automask_from_tarfile(tarfile)

    # loop over timestamps
    for timestamp, automasks_dict in automask_info.items():
        num_timestamps += 1

        # loop over pixel layers
        for pxlayer, automasks in automasks_dict.items():

            # convert automask to map
            automask_map = automask_to_map(automasks, subsystem=pxlayer)
            num_automasked_rocs += np.sum(automask_map.astype(int))

            # convert back
            automasks_check = map_to_automask(automask_map)

            # check if they are equal; the originals are simplified first and
            # both sides are sorted, so the order of the entries does not matter
            automasks_formatted = sorted(simplify_automask(automasks))
            automasks_check_formatted = sorted(automasks_check)
            if automasks_formatted != automasks_check_formatted:
                num_discrepancies += 1
                print(f'WARNING: found discrepancy in {tarfile}, {timestamp}, {pxlayer}:')
                print(f'  {automasks} (-> {automasks_formatted})')
                print(f'  {automasks_check} (-> {automasks_check_formatted})')

# print the results
print(f'Checked {num_timestamps} automask files.')
print(f'Checked {num_automasked_rocs} automasked ROCs.')
print(f'Found {num_discrepancies} discrepancies.')