# EDR conversion handler

Top-level handler Notebook for converting EDRs to PDS4.

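Note: the hard-coded paths in this notebook (the source EDR directory passed to `index_breadth_first` and the `output_root` output directory) should be changed to reflect the actual locations of the input PDS3 products, and the desired output location, on your system.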


In [None]:
from multiprocessing import Pool
from pathlib import Path
import re
import time
import warnings

from cytoolz import frequencies
import pandas as pd
from pdr.pdr import DuplicateKeyWarning

from utilz import index_breadth_first, print_inline
from vo_edr_conversion import VikingEDRConverter, VikingEDRBrowseWriter

# root directory beneath which check_errata_status places every output
# product directory. change this to a writable location on your system.
output_root = Path("/datascratch/viking/scratch_write/")

In [None]:
# index the source EDR tree, then keep only the entries whose path ends
# in 'IMG'. the index is expected to provide a 'path' column.
source_index = index_breadth_first('/datascratch/viking/edr/')
edrs = pd.DataFrame(source_index)
is_img = edrs['path'].str.endswith('IMG')
# copy so that img_files is independent of the full index
img_files = edrs.loc[is_img].copy()

In [None]:
# per the source errata, a handful of EDRs exist in more than one version.
# each record below gives the IMAGE_ID of such an EDR ('id') and the number
# of the volume on which it first appeared ('first').
# check_errata_status, below, uses these records to add a version
# identifier to the affected products' filenames/LIDs.
errata_edrs = tuple(
    {'id': image_id, 'first': first_volume}
    for image_id, first_volume in (
        ("216S17", 3),
        ("217S21", 3),
        ("353S01", 5),
        ("039A29", 11),
        ("057A52", 11),
        ("448S21", 8),
    )
)

def check_errata_status(converter, collection="data"):
    """
    choose the output directory for a product and, if the product is one of
    the multi-version EDRs listed in errata_edrs, tag it with a version
    suffix.

    products whose IMAGE_ID is not listed in errata_edrs are returned
    unmodified, with an output directory of
    output_root/<collection>/edr/<orbit number, zero-padded to 4 digits>.

    products whose IMAGE_ID is listed get "_v1" appended to both
    converter.output_stem and converter.associations['pds4_lid'] when the
    volume number parsed from their source filename equals the 'first'
    volume recorded for that IMAGE_ID, and "_v2" otherwise. their output
    directory is output_root/<collection>/edr/errata.

    parameters:
        converter: object providing data.metaget(), data.filename,
            output_stem, and associations.
        collection: name of the collection subdirectory under output_root.

    returns:
        tuple of (converter, output directory as a Path). for errata
        products, the converter is modified in place.

    raises:
        ValueError: if an errata product's filename contains no
            "vo_NNNN"-style volume identifier.
    """
    image_id = converter.data.metaget('IMAGE_ID')
    # first matching record wins; None means "not an errata product"
    first_volume = next(
        (rec['first'] for rec in errata_edrs if rec['id'] == image_id),
        None
    )
    if first_volume is None:
        orbit_directory = str(converter.associations['orbit_number']).zfill(4)
        return converter, Path(output_root, collection, "edr", orbit_directory)
    # note: relies on having the volume # in the path! the last two digits
    # of the four-digit "vo_NNNN" identifier are taken as the volume number.
    filename = str(converter.data.filename)
    volume_match = re.search(r"vo_\d\d(\d\d)", filename)
    if volume_match is None:
        # fail with an explanation rather than an AttributeError on None
        raise ValueError(
            f"cannot determine the errata version of {image_id}: "
            f"no volume identifier found in path {filename!r}"
        )
    suffix = "_v1" if int(volume_match.group(1)) == first_volume else "_v2"
    converter.output_stem += suffix
    converter.associations['pds4_lid'] += suffix
    return converter, Path(output_root, collection, "edr", "errata")

In [None]:
def convert_edr(edr_file, write_browse=True):
    """
    convert one source EDR into a PDS4 data product and, optionally, an
    accompanying browse product.

    a VikingEDRConverter is built for edr_file and passed through
    check_errata_status to obtain its output directory. every object the
    converter names is written there, followed by the converted label.
    when write_browse is exactly True, a VikingEDRBrowseWriter is built
    from that converter and its "image" object and label are written to
    the "browse" collection in the same manner.

    parameters:
        edr_file: source product, passed straight to VikingEDRConverter.
        write_browse: browse output is produced only if this is the
            literal value True (the check is an identity check).

    returns:
        0 once all requested products have been written.
    """
    # DuplicateKeyWarning is suppressed for this process
    warnings.simplefilter("ignore", category=DuplicateKeyWarning)
    converter, data_dir = check_errata_status(
        VikingEDRConverter(edr_file, search_paths="format_files")
    )
    data_dir.mkdir(parents=True, exist_ok=True)
    for object_name in converter.object_names:
        converter.write_file(object_name, data_dir)
    converter.convert_label()
    converter.write_label(data_dir)
    if write_browse is not True:
        return 0
    # NOTE(review): check_errata_status runs on both the converter and the
    # browse writer. if the browse writer takes its output_stem / pds4_lid
    # from the already-suffixed converter, errata products could be
    # suffixed twice -- confirm against VikingEDRBrowseWriter.
    browse, browse_dir = check_errata_status(
        VikingEDRBrowseWriter(converter), "browse"
    )
    browse_dir.mkdir(parents=True, exist_ok=True)
    browse.write_file("image", browse_dir)
    browse.convert_label()
    browse.write_label(browse_dir)
    return 0

In [None]:
# execute convert_edr in parallel across the input products.
# the pool uses 5 worker processes; adjust to suit the host machine.
pool = Pool(5)
# maps each source path to the AsyncResult of its conversion task.
# write_browse is passed as False, so this run writes no browse products.
results = {}
for file in img_files['path']:
    results[file] = pool.apply_async(convert_edr, (file, False))
# no further tasks will be submitted
pool.close()
# poll once per second until every task has finished. the count is
# refreshed before it is printed so that the final N/N is displayed, and
# the loop exits without sleeping once everything is done.
ready = []
while True:
    ready = [k for k, v in results.items() if v.ready()]
    print_inline(f"{len(ready)}/{len(results)}")
    if len(ready) >= len(results):
        break
    time.sleep(1)

In [None]:
# collect the outcome of every conversion task: the task's return value on
# success, or the exception it raised on failure.
final = {}
for path, async_result in results.items():
    try:
        outcome = async_result.get()
    # KeyboardInterrupt is not an Exception subclass, so it still propagates
    except Exception as ex:
        outcome = ex
    final[path] = outcome
pool.terminate()
# keep only the tasks that ended in an exception
broken = {
    path: outcome
    for path, outcome in final.items()
    if isinstance(outcome, Exception)
}
# show error messages (if any) along with their frequencies of occurrence
frequencies(str(error) for error in broken.values())