# DTM conversion handler

Top-level handler notebook for converting DTMs to PDS4.

Note: paths should be changed to reflect the actual locations of the input PDS3 products on your system.

In [None]:
from multiprocessing import Pool
from pathlib import Path
import time
import warnings

from cytoolz import frequencies
import numpy as np
import pandas as pd
from pdr.pdr import DuplicateKeyWarning

from utilz import index_breadth_first, make_edr_lidmap, print_inline
from vo_conversion import VikingDIMConverter, VikingDIMBrowseWriter

output_root = Path("/datascratch/viking/scratch_write/")

In [None]:
# Build an index of the source DTMs. The frame is constructed from whatever
# index_breadth_first (from utilz) yields for the volume root, then narrowed
# to rows whose path contains "/t" and ends in "img".
dtm_df = pd.DataFrame(
    index_breadth_first("/datascratch/viking/vo1_vo2-m-vis-5-dtm-v1.0/")
)
tile_candidates = dtm_df.loc[dtm_df['path'].str.contains('/t')]
is_image = tile_candidates['path'].str.endswith('img')
# Drop the index columns this notebook does not use; the trailing .copy()
# leaves `dtm` as an independent frame that later cells add columns to.
dtm = (
    tile_candidates.loc[is_image]
    .reset_index(drop=True)
    .drop(columns=['excluded', 'directory', 'ATIME', 'CTIME', 'MTIME'])
    .copy()
)

In [None]:
# Assign resolution codes and latitude bins to the input products; both are
# used to construct the output directory tree.
#
# Filename layout relied on below (by character position in the name):
#   [0]   -> 'dtype'
#   [1]   -> 'res'    (one-letter code, key into resmap)
#   [2:4] -> latitude, parsed as an integer
#   [4]   -> latitude sign character, appended to the latitude bin
resmap = {
    'c': '0004',
    'e': '0016',
    'g': '0064',
    'i': '0256',
    'j': '0512',
    'k': '1024',
}
# note that, unlike other data sets, the dtm set contains
# no dupe / superseded / errata versions.
paths = dtm['path'].map(Path)
dtm['name'] = [source.name for source in paths]
dtm['stem'] = [source.stem for source in paths]
dtm['dtype'] = dtm['name'].str[0:1]
dtm['res'] = dtm['name'].str[1:2]
latsign = dtm['name'].str[4:5]
lat = dtm['name'].str[2:4].astype(int)
# Round the latitude down to the nearest multiple of ten, render it as at
# least two digits, and append the sign character.
lat_floor = (lat // 10 * 10).astype(int)
dtm['lat_bin'] = lat_floor.astype(str).str.zfill(2) + latsign
# NOTE(review): `parts` is not referenced again in the visible cells.
parts = dtm['path'].str.split("/", expand=True)

In [None]:
# Index the EDRs to associate map-projected products with their source
# products.
#
# The EDR tree is located relative to output_root instead of through a second
# hard-coded absolute path, so editing output_root for your system keeps the
# two in step. With the default output_root this yields the same string the
# literal path did.
# NOTE(review): assumes the EDR products live under <output_root>/data/edr,
# as the original literal implied -- point EDR_ROOT elsewhere if they do not.
EDR_ROOT = str(Path(output_root, "data", "edr"))
edr_lidmap = make_edr_lidmap(EDR_ROOT)

In [None]:
def check_image_id(converter):
    """
    Repair the product id of a converter whose label IMAGE_ID is wrong.

    A handful of polar tiles have an incorrectly-specified IMAGE_ID (it does
    not match the file and is at the wrong pole). When the source file's name
    does not start with the lowercased IMAGE_ID, the converter's associations
    are (re)built and their 'product_id' is overwritten with the converter's
    output stem. Otherwise the converter is left untouched.

    Returns the same converter object in both cases.
    """
    source_name = Path(converter.data.filename).name
    label_image_id = converter.data.metaget('IMAGE_ID').lower()
    if not source_name.startswith(label_image_id):
        # label and file disagree: trust the file-derived stem instead
        converter._make_associations()
        converter.associations['product_id'] = converter.output_stem
    return converter

def convert_dtm(row, write_browse=True):
    """
    Handler function for converting a single product. Constructs a
    VikingDIMConverter and uses it to write a PDS4 data product; then uses
    that converter to construct a VikingDIMBrowseWriter to write an
    associated browse product. Note that the same objects are used for the
    single-band DIMs and the DTMs (their formats are very similar).

    Parameters
    ----------
    row
        Mapping-like record (e.g. a row of the `dtm` frame) providing 'path'
        (source product path), 'res' (key into `resmap`) and 'lat_bin'
        (latitude-bin directory name).
    write_browse
        When truthy, also write the browse product and its label.

    Returns
    -------
    int
        Always 0; failures surface as exceptions.

    Notes
    -----
    Data products are written to <output_root>/data/dtm/<res>/<lat_bin> and
    browse products to <output_root>/browse/dtm/<res>/<lat_bin>. Both
    directories are created if missing. RuntimeWarnings are escalated to
    exceptions and DuplicateKeyWarnings are silenced for the calling process.
    """
    warnings.simplefilter("ignore", category=DuplicateKeyWarning)
    warnings.simplefilter("error", category=RuntimeWarning)
    converter = VikingDIMConverter(row['path'], edr_lidmap)
    # NOTE(review): check_image_id is defined in this notebook but is not
    # called here -- confirm whether the converter should be passed through
    # it before the label is converted.
    product_subtree = Path("dtm", resmap[row['res']], row['lat_bin'])
    output_directory = Path(output_root, "data", product_subtree)
    output_directory.mkdir(parents=True, exist_ok=True)
    converter.write_file('image', output_directory)
    converter.convert_label()
    converter.write_label(output_directory)
    if write_browse:
        browse = VikingDIMBrowseWriter(converter)
        # Build the browse path from output_root directly. Substituting
        # "/data/" in the stringified data path would also rewrite any
        # "/data/" segment that happens to occur inside output_root itself.
        browse_output_directory = Path(output_root, "browse", product_subtree)
        browse_output_directory.mkdir(parents=True, exist_ok=True)
        browse.write_file("image", browse_output_directory)
        browse.convert_label()
        browse.write_label(browse_output_directory)
    return 0

In [None]:
# Execute convert_dtm in parallel across the input products.
# `results` maps each dtm row index to the AsyncResult of its conversion; the
# next cell collects outcomes from it and terminates `pool`.
pool = Pool(5)
results = {
    ix: pool.apply_async(convert_dtm, (row, True))
    for ix, row in dtm.iterrows()
}
# no further tasks will be submitted; workers exit once the queue drains
pool.close()
# Poll once per second until every task has finished. The count is refreshed
# before it is printed so the display is never stale, and the loop exits
# without an extra sleep once everything is done.
ready = []
while True:
    ready = [ix for ix, task in results.items() if task.ready()]
    print_inline(f"{len(ready)}/{len(results)}")
    if len(ready) >= len(results):
        break
    time.sleep(1)

In [None]:
# Retrieve error/success messages from the completed processes.
# `final` maps each row index to either the task's return value or the
# exception it raised.
final = {}
for row_ix, task in results.items():
    try:
        outcome = task.get()
    except Exception as ex:
        # KeyboardInterrupt is not an Exception subclass, so it still
        # propagates and interrupts the cell.
        outcome = ex
    final[row_ix] = outcome
pool.terminate()
broken = {
    row_ix: outcome
    for row_ix, outcome in final.items()
    if isinstance(outcome, Exception)
}
# show error messages (if any) along with their frequencies of occurrence
frequencies(str(error) for error in broken.values())