# notebook for processing "lonely" L0 images

This is a notebook for processing L0 images that didn't get reduced. This is a
boring notebook that is unlikely to present any serious performance
considerations or, indeed, generate interest of any type. Most of these files
are very small; they are generally from observations that were interrupted
quite early for some reason or other -- there's a reason the M3 team didn't
bother to reduce them.

This notebook's structure copies
[triplet_processor.ipynb](triplet_processor.ipynb). This results in it looking
kind of convoluted for something that converts individual files -- lots of
single-element iterables.

In [1]:
import datetime as dt
from types import MappingProxyType

import fs.path
import pandas as pd
from pvl.decoder import ParseError
import sh

from m3_bulk import basenamer, crude_time_log, make_m3_singlet, \
    fix_end_object_tags, m3_singlet_bundle_paths
from m3_conversion import M3L0Converter

In [2]:
# presumably the file extensions that together make up a PDS3 L0 product
# (label, ENVI header, image) -- TODO confirm against m3_bulk
L0_EXTS = (
    ".LBL",
    ".HDR",
    ".IMG",
)


In [3]:
# root directories of the PDS3 (input) and PDS4 (output) data sets.
# both end in '/' because paths are later built by plain string concatenation.
input_dir = '/home/ubuntu/m3_input/'
output_dir = '/home/ubuntu/m3_output/'

# our table of file mappings, with an added "basename" column computed from
# each row's filepath; grouping on that column clusters the files that share
# an m3 basename. basename_groups is a list of (basename, sub-frame) pairs.
file_mappings = pd.read_csv('./directories/m3/m3_data_mappings.csv').assign(
    basename=lambda mappings: mappings["filepath"].apply(basenamer)
)
basename_groups = [
    (basename, members)
    for basename, members in file_mappings.groupby("basename")
]

# what kind of files does each pds4 product have?
# each entry names attributes of the associated PDSVersionConverter instance;
# those attributes hold the paths to the locally-written versions of the files.
pds4_filetypes = MappingProxyType(dict(
    l0=('pds4_label_file', 'clock_file', 'fits_image_file'),
    l1b=('pds4_label_file', 'loc_file', 'tim_file', 'rdn_file', 'obs_file'),
    l2=('pds4_label_file', 'sup_file', 'rfl_file'),
))

In [4]:
# split the basename clusters by how many files each one contains.

# clusters of three files are the triplets: what we are NOT converting here.
reduced_groups = [
    (basename, members)
    for basename, members in basename_groups
    if len(members) == 3
]

# clusters of one file are the lonesome EDR images: what we ARE converting here.
edr_groups = [
    (basename, members)
    for basename, members in basename_groups
    if len(members) == 1
]

# the converter constructors live in a read-only MappingProxyType so that they
# cannot be edited somehow in the middle of a run. a one-entry mapping is only
# used here for pattern parity with triplet_processor.ipynb.
converters = MappingProxyType(dict(l0=M3L0Converter))

# holds the current instance of each converter class, keyed by product type
writers = dict()


In [5]:
# index bounds for the conversion loop. they exist for restarting some way
# into the process after an error, or for breaking the work up between
# concurrently-running processes. this notebook was never parallelized and
# doesn't really need to be.
start_ix, stop_ix = 0, 80000  # stop_ix: i.e., infinity

# initialize iteration. note that enumerate() gives a one-shot iterator:
# whatever the loop has already consumed is not replayed unless this
# assignment is executed again.
lonely_enumerator = enumerate(edr_groups)

In [None]:
# Main conversion loop: for every lonely L0 group, build a converter from the
# PDS3 label, write the PDS4 label and product files, delete the inputs,
# occasionally spot-check the output with the Validate Tool, and log timing.
product_type = 'l0'
# keyword arguments shared by the first construction attempt and the retry
converter_kwargs = {
    "suppress_warnings": True,
    "clean": True,
    "template": "./labels/m3/l0_lonely_template.xml",
}
for ix, group in lonely_enumerator:
    # honor the restart / partition bounds
    if ix < start_ix:
        continue
    if ix >= stop_ix:
        break
    print(ix, len(edr_groups))
    singlet_start_time = dt.datetime.now()

    # organize the singlet and get its data from s3
    group_files = make_m3_singlet(group)
    print("beginning product conversion")
    # read the PDS3 product and perform file conversions
    local_pds3_label_file = input_dir + fs.path.split(group_files[product_type][0])[0]
    try:
        writers[product_type] = converters[product_type](
            local_pds3_label_file, **converter_kwargs
        )
    except ParseError:  # fix broken END_OBJECT tags in some of the target-mode files
        print("fixing broken END_OBJECT tags")
        fix_end_object_tags(local_pds3_label_file)
        # retry once on the repaired label; a second ParseError propagates
        writers[product_type] = converters[product_type](
            local_pds3_label_file, **converter_kwargs
        )

    # NOTE: everything from here on must run for EVERY singlet, not only for
    # the ones whose label needed repair, so it sits outside the except body.

    # what are the correct outpaths (relative to the root of the pds4 bundle) for these products?
    bundle_paths = m3_singlet_bundle_paths(group)
    # write PDS4 label and product files
    # don't actually need to shave the extra / here but...
    # this would be more safely rewritten with PyFilesystem
    # (see clem-conversion)
    output_path = output_dir + bundle_paths[product_type][1:]
    sh.mkdir("-p", output_path)
    writers[product_type].write_pds4(
        output_path, write_product_files=True, clean=True
    )

    # delete input files (for efficiency!)
    for input_file in map(
            lambda filename: fs.path.split(filename)[0],
            group_files[product_type]
    ):
        sh.rm(input_dir + input_file)

    # occasionally (slow but very useful) spot-check with validate tool
    # note that this just invokes a one-line script at /usr/bin/validate
    # that links to the local install of the PDS Validate Tool; this
    # allows us to avoid throwing java stuff all over our environment
    if ix % 50 == 1:
        print("1-mod-50th singlet: running Validate Tool")
        validate_results = sh.validate("-t", writers[product_type].pds4_label_file)
        with open("validate_dump.txt", "a") as validate_dump:
            validate_dump.write(validate_results.stdout.decode())
        print("validated successfully")

    # log transfer crudely -- once per singlet, not only on validated ones
    crude_time_log(
        "m3_data_conversion_log_lonely.csv",
        writers[product_type],
        str((dt.datetime.now() - singlet_start_time).total_seconds())
    )

594 622
beginning download of M3T20090420T161925
beginning product conversion
fixing broken END_OBJECT tags
Converting ./remote/m3_input_7/M3T20090420T161925_V01_L0.LBL to PDS4.
Writing PDS4 label to ./remote/m3_output_7/m3t20090420t161925_l0.xml
beginning S3 upload.
done with this singlet; total seconds 19.480803
595 622
beginning download of M3T20090421T020859
beginning product conversion
fixing broken END_OBJECT tags
Converting ./remote/m3_input_7/M3T20090421T020859_V01_L0.LBL to PDS4.
Writing PDS4 label to ./remote/m3_output_7/m3t20090421t020859_l0.xml
beginning S3 upload.
done with this singlet; total seconds 30.72032
596 622
beginning download of M3T20090421T040659
beginning product conversion
fixing broken END_OBJECT tags
Converting ./remote/m3_input_7/M3T20090421T040659_V01_L0.LBL to PDS4.
Writing PDS4 label to ./remote/m3_output_7/m3t20090421t040659_l0.xml
beginning S3 upload.
done with this singlet; total seconds 31.369978
597 622
beginning download of M3T20090421T060459
begi

done with this singlet; total seconds 105.048354
620 622
beginning download of M3T20090707T161722
beginning product conversion
fixing broken END_OBJECT tags
Converting ./remote/m3_input_7/M3T20090707T161722_V01_L0.LBL to PDS4.
