# notebook for writing mosaic browse products and checking membership

Extremely simple. Just goes through the PDS3 archive,
picks the biggest browse image or images associated with
each tile, populates a simple label template or templates,
and copies the images (and writes their labels) into a copy
of the mosaic directory structure.

Also includes some simple membership checks for the mosaic
portions of the data collection -- not a core part of the
conversion process, just a goodie.

Throughput and IOPS are both plausible limiting factors --
IOPS for the HIRES mosaic in particular -- but I never 
bothered optimizing this part of the process and have no 
real tips.

In [None]:
import fs.copy
from fs.osfs import OSFS
import numpy as np
import pandas as pd

from clem_conversion import ClemBrowseWriter

In [None]:
# Basemap products that are unique (or close to it) and therefore fall
# outside what ClemBrowseWriter handles; their labels were written by hand.
# Every id shares the 'basemap_' prefix, so only the suffixes are listed.
NONSTANDARD_MOSAIC_PRODUCT_IDS = [
    'basemap_' + suffix
    for suffix in (
        'emission_farside',
        'emission_nearside',
        'farside',
        'incidence_farside',
        'incidence_nearside',
        'nearside',
        'north_pole',
        'phase_farside',
        'phase_nearside',
        'planetwide',
        'south_pole',
    )
]

In [None]:
# Filesystem roots used by every cell below: the PDS3 archive is the
# source side, the PDS4 bundle is the destination side.
PDS3_ARCHIVE_ROOT = '/home/ubuntu/buckets/clem_input/'
PDS4_BUNDLE_ROOT = '/home/ubuntu/buckets/clem_output/'

input_fs = OSFS(PDS3_ARCHIVE_ROOT)
output_fs = OSFS(PDS4_BUNDLE_ROOT)

In [None]:
# membership checks
#
# Compares the product ids listed in the mosaic manifest against the
# '*.xml' files actually present in the output bundle, and fails loudly
# (naming the offending products) if the two disagree.

# index of source mosaic files and our new locations for them
mosaic_manifest = pd.read_csv('./directories/clementine/mosaic_product_index.csv')

# NOTE(review): this walks every '*.xml' file under the bundle root. If the
# browse cells below have already been run, the labels they write may show up
# here as unexpected additions -- confirm whether the walk should be narrowed.
label_files = list(output_fs.walk.files(filter=['*.xml']))
# split each path once into (directory, filename)
split_labels = [fs.path.split(file) for file in label_files]
extant_products = pd.DataFrame({
    'pds4_path': [directory for directory, _ in split_labels],
    # product id is the filename minus its 4-character '.xml' suffix
    'pds4_product_id': [filename[:-4] for _, filename in split_labels],
})

# files in the bundle whose product id is absent from the manifest
weird_additions = extant_products.loc[
    ~extant_products['pds4_product_id'].isin(mosaic_manifest['pds4_product_id'])
]
# manifest entries with no corresponding file in the bundle
not_in_there = mosaic_manifest.loc[
    ~mosaic_manifest['pds4_product_id'].isin(extant_products['pds4_product_id'])
]

assert not_in_there.empty, (
    f"{len(not_in_there)} manifest product(s) missing from the bundle, e.g. "
    f"{not_in_there['pds4_product_id'].head().tolist()}"
)

# the only acceptable extras are the hand-labeled nonstandard basemap products
unexpected_additions = weird_additions.loc[
    ~weird_additions['pds4_product_id'].isin(NONSTANDARD_MOSAIC_PRODUCT_IDS)
]
assert unexpected_additions.empty, (
    f"{len(unexpected_additions)} unexpected product(s) in the bundle, e.g. "
    f"{unexpected_additions['pds4_product_id'].head().tolist()}"
)

In [None]:
# Basemap tiles are the manifest rows whose PDS3 path starts with 'cl_30'.
# Their large browse JPEG path is the first 8 characters of the PDS3 path,
# then 'browse/large/', then the rest of the path with its last 4
# characters replaced by '.jpg'.
is_basemap = mosaic_manifest['pds3_path'].str.startswith('cl_30')
basemap_df = mosaic_manifest.loc[is_basemap].copy()
basemap_pds3 = basemap_df['pds3_path']
basemap_df['browse'] = (
    basemap_pds3.str[:8] + 'browse/large/' + basemap_pds3.str[8:-4] + '.jpg'
)

In [None]:
# HIRES tiles are the manifest rows whose PDS3 path starts with 'cl_60'.
# Their browse JPEG sits in a 'browse/' directory inserted just before the
# final 12 characters of the PDS3 path; the last 4 characters are replaced
# by '.jpg'.
is_hires = mosaic_manifest['pds3_path'].str.startswith('cl_60')
hires_df = mosaic_manifest.loc[is_hires].copy()
hires_pds3 = hires_df['pds3_path']
hires_df['browse'] = (
    hires_pds3.str[:-12] + 'browse/' + hires_pds3.str[-12:-4] + '.jpg'
)

In [None]:
# UVVIS tiles are the manifest rows whose PDS3 path starts with 'cl_40'.
# Each tile gets three large browse JPEG paths, one per browse type; the
# three paths differ only in the subdirectory under 'browse/'.
is_uvvis = mosaic_manifest['pds3_path'].str.startswith('cl_40')
uvvis_df = mosaic_manifest.loc[is_uvvis].copy()
uvvis_prefix = uvvis_df['pds3_path'].str[:8]
uvvis_stem = uvvis_df['pds3_path'].str[-12:-4]
for browse_kind, browse_subdir in (
    ('bw', '750nm'),
    ('color', 'color'),
    ('ratio', 'ratio'),
):
    uvvis_df['browse_' + browse_kind] = (
        uvvis_prefix + ('browse/' + browse_subdir + '/large/') + uvvis_stem + '.jpg'
    )

In [None]:
# NIR tiles are the manifest rows whose PDS3 path starts with 'cl_50'.
# Each tile gets three large browse JPEG paths, one per browse type; the
# three paths differ only in the subdirectory under 'browse/'.
is_nir = mosaic_manifest['pds3_path'].str.startswith('cl_50')
nir_df = mosaic_manifest.loc[is_nir].copy()
nir_prefix = nir_df['pds3_path'].str[:8]
nir_stem = nir_df['pds3_path'].str[-12:-4]
for browse_kind, browse_subdir in (
    ('bw', '2000nm'),
    ('color', 'color'),
    ('ratio', 'ratio'),
):
    nir_df['browse_' + browse_kind] = (
        nir_prefix + ('browse/' + browse_subdir + '/large/') + nir_stem + '.jpg'
    )

In [None]:
# copy each basemap browse jpeg into the bundle & write its label
for tile in basemap_df.itertuples():
    product_id = tile.pds4_product_id
    # destination directory: 'browse/' plus the tile's PDS4 path with its
    # first 6 characters dropped
    browse_dir = 'browse/' + tile.pds4_path[6:]
    output_fs.makedirs(browse_dir, recreate=True)

    destination = browse_dir + product_id + "_browse.jpg"
    fs.copy.copy_file(input_fs, tile.browse, output_fs, destination)

    label_writer = ClemBrowseWriter(product_id, "basemap")
    label_writer.write_pds4(output_fs.getsyspath(browse_dir))

In [None]:
# copy the uvvis browse jpegs of each type into the bundle & write labels
for tile in uvvis_df.itertuples():
    product_id = tile.pds4_product_id
    # tiles whose product id contains 'phase' are skipped entirely
    if 'phase' in product_id:
        continue

    # destination directory: 'browse/' plus the tile's PDS4 path with its
    # first 6 characters dropped
    browse_dir = 'browse/' + tile.pds4_path[6:]
    output_fs.makedirs(browse_dir, recreate=True)
    label_dir = output_fs.getsyspath(browse_dir)

    for image_type in ('bw', 'color', 'ratio'):
        source = getattr(tile, 'browse_' + image_type)
        destination = browse_dir + product_id + "_" + image_type + ".jpg"
        fs.copy.copy_file(input_fs, source, output_fs, destination)

        label_writer = ClemBrowseWriter(product_id, "uvvis", image_type)
        label_writer.write_pds4(label_dir)

In [None]:
# copy the nir browse jpegs of each type into the bundle & write labels
for tile in nir_df.itertuples():
    product_id = tile.pds4_product_id

    # destination directory: 'browse/' plus the tile's PDS4 path with its
    # first 6 characters dropped
    browse_dir = 'browse/' + tile.pds4_path[6:]
    output_fs.makedirs(browse_dir, recreate=True)
    label_dir = output_fs.getsyspath(browse_dir)

    for image_type in ('bw', 'color', 'ratio'):
        source = getattr(tile, 'browse_' + image_type)
        destination = browse_dir + product_id + "_" + image_type + ".jpg"
        fs.copy.copy_file(input_fs, source, output_fs, destination)

        label_writer = ClemBrowseWriter(product_id, "nir", image_type)
        label_writer.write_pds4(label_dir)

In [None]:
# copy each hires browse jpeg into the bundle & write its label
for tile in hires_df.itertuples():
    # only tiles whose PDS3 product id starts with 'g' are processed
    if tile.pds3_product_id.startswith('g'):
        product_id = tile.pds4_product_id
        # destination directory: 'browse/' plus the tile's PDS4 path with
        # its first 6 characters dropped
        browse_dir = 'browse/' + tile.pds4_path[6:]
        output_fs.makedirs(browse_dir, recreate=True)

        destination = browse_dir + product_id + "_browse.jpg"
        fs.copy.copy_file(input_fs, tile.browse, output_fs, destination)

        label_writer = ClemBrowseWriter(product_id, "hires")
        label_writer.write_pds4(output_fs.getsyspath(browse_dir))