In [7]:
import os, re, sys

# stage_in packages
from unity_sds_client.resources.collection import Collection

# stage_out packages
from datetime import datetime, timezone
from unity_sds_client.resources.dataset import Dataset
from unity_sds_client.resources.data_file import DataFile

# Import Files from STAC Item Collection

Load filenames from the stage_in STAC item collection file

In [8]:
input_stac_collection_file = 'test/stage_in/catalog.json' # type: stage-in
output_stac_catalog_dir    = 'process_results'                    # type: stage-out

output_filename_prefix = 'test_output'

In [9]:
inp_collection = Collection.from_stac(input_stac_collection_file)
data_filenames = inp_collection.data_locations()

data_filenames

if2


['/unity/ads/input_collections/EMIT_TEST/EMIT_L1B/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc',
 '/unity/ads/input_collections/EMIT_TEST/EMIT_L1B/EMIT_L1B_OBS_001_20230620T084426_2317106_011.nc',
 '/unity/ads/input_collections/EMIT_TEST/EMIT_L2A/EMIT_L2A_RFL_001_20230620T084426_2317106_011.nc',
 '/unity/ads/input_collections/EMIT_TEST/EMIT_L2A/EMIT_L2A_RFLUNCERT_001_20230620T084426_2317106_011.nc',
 '/unity/ads/input_collections/EMIT_TEST/EMIT_L2A/EMIT_L2A_MASK_001_20230620T084426_2317106_011.nc']

# Grab input files for GHG process

Pass appropriate nc files into the ghg_process.py call

In [10]:
# need to add some robustness code and/or basic exception handling to deal with possibly mal-formed input catalog
r = re.compile('.*L1B_RAD*')
l1b_rad_filename = list(filter(r.match, data_filenames))[0]
r = re.compile('.*L1B_OBS*')
l1b_obs_filename = list(filter(r.match, data_filenames))[0]
r = re.compile('.*L2A_MASL*')
l2a_mask_filename = list(filter(r.match, data_filenames))[0]

#print(l1b_rad_filename)
#print(l1b_obs_filename)
#print(l2a_mask_filename)

# Create output directory
Create directory to store output files, it will be held locally at the path specified by the stage-out variable "output_stac_catalog_dir"

In [11]:
if not os.path.exists(output_stac_catalog_dir):
    os.system('mkdir ' + output_stac_catalog_dir)



# Run the GHG process
Pass appropriate nc files into the ghg_process.py call

In [None]:
sys.argv = ['ghg_process.py', l1b_rad_filename, l1b_obs_filename, l1b_rad_filename, 'blah.img', l2a_mask_filename, l2a_mask_filename, output_stac_catalog_dir+output_filename_prefix, '--state_subs', l2a_mask_filename, '--overwrite']
exec(open("./ghg_process.py").read())

INFO:2024-09-05,15:24:30 ||| starting parallel mf
INFO:2024-09-05,15:24:30 ||| Started processing input file: "/unity/ads/input_collections/EMIT_TEST/EMIT_L1B/EMIT_L1B_RAD_001_20230620T084426_2317106_011.nc"
INFO:2024-09-05,15:24:34 ||| Active wavelength range: [500.0, 2450.0]: 255 channels
INFO:2024-09-05,15:24:34 ||| load target library
INFO:2024-09-05,15:24:34 ||| Create output file, initialized with nodata
2024-09-05 15:24:34,399	INFO worker.py:1453 -- Calling ray.init() again after it has already been called.
INFO:2024-09-05,15:24:34 ||| load radiance for chunk 1 / 1
INFO:2024-09-05,15:24:35 ||| load masks
INFO:2024-09-05,15:24:37 ||| writing flare locations to process_resultstest_output_flares.json
INFO:2024-09-05,15:24:38 ||| initializing ray, adding data to shared memory
INFO:2024-09-05,15:24:39 ||| Run jobs
[2m[36m(mf_one_column pid=523)[0m DEBUG:2024-09-05,15:24:39 ||| Col: 0
[2m[36m(mf_one_column pid=523)[0m DEBUG:2024-09-05,15:24:39 ||| Column 0 mean: -7.6784897160119

# Create stage-out item catalog

In [9]:
# Create a collection
out_collection  = Collection("EMIT_TEST_FOR_JAY")

# Create Datasets for the MF output file
start_time=datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()
end_time=datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()
creation_time=datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()

mf_envi_file = output_filename_prefix + '_ch4_mf'
dataset_envi_file = Dataset(
    name=mf_envi_file, 
    collection_id=out_collection.collection_id, 
    start_time=start_time, 
    end_time=end_time,
    creation_time=creation_time,
)
dataset_envi_file.add_data_file(DataFile("data", mf_envi_file)) # Add output file(s) to the dataset

mf_envi_hdr = output_filename_prefix + '_ch4_mf.hdr'
dataset_envi_hdr = Dataset(
    name=mf_envi_hdr, 
    collection_id=out_collection.collection_id, 
    start_time=start_time, 
    end_time=end_time,
    creation_time=creation_time,
)
dataset_envi_hdr.add_data_file(DataFile("data", mf_envi_hdr)) # Add output file(s) to the dataset

mf_target = output_filename_prefix + '_ch4_target'
dataset_target = Dataset(
    name=mf_target, 
    collection_id=out_collection.collection_id, 
    start_time=start_time, 
    end_time=end_time,
    creation_time=creation_time,
)
dataset_target.add_data_file(DataFile("data", mf_target)) # Add output file(s) to the dataset

# Add the dataset to the collection
#out_collection.add_dataset(dataset)
out_collection._datasets.append(dataset_envi_file)
out_collection._datasets.append(dataset_envi_hdr)
out_collection._datasets.append(dataset_target)

Collection.to_stac(out_collection, output_stac_catalog_dir)

In [10]:
out_collection.data_locations()

['test_output_ch4_mf', 'test_output_ch4_mf.hdr', 'test_output_ch4_target']