In [24]:
!pip install -e ..

Obtaining file:///projects/fireatlas_nrt
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: fireatlas
  Building editable for fireatlas (pyproject.toml) ... [?25ldone
[?25h  Created wheel for fireatlas: filename=fireatlas-0.99.1.dev23+g804753c.d20240329-0.editable-py3-none-any.whl size=4406 sha256=5e6052a36e23a1be7eaac2e16ef886196872204423a14ed61ef8a8e9035ef9be
  Stored in directory: /tmp/pip-ephem-wheel-cache-mblq0lbs/wheels/81/38/4c/ee942804efcea9519e5b862948c7e00b4d3bbec73eaa94fe1b
Successfully built fireatlas
Installing collected packages: fireatlas
  Attempting uninstall: fireatlas
    Found existing installation: fireatlas 0.99.1.dev23+g804753c.d20240329
    Uninstalling fireatlas-0.99.1.dev23+g804753c.d20240329:
      Successfully uninstalled

In [5]:
import functools
import json
import argparse
import os
import time
from typing import Tuple
import concurrent
from concurrent.futures import ThreadPoolExecutor

from fireatlas import preprocess
from fireatlas import FireTime
from fireatlas.FireLog import logger
from fireatlas.FireTypes import Region, TimeStep
from fireatlas.utils import timed
from fireatlas import FireConsts
from fireatlas import FireIO

from maap.maap import MAAP
from maap.dps.dps_job import DPSJob
from maap.utils import algorithm_utils


class JobSubmissionException(Exception):
    """Raised by `validate_job_submission` when a job reports a 'failed' submission status."""


def get_algorithm_config_filepath(dir_names, base_dir=None):
    """Build the path to `algorithm_config.yaml` inside each named directory.

    Args:
        dir_names: iterable of directory names, relative to `base_dir`.
        base_dir: directory that contains the algorithm directories. Defaults to the
            current working directory, which is what the previous
            `os.path.dirname(os.path.abspath(__name__))` expression resolved to
            (`__name__` is a module name, not a file path).

    Returns:
        A list with one `<base_dir>/<dir_name>/algorithm_config.yaml` path per entry of
        `dir_names`, in the same order. The paths are not checked for existence.
    """
    if base_dir is None:
        base_dir = os.getcwd()
    return [
        os.path.join(base_dir, str(dir_name), 'algorithm_config.yaml')
        for dir_name in dir_names
    ]


def validate_job_submission(submitted_jobs: Tuple[DPSJob]) -> Tuple[DPSJob]:
    """Check that every job was accepted at submission time.

    Job submissions are not retried; they should ideally always work, so any failure is raised.

    Args:
        submitted_jobs: the objects returned by `maap_api.submitJob`; each must expose `status`.

    Returns:
        `submitted_jobs` unchanged when no job reports a 'failed' status.

    Raises:
        JobSubmissionException: if at least one job reports a 'failed' status.
    """
    # compare case-insensitively, like `poll_on_job_status` does for run statuses
    failed_submissions = [
        job for job in submitted_jobs if str(job.status).lower() == 'failed'
    ]
    # test the list itself: `any()` would test the truthiness of each job object instead
    if failed_submissions:
        raise JobSubmissionException(f"[ SUBMISSION FAILED ]: the following jobs failed to submit {failed_submissions}")
    return submitted_jobs


def wait_for_job(dps_job: DPSJob) -> DPSJob:
    """Block until `dps_job` completes and hand back whatever `wait_for_completion` returns.

    `DPSJob.wait_for_completion` does the waiting itself, using exponential backoff:
    https://github.com/MAAP-Project/maap-py/blob/master/maap/dps/dps_job.py#L80C9-L80C28

    Lower-cased, the job statuses seem to be: ['failed', 'succeeded', 'accepted', 'running']
    https://github.com/MAAP-Project/maap-py/blob/master/maap/dps/dps_job.py
    """
    finished_job = dps_job.wait_for_completion()
    return finished_job


def poll_on_job_status(jobs: Tuple[DPSJob]) -> Tuple[DPSJob]:
    """Wait for every job to finish and collect the ones that did not succeed.

    Each job is waited on in a worker thread via `wait_for_job`.

    Args:
        jobs: the jobs to wait for.

    Returns:
        A list of `concurrent.futures.Future` objects (not the jobs themselves, despite the
        annotation): one per job whose final status is not 'succeeded', or whose wait or
        status check raised. `future.result()` gives the finished job, or re-raises the
        error when the wait itself raised.
    """
    failed_jobs = []
    # don't want to overwhelm the MAAP api so keeping max_workers relatively small
    with ThreadPoolExecutor(max_workers=5) as executor:
        job_futures = [executor.submit(wait_for_job, dps_job) for dps_job in jobs]
        for job_future in concurrent.futures.as_completed(job_futures):
            try:
                if job_future.result().retrieve_status().lower() != 'succeeded':
                    failed_jobs.append(job_future)
            except Exception as e:
                logger.exception(f"'poll_on_jobs_status' failed with {e}")
                # a job whose outcome could not be determined must not be reported as a success
                failed_jobs.append(job_future)
    return failed_jobs


def track_submitted_jobs(submitted_jobs: Tuple[DPSJob]) -> Tuple[DPSJob]:
    """Validate the submissions, then block until the jobs finish.

    Returns whatever `poll_on_job_status` returns for the validated jobs; an exception
    raised by `validate_job_submission` propagates to the caller.
    """
    return poll_on_job_status(validate_job_submission(submitted_jobs))

In [6]:
# Processing window handed to `FireTime.t_generator` below.
# NOTE(review): each entry looks like [year, month, day, 'AM'|'PM'] — confirm against `TimeStep`.
tst = [2023,1,1,'AM']
ted = [2023,3,1,'PM']
# [region name, bounding box]; a later cell submits these as the `regnm` and `bbox` job parameters.
# NOTE(review): bbox is presumably [min lon, min lat, max lon, max lat] — confirm.
region = ["V3ShastaTrinity", [-124.354248,40.624376,-122.250366,42.045213]]
# Collect everything `FireTime.t_generator(tst, ted)` yields into a list.
list_of_time_steps = list(FireTime.t_generator(tst, ted))

#### Data Update Checker

In [9]:
# Resolve and load the algorithm config for the data update checker.
configs = get_algorithm_config_filepath(['data_update_checker',])
maap_api = MAAP(maap_host='api.maap-project.org')
algo_config = algorithm_utils.read_yaml_file(configs[0])
# Drop the 'inputs' entry before printing; raises KeyError if the config has no such key.
algo_config.pop('inputs')
print(algo_config)

# Submit a single job with no algorithm parameters, then check the submission status.
submitted_jobs = []
submit_job_kwargs = {
    "identifier": f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
    "algo_id": algo_config["algorithm_name"],
    "version": algo_config["algorithm_version"],
    # NOTE(review): hard-coded user — confirm before running under another account
    "username": "gcorradini",
    "queue": algo_config["queue"],
}
param_kwargs = {}
result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
submitted_jobs.append(result)
# `queued_jobs` is read by the "Job Monitoring" cells below (whichever submission cell ran last wins).
queued_jobs = validate_job_submission(submitted_jobs)

{'algorithm_description': 'Data update checker', 'algorithm_name': 'eis-feds-dps-data-checker-v3', 'algorithm_version': '1.0.0', 'environment': 'ubuntu', 'repository_url': 'https://repo.maap-project.org/gcorradini/fireatlas_nrt.git', 'docker_container_url': 'mas.maap-project.org/root/maap-workspaces/base_images/vanilla:v3.1.4', 'queue': 'maap-dps-eis-worker-64gb', 'run_command': 'fireatlas_nrt/maap_runtime/run_dps_data_checker.sh', 'build_command': 'fireatlas_nrt/maap_runtime/run_dps_build.sh', 'disk_space': '10GB'}


### Preprocess Region

In [18]:
# Resolve and load the algorithm config for the region preprocessing job.
configs = get_algorithm_config_filepath(['preprocess_region',])
maap_api = MAAP(maap_host='api.maap-project.org')
algo_config = algorithm_utils.read_yaml_file(configs[0])
# Drop the 'inputs' entry before printing; raises KeyError if the config has no such key.
algo_config.pop('inputs')
print(algo_config)

# Submit a single job for `region`, then check the submission status.
submitted_jobs = []
submit_job_kwargs = {
    "identifier": f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
    "algo_id": algo_config["algorithm_name"],
    "version": algo_config["algorithm_version"],
    # NOTE(review): hard-coded user — confirm before running under another account
    "username": "gcorradini",
    "queue": algo_config["queue"],
}
# Region name and bounding box are passed through as the job's `regnm` and `bbox` parameters.
param_kwargs = {"regnm": region[0], "bbox": region[1]}
result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
submitted_jobs.append(result)
# `queued_jobs` is read by the "Job Monitoring" cells below (whichever submission cell ran last wins).
queued_jobs = validate_job_submission(submitted_jobs)

{'algorithm_name': 'eis-feds-dps-precprocess-region-v3', 'algorithm_description': 'create geojson for a region and maybe static source filter', 'algorithm_version': '1.0.0', 'environment': 'ubuntu', 'repository_url': 'https://repo.maap-project.org/gcorradini/fireatlas_nrt.git', 'docker_container_url': 'mas.maap-project.org/root/maap-workspaces/base_images/vanilla:v3.1.4', 'queue': 'maap-dps-eis-worker-64gb', 'run_command': 'fireatlas_nrt/maap_runtime/run_preprocess_region.sh', 'build_command': 'fireatlas_nrt/maap_runtime/run_dps_build.sh', 'disk_space': '100GB'}


### Preprocess Region T (one job per time step)

In [19]:
# submitted_jobs = []
# submit_job_kwargs = {
#     "identifier": f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
#     "algo_id": algo_config["algorithm_name"],
#     "version": algo_config["algorithm_version"],
#     "username": "gcorradini",
#     "queue": algo_config["queue"],
# }
# for t in list_of_time_steps:
#     print(t)
#     param_kwargs = {"regnm": region[0],"t": json.dumps(t)}
#     output_filepath = preprocess.preprocessed_filename(t, sat=FireConsts.firesrc, region=region, location="s3")
#     if FireIO.os_path_exists(output_filepath):
#         logger.info(f"skipping 'preprocess_region_t' b/c file \
#         already exists for region {region[0]}, {output_filepath}")
#         continue

#     result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
#     submitted_jobs.append(result)
# queued_jobs = validate_job_submission(submitted_jobs)

#### FireForward

In [20]:
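# NOTE(review): this disabled cell is an unmodified copy of the "Preprocess Region T" cell above
# (it still checks the preprocessed file and logs 'preprocess_region_t') — adapt it before enabling.
# submitted_jobs = []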
# submit_job_kwargs = {
#     "identifier": f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
#     "algo_id": algo_config["algorithm_name"],
#     "version": algo_config["algorithm_version"],
#     "username": "gcorradini",
#     "queue": algo_config["queue"],
# }
# for t in list_of_time_steps:
#     print(t)
#     param_kwargs = {"regnm": region[0],"t": json.dumps(t)}
#     output_filepath = preprocess.preprocessed_filename(t, sat=FireConsts.firesrc, region=region, location="s3")
#     if FireIO.os_path_exists(output_filepath):
#         logger.info(f"skipping 'preprocess_region_t' b/c file \
#         already exists for region {region[0]}, {output_filepath}")
#         continue

#     result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
#     submitted_jobs.append(result)
# queued_jobs = validate_job_submission(submitted_jobs)

#### Job Monitoring

In [21]:
# Ask the MAAP API for the current status of every queued job (one request per job).
[maap_api.getJobStatus(queued_job.id) for queued_job in queued_jobs]

['Running']

In [22]:
%%time
# Blocks until every queued job has finished; the returned list holds the futures of jobs that did not succeed.
failed_jobs = poll_on_job_status(queued_jobs)

INFO:backoff:Backing off wait_for_completion(...) for 0.8s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.6s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.4s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 3.3s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 5.7s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 28.9s (RuntimeError)


CPU times: user 149 ms, sys: 13.2 ms, total: 162 ms
Wall time: 40.2 s


In [37]:
# `failed_jobs` holds the futures returned by `poll_on_job_status`, so `.result()` yields the
# finished job object before its result payload is fetched.
# CAUTION: the payload displayed for this job includes the worker log, which printed a DPS token —
# clear this cell's output before sharing the notebook.
# Guarded so the cell does not raise IndexError when no job failed.
failed_jobs[0].result().retrieve_result() if failed_jobs else None

['http://maap-ops-workspace.s3-website-us-west-2.amazonaws.com/dataset/triaged_job/eis-feds-dps-precprocess-region-v3/1.0.0/2024/03/29/7e2ebad6-3dc0-4a9f-b209-fab4d1315014',
 's3://s3-us-west-2.amazonaws.com:80/maap-ops-workspace/dataset/triaged_job/eis-feds-dps-precprocess-region-v3/1.0.0/2024/03/29/7e2ebad6-3dc0-4a9f-b209-fab4d1315014',
 'https://s3.console.aws.amazon.com/s3/buckets/maap-ops-workspace/dataset/triaged_job/eis-feds-dps-precprocess-region-v3/1.0.0/2024/03/29/7e2ebad6-3dc0-4a9f-b209-fab4d1315014/?region=us-east-1&tab=overview',
 "++ python /app/get_username.py\n+ USERNAME=gcorradini\n+ DPS_MACHINE_TOKEN=<REDACTED>\n++ curl -sb -H 'Accept: application/json' -H 'Content-Type: application/json' -H 'dps-token: <REDACTED>