In [None]:
!pip install -e ..

In [48]:
import functools
import json
import argparse
import os
import time
from typing import Tuple
import concurrent
from concurrent.futures import ThreadPoolExecutor

from fireatlas import preprocess
from fireatlas import FireTime
from fireatlas.FireLog import logger
from fireatlas.FireTypes import Region, TimeStep
from fireatlas.utils import timed
from fireatlas import FireConsts
from fireatlas import FireIO

from maap.maap import MAAP
from maap.dps.dps_job import DPSJob
from maap.utils import algorithm_utils


class JobSubmissionException(Exception):
    """Raised when one or more jobs could not be submitted."""


def get_algorithm_config_filepath(dir_names):
    """Build the path to 'algorithm_config.yaml' for each given directory name.

    Paths are anchored at the current working directory (in a notebook there is
    no module file to anchor on), so each result is
    ``<cwd>/<dir_name>/algorithm_config.yaml``.

    :param dir_names: iterable of directory names, relative to the working directory
    :return: list of config file paths, in the same order as ``dir_names``
    """
    base_dir = os.getcwd()
    config_paths = []
    for dir_name in dir_names:
        config_paths.append(
            os.path.join(base_dir, f'{dir_name}', 'algorithm_config.yaml')
        )
    return config_paths


def validate_job_submission(submitted_jobs: Tuple[DPSJob]) -> Tuple[DPSJob]:
    """Check that every job was accepted at submission time.

    we don't retry job submissions, they should ideally always work

    :param submitted_jobs: the job objects returned by the submit calls
    :return: ``submitted_jobs`` unchanged when none of them has a 'failed' status
    :raises JobSubmissionException: if at least one job has a 'failed' status
    """
    # compare case-insensitively, the same way `poll_on_job_status` treats statuses
    failed_submissions = [
        job for job in submitted_jobs if str(job.status).lower() == 'failed'
    ]
    # test the list itself: `any(...)` would depend on the truthiness of the job objects
    if failed_submissions:
        raise JobSubmissionException(f"[ SUBMISSION FAILED ]: the following jobs failed to submit {failed_submissions}")
    return submitted_jobs


def wait_for_job(dps_job: DPSJob) -> DPSJob:
    """Block until the given job has finished, then hand back the result.

    Delegates to ``DPSJob.wait_for_completion``, which blocks and retries with
    exponential backoff:
    https://github.com/MAAP-Project/maap-py/blob/master/maap/dps/dps_job.py#L80C9-L80C28

    The lower-cased statuses appear to be: ['failed', 'succeeded', 'accepted', 'running']
    https://github.com/MAAP-Project/maap-py/blob/master/maap/dps/dps_job.py
    """
    completed_job = dps_job.wait_for_completion()
    return completed_job


def poll_on_job_status(jobs: Tuple[DPSJob]) -> Tuple[DPSJob]:
    """Wait for every job to finish and collect the ones that did not succeed.

    Each job is waited on in a worker thread via ``wait_for_job``. A job counts
    as failed when its final status is anything other than 'succeeded'
    (case-insensitive) or when waiting on it / reading its status raised; the
    exception is logged and the job is still reported, so an empty result
    really means that every job succeeded.

    :param jobs: the jobs to wait on
    :return: list with one entry per unsuccessful job. NOTE: the entries are the
        ``concurrent.futures.Future`` objects wrapping each job (call
        ``.result()`` to get the job, which re-raises if waiting failed), not
        the jobs themselves.
    """
    failed_jobs = []
    # don't want to overwhelm the MAAP api so keeping max_workers relatively small
    with ThreadPoolExecutor(max_workers=5) as executor:
        dps_job_futures = [executor.submit(wait_for_job, dps_job) for dps_job in jobs]
        for future in concurrent.futures.as_completed(dps_job_futures):
            try:
                succeeded = future.result().retrieve_status().lower() == 'succeeded'
            except Exception as e:
                logger.exception(f"'poll_on_jobs_status' failed with {e}")
                # an unknown outcome must not be silently counted as a success
                succeeded = False
            if not succeeded:
                failed_jobs.append(future)
    return failed_jobs


def track_submitted_jobs(submitted_jobs: Tuple[DPSJob]) -> Tuple[DPSJob]:
    """Validate the submissions, then wait on the jobs and return the failures.

    Raises whatever ``validate_job_submission`` raises when a submission failed;
    otherwise returns the result of ``poll_on_job_status`` for the queued jobs.
    """
    return poll_on_job_status(validate_job_submission(submitted_jobs))

In [49]:
# Start and end of the processing window, each as [year, month, day, 'AM' or 'PM'].
tst = [2023, 1, 1, 'AM']
ted = [2023, 3, 1, 'PM']

# Region as [name, bounding box]; later cells pass these on as `regnm` and `bbox`.
region = [
    "V3ShastaTrinity",
    [-124.354248, 40.624376, -122.250366, 42.045213],
]

# Materialise every time step between `tst` and `ted` once, for reuse below.
list_of_time_steps = [*FireTime.t_generator(tst, ted)]

### Data Update Checker

In [9]:
# Load the algorithm definition for the data update checker.
configs = get_algorithm_config_filepath(['data_update_checker'])
maap_api = MAAP(maap_host='api.maap-project.org')
algo_config = algorithm_utils.read_yaml_file(configs[0])
# 'inputs' is removed before printing; nothing below reads it.
del algo_config['inputs']
print(algo_config)

# Arguments shared by every submission of this algorithm.
submit_job_kwargs = dict(
    identifier=f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
    algo_id=algo_config["algorithm_name"],
    version=algo_config["algorithm_version"],
    username="gcorradini",
    queue=algo_config["queue"],
)
# This algorithm is submitted without job parameters.
param_kwargs = {}
result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
submitted_jobs = [result]
queued_jobs = validate_job_submission(submitted_jobs)

{'algorithm_description': 'Data update checker', 'algorithm_name': 'eis-feds-dps-data-checker-v3', 'algorithm_version': '1.0.0', 'environment': 'ubuntu', 'repository_url': 'https://repo.maap-project.org/gcorradini/fireatlas_nrt.git', 'docker_container_url': 'mas.maap-project.org/root/maap-workspaces/base_images/vanilla:v3.1.4', 'queue': 'maap-dps-eis-worker-64gb', 'run_command': 'fireatlas_nrt/maap_runtime/run_dps_data_checker.sh', 'build_command': 'fireatlas_nrt/maap_runtime/run_dps_build.sh', 'disk_space': '10GB'}


#### Job Monitoring

In [21]:
[maap_api.getJobStatus(job.id) for job in queued_jobs]
%%time
failed_jobs = poll_on_job_status(queued_jobs)

['Running']

### Preprocess Region

In [18]:
# Load the algorithm definition for the region preprocessing step.
configs = get_algorithm_config_filepath(['preprocess_region'])
maap_api = MAAP(maap_host='api.maap-project.org')
algo_config = algorithm_utils.read_yaml_file(configs[0])
# 'inputs' is removed before printing; nothing below reads it.
del algo_config['inputs']
print(algo_config)

# Arguments shared by every submission of this algorithm.
submit_job_kwargs = dict(
    identifier=f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
    algo_id=algo_config["algorithm_name"],
    version=algo_config["algorithm_version"],
    username="gcorradini",
    queue=algo_config["queue"],
)
# Job parameters: the region name and its bounding box.
param_kwargs = dict(regnm=region[0], bbox=region[1])
result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
submitted_jobs = [result]
queued_jobs = validate_job_submission(submitted_jobs)

{'algorithm_name': 'eis-feds-dps-precprocess-region-v3', 'algorithm_description': 'create geojson for a region and maybe static source filter', 'algorithm_version': '1.0.0', 'environment': 'ubuntu', 'repository_url': 'https://repo.maap-project.org/gcorradini/fireatlas_nrt.git', 'docker_container_url': 'mas.maap-project.org/root/maap-workspaces/base_images/vanilla:v3.1.4', 'queue': 'maap-dps-eis-worker-64gb', 'run_command': 'fireatlas_nrt/maap_runtime/run_preprocess_region.sh', 'build_command': 'fireatlas_nrt/maap_runtime/run_dps_build.sh', 'disk_space': '100GB'}


#### Job Monitoring

In [21]:
[maap_api.getJobStatus(job.id) for job in queued_jobs]
%%time
failed_jobs = poll_on_job_status(queued_jobs)

['Running']

### Preprocess Region T

In [43]:
# Load the algorithm definition for the per-time-step preprocessing.
configs = get_algorithm_config_filepath(['preprocess_region_t',])
maap_api = MAAP(maap_host='api.maap-project.org')
algo_config = algorithm_utils.read_yaml_file(configs[0])
# 'inputs' is removed before printing; nothing below reads it.
algo_config.pop('inputs')
print(algo_config)

submitted_jobs = []
# Arguments shared by every submission of this algorithm.
submit_job_kwargs = {
    "identifier": f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
    "algo_id": algo_config["algorithm_name"],
    "version": algo_config["algorithm_version"],
    "username": "gcorradini",
    "queue": algo_config["queue"],
}
# Submit one job per time step, skipping steps whose output already exists.
for t in list_of_time_steps:
    print(t)
    param_kwargs = {"regnm": region[0],"t": json.dumps(t)}
    output_filepath = preprocess.preprocessed_filename(t, sat=FireConsts.firesrc, region=region, location="s3")
    if FireIO.os_path_exists(output_filepath):
        # Adjacent f-strings instead of a backslash continuation inside the
        # literal, which embedded the next line's indentation in the message.
        logger.info(
            f"skipping 'preprocess_region_t' b/c file "
            f"already exists for region {region[0]}, {output_filepath}"
        )
        continue

    result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
    submitted_jobs.append(result)
queued_jobs = validate_job_submission(submitted_jobs)

{'algorithm_name': 'eis-feds-dps-preprocess-region-and-t-v3', 'algorithm_description': 'do initial filtering and clustering and write to dataframe', 'algorithm_version': '1.0.0', 'environment': 'ubuntu', 'repository_url': 'https://repo.maap-project.org/gcorradini/fireatlas_nrt.git', 'docker_container_url': 'mas.maap-project.org/root/maap-workspaces/base_images/vanilla:v3.1.4', 'queue': 'maap-dps-eis-worker-64gb', 'run_command': 'fireatlas_nrt/maap_runtime/run_preprocess_region_and_t.sh', 'build_command': 'fireatlas_nrt/maap_runtime/run_dps_build.sh', 'disk_space': '100GB'}
[2023, 1, 1, 'AM']
[2023, 1, 1, 'PM']
[2023, 1, 2, 'AM']
[2023, 1, 2, 'PM']
[2023, 1, 3, 'AM']
[2023, 1, 3, 'PM']
[2023, 1, 4, 'AM']
[2023, 1, 4, 'PM']
[2023, 1, 5, 'AM']
[2023, 1, 5, 'PM']
[2023, 1, 6, 'AM']
[2023, 1, 6, 'PM']
[2023, 1, 7, 'AM']
[2023, 1, 7, 'PM']
[2023, 1, 8, 'AM']
[2023, 1, 8, 'PM']
[2023, 1, 9, 'AM']
[2023, 1, 9, 'PM']
[2023, 1, 10, 'AM']
[2023, 1, 10, 'PM']
[2023, 1, 11, 'AM']
[2023, 1, 11, 'PM'

#### Job Monitoring

In [45]:
#[maap_api.getJobStatus(job.id) for job in queued_jobs]

In [46]:
%%time
# Wait on every queued job and keep whatever poll_on_job_status reports as failed.
failed_jobs = poll_on_job_status(queued_jobs)

INFO:backoff:Backing off wait_for_completion(...) for 0.7s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.2s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.2s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.9s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.4s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 1.9s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.6s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.4s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.8s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.8s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.8s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 0.2s (RuntimeError)
INFO:backoff:Backing off wait_for_completion(...) for 5.1s (RuntimeError)
INFO:backoff:Backing off wait_for_comp

CPU times: user 6.33 s, sys: 284 ms, total: 6.62 s
Wall time: 9min 48s


In [47]:
# Number of entries returned by the polling step above.
len(failed_jobs)

0

### FireForward

In [20]:
# NOTE(review): this section is titled FireForward but loads the
# 'data_update_checker' algorithm config - confirm which config is intended.
configs = get_algorithm_config_filepath(['data_update_checker',])
maap_api = MAAP(maap_host='api.maap-project.org')
algo_config = algorithm_utils.read_yaml_file(configs[0])
# 'inputs' is removed before printing; nothing below reads it.
algo_config.pop('inputs')
print(algo_config)

submitted_jobs = []
# Arguments shared by every submission of this algorithm.
submit_job_kwargs = {
    "identifier": f"job-{algo_config['algorithm_name']}:{algo_config['algorithm_version']}",
    "algo_id": algo_config["algorithm_name"],
    "version": algo_config["algorithm_version"],
    "username": "gcorradini",
    "queue": algo_config["queue"],
}
# Take the last generated time step explicitly instead of relying on a loop
# variable left behind by another cell, so this cell runs on a fresh kernel.
t = list_of_time_steps[-1]
param_kwargs = {"regnm": region[0],"t": json.dumps(t)}

result = maap_api.submitJob(**submit_job_kwargs, **param_kwargs)
submitted_jobs.append(result)
queued_jobs = validate_job_submission(submitted_jobs)

#### Job Monitoring

In [21]:
[maap_api.getJobStatus(job.id) for job in queued_jobs]
%%time
failed_jobs = poll_on_job_status(queued_jobs)

['Running']

In [40]:
#failed_jobs[0].result().retrieve_result()