# MAAP ADE - Performance of NRT run for CONUS 2023

How long does it take to run the algorithm at each timestep and put all outputs into S3?

In [1]:
# If you haven't installed the fireatlas code yet, uncomment the following line and run this cell.

!pip install -e .. -q

# After this runs, restart the notebook kernel.

[0m

In [2]:
import ast

import holoviews as hv
import hvplot.pandas
import pandas as pd

from fireatlas import FireTime

First we need to process the log file to get the timings per section and per function. The function below is written specifically for the `running.log` of an NRT run, where the `t` in the output refers to `ted`. It prefixes the known function names with a zero-padded index so that they sort roughly in the order in which they occur in the algorithm.

In [3]:
def prep_log_df(filepath):
    """Parse per-function timings out of an NRT ``running.log`` file.

    Each line containing "Starting full run" sets the current run time: the
    list that follows ``ted=`` on that line is parsed and handed to
    ``FireTime.t2dt``. Every subsequent line containing ``func:`` is recorded
    against that run time. ``func:`` lines that occur before the first
    "Starting full run" line are ignored.

    Function names found in ``ordered_funcs`` are prefixed with their
    zero-padded position in that list; names starting with "Dask upload" are
    collapsed to "Dask upload of files".

    Parameters
    ----------
    filepath : str or path-like
        Path of the log file to read.

    Returns
    -------
    pandas.DataFrame
        One row per timed call with columns ``t``, ``func`` and ``took``.
        The three columns are present even when no timing line was found.

    Raises
    ------
    ValueError
        If a ``func:`` line does not have the ``func:<name> took: <value> <unit>``
        layout, or if the value / ``ted`` list is not a plain literal.
    """
    ordered_funcs = [
        "preprocess_input_file",
        "preprocess_region",
        "preprocess_region_t",
        "rehydrate",
        "Fire_expand_rtree",
        "Fire_merge_rtree",
        "Fire_Forward_one_step",
        "Fire_Forward",
        "fill_activefire_rows",
        "save_snapshots",
        "save_large_fires_nplist",
        "save_large_fires_layers",
        "save_combined_large_fire_layers",
        "Run",
    ]
    columns = ["t", "func", "took"]

    run_time = None
    records = []
    # Stream the file instead of loading every line into memory first.
    with open(filepath, "r") as f:
        for line in f:
            if "Starting full run" in line:
                ted_items = line.split("ted=[")[1].split("] ")[0]
                # literal_eval only accepts Python literals, so text coming
                # from the log is parsed but never executed.
                run_time = FireTime.t2dt(ast.literal_eval(f"[{ted_items}]"))

            if not run_time or "func:" not in line:
                continue

            func_str, took_str = line.split("func:")[1].split("took: ")
            val_str, unit_str = took_str.split(" ")

            func = func_str.strip()
            if func.startswith("Dask upload"):
                # Upload entries differ per file; group them under one label.
                func = "Dask upload of files"
            if func in ordered_funcs:
                func = f"{ordered_funcs.index(func):02d} {func}"

            records.append(
                {
                    "t": run_time,
                    "func": func,
                    # strip() also removes a trailing "\r" from CRLF logs.
                    "took": pd.to_timedelta(float(val_str), unit=unit_str.strip()),
                }
            )

    # Passing the columns explicitly keeps the schema stable for an empty log,
    # so callers can still access `.took` / `.func` without an AttributeError.
    return pd.DataFrame(records, columns=columns)

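Plot the timings. The line shows the `took` values from `maap-output.csv` over time, and the points show the individual function timings parsed from the log, restricted to calls that took longer than one minute. The vertical lines mark the times at which something was changed in how the algorithm runs.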

In [4]:
# Timings from the CSV: parse the timestamp column and the duration column (seconds).
df = pd.read_csv("maap-output.csv").assign(
    t=lambda frame: pd.to_datetime(frame["t"]),
    took=lambda frame: pd.to_timedelta(frame["took"], unit="s"),
)

# Per-function timings parsed from the NRT log.
log_df = prep_log_df("/projects/fireatlas_nrt/running.log")

# (timestamp, line color) for each change in how the algorithm was run.
change_markers = [
    ("2023-01-08 18:00", "green"),   # fix: not reading previous allfires
    ("2023-05-19 18:00", "red"),     # clean out logs
    ("2023-06-12 6:00", "purple"),   # bump workers down to 2
    ("2023-06-25 18:00", "blue"),    # up the memory and bump workers up to 6
]

timing_plot = df.hvplot(
    x="t",
    y="took",
    rot=90,
    grid=True,
    title=f"Time spent: {df.took.sum().round('s')}",
    height=500,
    width=1000,
)
# Overlay one vertical line per change, in chronological order.
for when, color in change_markers:
    timing_plot = timing_plot * hv.VLine(pd.Timestamp(when)).opts(color=color)

# Only function calls that took longer than one minute are drawn as points.
slow_calls = log_df[log_df.took > pd.Timedelta(1, unit="min")]

timing_plot * slow_calls.hvplot.scatter(by="func", x="t")