# VEDA - Performance NRT run for CONUS 2023

How long does it take to run the algorithm at each timestep and put all outputs into S3?

In [2]:
# If you haven't installed the fireatlas code yet, uncomment the following line and run this cell.

# !pip install -e .. -q

# After this runs, restart the notebook kernel.

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.3.1 requires fsspec==2024.3.1, but you have fsspec 2024.6.0 which is incompatible.
xmip 0.7.2 requires xgcm<0.7.0, but you have xgcm 0.8.1 which is incompatible.

In [1]:
import ast

import holoviews as hv
import pandas as pd

import hvplot.pandas

from fireatlas import FireTime

First we need to process the log file to get the timings per section and per function. The function below, `prep_log_df`, is written specifically for the `running.log` of an NRT run, where the `t` in the output refers to `ted`. It prefixes the known function names with integers so that they appear roughly in the order in which they occur in the algorithm.

In [2]:
def prep_log_df(filepath):
    """Parse an NRT ``running.log`` into a DataFrame of per-function timings.

    Every line containing ``"Starting full run"`` sets the current timestep
    from its ``ted=[...]`` list, converted with ``FireTime.t2dt``. Each later
    line of the form ``func:<name> took: <value> <unit>`` becomes one row
    tagged with that timestep. Timing lines that appear before the first
    run start are ignored.

    Names listed in ``ordered_funcs`` are prefixed with their two-digit
    position in that list so they sort roughly in the order in which they
    occur in the algorithm. Any name starting with ``"Dask upload"`` is
    collapsed to ``"Dask upload of files"``.

    Parameters
    ----------
    filepath : str or path-like
        Path of the log file to read.

    Returns
    -------
    pandas.DataFrame
        One row per timing line with columns ``t`` (the value returned by
        ``FireTime.t2dt``), ``func`` (str) and ``took`` (``pandas.Timedelta``).
        The three columns are present even when no timing line was found.

    Raises
    ------
    ValueError
        If a matched line does not have the expected layout, e.g. the
        ``ted`` list contains something other than plain literals or the
        duration is not a number.
    """
    ordered_funcs = [
        "preprocess_input_file",
        "preprocess_region",
        "preprocess_region_t",
        "rehydrate",
        "Fire_expand_rtree",
        "Fire_merge_rtree",
        "Fire_Forward_one_step",
        "Fire_Forward",
        "fill_activefire_rows",
        "save_snapshots",
        "save_large_fires_nplist",
        "save_large_fires_layers",
        "save_combined_large_fire_layers",
        "Run",
    ]
    # Build the "NN name" labels once instead of searching the list per row.
    ordered_labels = {
        name: f"{position:02d} {name}"
        for position, name in enumerate(ordered_funcs)
    }

    ted = None
    values = []
    with open(filepath, "r") as f:
        for line in f:
            if "Starting full run" in line:
                ted_str = line.split("ted=[")[1].split("] ")[0]
                # literal_eval accepts only Python literals (ints, quoted
                # strings, ...), so log content can never be executed as code.
                ted = FireTime.t2dt(list(ast.literal_eval(f"[{ted_str}]")))
            if ted is not None and "func:" in line:
                func_str, took_str = line.split("func:")[1].split("took: ")
                val_str, unit_str = took_str.split(" ")
                func = func_str.strip()
                if func.startswith("Dask upload"):
                    func = "Dask upload of files"
                func = ordered_labels.get(func, func)
                values.append(
                    {
                        "t": ted,
                        "func": func,
                        "took": pd.to_timedelta(float(val_str), unit=unit_str.strip()),
                    }
                )

    # Explicit columns keep downstream code such as `log_df.took` working
    # when the log holds no timing lines at all.
    return pd.DataFrame(values, columns=["t", "func", "took"])

Plot the timings for each function in each NRT run.

In [4]:
# Per-run durations: parse the timestamp column and read `took` as seconds.
df = pd.read_csv("output.csv").assign(
    t=lambda frame: pd.to_datetime(frame["t"]),
    took=lambda frame: pd.to_timedelta(frame["took"], unit="s"),
)

# Per-function timings parsed from the NRT run log.
log_df = prep_log_df("/home/jovyan/fireatlas_nrt/running.log")

# Line plot of how long each run took; the title shows the running total.
total_took = df.took.sum().round('s')
x_range = (pd.Timestamp("2023-07-01"), df.t.max() + pd.Timedelta(1, unit="day"))
run_curve = df.hvplot(
    x="t",
    y="took",
    rot=90,
    grid=True,
    title=f"Time spent so far: {total_took}",
    height=500,
    width=1000,
    xlim=x_range,
)

# Green marker: accidentally wrote to running.log from California 2020 run.
log_mixup_marker = hv.VLine(pd.Timestamp("2023-03-15 06:00")).opts(color="green")

# Only functions that took longer than a minute are drawn, coloured by name.
slow_calls = log_df[log_df.took > pd.Timedelta(1, unit="min")]
slow_call_points = slow_calls.hvplot.scatter(by="func", x="t")

run_curve * log_mixup_marker * slow_call_points