## Generate outputs

What we want out of this algorithm is a snapshot of all the fires at a given time `t`, and a time series of each fire across time.

In [1]:
import os
import datetime
import pandas as pd
import geopandas as gpd

import FireTime, FireObj, FireConsts, postprocess
from utils import timed

# Run configuration shared by every cell below.
region = ["CONUS",]  # note you don't need the shape in here, just the name
# Start and end of the processing window.
# Presumably [year, month, day, 'AM' | 'PM'] - confirm against FireTime.
tst = [2023, 8, 28, 'AM']
ted = [2023, 9, 6, 'AM']

## Read from disk

In [2]:
# Load the pixel table for the run window; it is filtered per fire further
# down to build the `nplist` layer.
allpixels = postprocess.read_allpixels(tst, ted, region)

2024-03-20 20:06:39,150 - FireLog - INFO - func:read_allpixels took: 86.50 ms


In [3]:
# Load the per-fire table for the run window; the snapshot and large-fire
# cells below are all derived from it.
allfires_gdf = postprocess.read_allfires_gdf(tst, ted, region)

2024-03-20 20:06:39,640 - FireLog - INFO - func:read_allfires_gdf took: 174.96 ms


## Write snapshots

In [4]:
%%time
gdf = allfires_gdf.reset_index()

for t in FireTime.t_generator(tst, ted):
    dt = FireTime.t2dt(t)
    data = gdf[gdf.t <= dt].drop_duplicates("fireID", keep="last")
    postprocess.save_snapshot_layers(data, region, tst, t)

2024-03-20 20:07:23,842 - FireLog - INFO - func:save_snapshot_layers took: 806.51 ms
2024-03-20 20:07:24,614 - FireLog - INFO - func:save_snapshot_layers took: 767.69 ms
2024-03-20 20:07:25,533 - FireLog - INFO - func:save_snapshot_layers took: 914.14 ms
2024-03-20 20:07:26,569 - FireLog - INFO - func:save_snapshot_layers took: 1.03 sec
2024-03-20 20:07:27,677 - FireLog - INFO - func:save_snapshot_layers took: 1.10 sec
2024-03-20 20:07:28,985 - FireLog - INFO - func:save_snapshot_layers took: 1.30 sec
2024-03-20 20:07:30,438 - FireLog - INFO - func:save_snapshot_layers took: 1.45 sec
2024-03-20 20:07:32,055 - FireLog - INFO - func:save_snapshot_layers took: 1.61 sec
2024-03-20 20:07:33,773 - FireLog - INFO - func:save_snapshot_layers took: 1.71 sec
2024-03-20 20:07:35,585 - FireLog - INFO - func:save_snapshot_layers took: 1.81 sec
2024-03-20 20:07:37,479 - FireLog - INFO - func:save_snapshot_layers took: 1.89 sec
2024-03-20 20:07:39,442 - FireLog - INFO - func:save_snapshot_layers took

CPU times: user 25.5 s, sys: 527 ms, total: 26 s
Wall time: 33.4 s


## Write the large fires to disk


In [5]:
# Fire IDs to export individually; the selection criterion lives in
# `postprocess.find_largefires`.
large_fires = postprocess.find_largefires(allfires_gdf)

2024-03-20 20:08:00,645 - FireLog - INFO - func:find_largefires took: 6.39 ms


First we'll use the `allpixels` object to create the `nplist` layer

In [6]:
%%time
for fid in large_fires:
    data = allpixels[allpixels["fid"] == fid]
    postprocess.save_fire_nplist(data, region, fid, tst)

2024-03-20 20:09:10,467 - FireLog - INFO - func:save_fire_nplist took: 59.96 ms
2024-03-20 20:09:10,513 - FireLog - INFO - func:save_fire_nplist took: 44.76 ms
2024-03-20 20:09:10,571 - FireLog - INFO - func:save_fire_nplist took: 57.27 ms
2024-03-20 20:09:10,632 - FireLog - INFO - func:save_fire_nplist took: 58.97 ms
2024-03-20 20:09:10,685 - FireLog - INFO - func:save_fire_nplist took: 52.58 ms
2024-03-20 20:09:10,744 - FireLog - INFO - func:save_fire_nplist took: 57.88 ms
2024-03-20 20:09:10,788 - FireLog - INFO - func:save_fire_nplist took: 42.61 ms
2024-03-20 20:09:10,831 - FireLog - INFO - func:save_fire_nplist took: 42.13 ms
2024-03-20 20:09:10,885 - FireLog - INFO - func:save_fire_nplist took: 51.92 ms
2024-03-20 20:09:10,934 - FireLog - INFO - func:save_fire_nplist took: 48.14 ms
2024-03-20 20:09:10,979 - FireLog - INFO - func:save_fire_nplist took: 43.88 ms
2024-03-20 20:09:11,024 - FireLog - INFO - func:save_fire_nplist took: 43.58 ms
2024-03-20 20:09:11,067 - FireLog - INFO

CPU times: user 1.23 s, sys: 45.3 ms, total: 1.27 s
Wall time: 3 s


The rest of the layers will be created directly from the `allfires_gdf`

First let's do a naive version without the merge fixups

In [7]:
%%time
gdf = allfires_gdf.reset_index().copy()
for fid, data in gdf[gdf["fireID"].isin(large_fires)].groupby("fireID"):
    postprocess.save_fire_layers(data, region, fid, tst)

2024-03-20 20:09:18,003 - FireLog - INFO - func:save_fire_layers took: 143.39 ms
2024-03-20 20:09:18,128 - FireLog - INFO - func:save_fire_layers took: 123.41 ms
2024-03-20 20:09:18,274 - FireLog - INFO - func:save_fire_layers took: 145.08 ms
2024-03-20 20:09:18,396 - FireLog - INFO - func:save_fire_layers took: 121.00 ms
2024-03-20 20:09:18,516 - FireLog - INFO - func:save_fire_layers took: 119.14 ms
2024-03-20 20:09:18,627 - FireLog - INFO - func:save_fire_layers took: 110.16 ms
2024-03-20 20:09:18,737 - FireLog - INFO - func:save_fire_layers took: 109.87 ms
2024-03-20 20:09:18,841 - FireLog - INFO - func:save_fire_layers took: 102.53 ms
2024-03-20 20:09:18,959 - FireLog - INFO - func:save_fire_layers took: 117.24 ms
2024-03-20 20:09:19,066 - FireLog - INFO - func:save_fire_layers took: 106.50 ms
2024-03-20 20:09:19,227 - FireLog - INFO - func:save_fire_layers took: 160.18 ms
2024-03-20 20:09:19,347 - FireLog - INFO - func:save_fire_layers took: 119.18 ms
2024-03-20 20:09:19,454 - Fi

CPU times: user 1.8 s, sys: 83.1 ms, total: 1.89 s
Wall time: 5.78 s


Now let's do the merge as well

In [8]:
%%time
# Library version of the export. Judging by the log output it calls
# `save_fire_layers` once per fire and `merge_rows` only for some of them.
postprocess.save_large_fires_layers(allfires_gdf, region, large_fires, tst)

2024-03-20 20:09:30,434 - FireLog - INFO - func:save_fire_layers took: 156.27 ms
2024-03-20 20:09:30,452 - FireLog - INFO - func:merge_rows took: 16.54 ms


77 rows that potentially need a merge


2024-03-20 20:09:30,596 - FireLog - INFO - func:save_fire_layers took: 143.74 ms
2024-03-20 20:09:30,755 - FireLog - INFO - func:save_fire_layers took: 157.60 ms
2024-03-20 20:09:30,891 - FireLog - INFO - func:save_fire_layers took: 134.75 ms
2024-03-20 20:09:30,916 - FireLog - INFO - func:merge_rows took: 23.60 ms
2024-03-20 20:09:31,043 - FireLog - INFO - func:save_fire_layers took: 127.26 ms
2024-03-20 20:09:31,059 - FireLog - INFO - func:merge_rows took: 14.59 ms
2024-03-20 20:09:31,181 - FireLog - INFO - func:save_fire_layers took: 121.75 ms
2024-03-20 20:09:31,304 - FireLog - INFO - func:save_fire_layers took: 121.05 ms
2024-03-20 20:09:31,429 - FireLog - INFO - func:save_fire_layers took: 124.26 ms
2024-03-20 20:09:31,560 - FireLog - INFO - func:save_fire_layers took: 129.78 ms
2024-03-20 20:09:31,684 - FireLog - INFO - func:save_fire_layers took: 122.40 ms
2024-03-20 20:09:31,839 - FireLog - INFO - func:save_fire_layers took: 154.14 ms
2024-03-20 20:09:31,964 - FireLog - INFO -

CPU times: user 2.09 s, sys: 119 ms, total: 2.2 s
Wall time: 7 s


## Merge Experiments

These ones need some merge help:

In [9]:
# Rows whose `mergeid` points at a different fire and that are not flagged
# invalid are the candidates for merging.
merge_needed = gdf["mergeid"].ne(gdf["fireID"]) & gdf["invalid"].eq(False)
print(f"{merge_needed.sum()} rows that potentially need a merge")

# Re-label those rows with the fire they merge into.
# NOTE: this modifies `gdf` in place, so re-running the cell reports 0 rows
# until `gdf` is rebuilt from `allfires_gdf`.
gdf.loc[merge_needed, "fireID"] = gdf.loc[merge_needed, "mergeid"]

77 rows that potentially need a merge


I have two different ideas of how to merge rows:

1) The first version of the `merge_rows` function uses a unary union to join the hulls and then recalculates the fline; the ftype is copied from the first of the merged rows.
2) The second version of the `merge_rows` function uses code that is more similar to the existing merge function. It constructs a MultiGeometry out of the various geometry objects.

In [10]:
@timed
def merge_rows(data):
    """Merge rows of a single fire that share the same timestep `t`.

    For every `t` that occurs more than once in `data`, a temporary
    `FireObj.Fire` is built, its hull is set to the unary union of the
    duplicated rows' hulls, its fire line is refreshed via `updatefline()`,
    and every attribute named by `FireGpkg_sfs.getdd("all")` is written back
    onto the rows for that `t`. Duplicate rows are then dropped.

    Parameters
    ----------
    data : DataFrame
        Rows belonging to one fire. Needs the columns `t`, `t_st`, `hull`,
        `ftype` and `fireID`, plus every column named by
        `FireGpkg_sfs.getdd("all")`.

    Returns
    -------
    DataFrame
        The merged rows with `t` restored as an ordinary column.

    Notes
    -----
    The fire ID is taken from the rows being merged instead of from a
    notebook-level `fid` left behind by an earlier loop, so the result no
    longer depends on which cell ran last.

    NOTE(review): `allpixels` is still read from the notebook namespace, and
    `FireGpkg_sfs` is not imported in the imports cell - both must exist
    before this function is called.
    """
    from shapely.ops import unary_union

    dd = FireGpkg_sfs.getdd("all")
    output = data.set_index("t").copy()

    # clean up any merges that are needed
    for dt, rows in data[data.t.duplicated(False)].groupby("t"):
        # `data` holds a single fire, so any row carries its ID.
        fid = rows["fireID"].iloc[0]

        f = FireObj.Fire(fid, FireTime.dt2t(dt), allpixels)
        f.t_st = FireTime.dt2t(rows["t_st"].min())
        f.hull = unary_union(rows["hull"].values)

        # this might be doing more work than it needs to
        f.updatefline()

        # ftype is unused in the output files
        f.ftype = rows.ftype.iloc[0]

        # Copy the recomputed attributes onto every row sharing this `t`;
        # time-like attributes are converted back to datetimes first.
        for k, tp in dd.items():
            if tp == "datetime64[ns]":
                output.loc[dt, k] = FireTime.t2dt(getattr(f, k))
            else:
                output.loc[dt, k] = getattr(f, k)

    # Restore the declared dtype of each column after the element-wise writes.
    for k, tp in dd.items():
        output[k] = output[k].astype(tp)

    # NOTE(review): drop_duplicates() compares column values only and ignores
    # the `t` index, so rows are collapsed only if every column matches.
    return output.drop_duplicates().reset_index()

In [11]:
@timed
def merge_rows(data):
    """Merge rows of a single fire that share the same timestep `t`.

    One row is kept per `t`. For each `t` that occurred more than once:

    * `n_pixels`, `n_newpixels`, `farea`, `fperim` and `flinelen` are summed;
    * `t_st` becomes the earliest start among the rows;
    * `pixden` is averaged weighted by `farea`, and `meanFRP` weighted by
      `n_pixels`;
    * `hull`, `fline` and `nfp` are each replaced by the dissolved (unioned)
      geometry of the rows.

    Parameters
    ----------
    data : GeoDataFrame
        Rows belonging to one fire, with `t` as an ordinary column and the
        columns listed above present.

    Returns
    -------
    GeoDataFrame
        One row per `t`, with `t` as an ordinary column.

    Notes
    -----
    `GeoDataFrame.dissolve()` unions only the active geometry column; every
    other column, including additional geometry columns, is aggregated with
    the default `aggfunc="first"`. A single `rows.dissolve()` therefore
    merged at most one of `hull` / `fline` / `nfp` and kept the first row's
    value for the others. Each column is now dissolved with that column set
    as the active geometry.
    """
    output = data.drop_duplicates(subset=["t"]).set_index("t").copy()

    # clean up any merges that are needed
    for dt, rows in data[data.t.duplicated(False)].groupby("t"):
        # first get the weighted sums for pixden and meanFRP
        pixweight = (rows["pixden"] * rows["farea"]).sum()
        FRPweight = (rows["meanFRP"] * rows["n_pixels"]).sum()

        for col in ["n_pixels", "n_newpixels", "farea", "fperim", "flinelen"]:
            output.loc[dt, col] = rows[col].sum()

        output.loc[dt, "t_st"] = rows["t_st"].min()
        # Divide by the summed totals written just above to finish the
        # weighted means.
        output.loc[dt, "pixden"] = pixweight / output.loc[dt, "farea"]
        output.loc[dt, "meanFRP"] = FRPweight / output.loc[dt, "n_pixels"]

        # Union each geometry column separately (see Notes).
        for col in ["hull", "fline", "nfp"]:
            dissolved = rows.set_geometry(col).dissolve()
            output.loc[dt, col] = dissolved[col].item()

    return output.reset_index()

## Experiments

Does it make a big difference if you filter first rather than after?

In [None]:
%%time
# Variant 1: filter down to the large fires first, then group by fire.
# `f = fid` is a placeholder body so that only the iteration is timed.
for fid, data in gdf[gdf["fireID"].isin(large_fires)].groupby("fireID"):
    f = fid

In [None]:
%%time
# Variant 2: group every fire, then skip the groups that are not large fires.
for fid, data in gdf.groupby("fireID"):
    if fid in large_fires:
        f = fid

In [None]:
%%time
# Variant 3: no groupby; build a boolean mask over the whole frame for each
# large fire.
for fid in large_fires:
    data = gdf[gdf["fireID"] == fid]
    f = fid

In [None]:
%%time
# Variant 4: look each large fire up by label on the original, still-indexed
# frame. Presumably `fireID` is the (first) index level of `allfires_gdf` -
# confirm.
for fid in large_fires:
    data = allfires_gdf.loc[fid]
    f = fid