# Data Processing Code

## Install libraries

In [17]:
# install libraries
! pip install xarray netcdf4 pandas geopandas

Collecting geopandas
  Downloading geopandas-1.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Downloading pyogrio-0.11.0-cp312-cp312-win_amd64.whl.metadata (5.4 kB)
Collecting pyproj>=3.5.0 (from geopandas)
  Downloading pyproj-3.7.1-cp312-cp312-win_amd64.whl.metadata (31 kB)
Collecting shapely>=2.0.0 (from geopandas)
  Downloading shapely-2.1.1-cp312-cp312-win_amd64.whl.metadata (7.0 kB)
Downloading geopandas-1.1.0-py3-none-any.whl (338 kB)
Downloading pyogrio-0.11.0-cp312-cp312-win_amd64.whl (19.2 MB)
   ---------------------------------------- 0.0/19.2 MB ? eta -:--:--
   ---------------- ----------------------- 7.9/19.2 MB 40.7 MB/s eta 0:00:01
   ---------------------------------- ----- 16.5/19.2 MB 41.7 MB/s eta 0:00:01
   ---------------------------------------- 19.2/19.2 MB 37.8 MB/s eta 0:00:00
Downloading pyproj-3.7.1-cp312-cp312-win_amd64.whl (6.3 MB)
   ---------------------------------------- 0.0/6.3 MB ? eta -:--:--
   ----------------

## Import libraries

In [18]:
# import libraries
import xarray as xr
import pandas as pd
import geopandas as gpd

## Playground

The playground provides a space for users to play around with code and experiment with the data

In [None]:
# example era5 data importation
era5_play = xr.open_dataset("..\\data\\raw_data\\era5_downloads\\era5_us_2020_01.nc", engine = "netcdf4")

In [None]:
# convert era5 to dataframe
era5_play = era5_play.to_dataframe()

In [None]:
# Show the first few rows of era5
print(era5_play.tail())

                                        number expver       u10       v10  \
valid_time          latitude longitude                                      
2020-01-31 23:00:00 18.9     -68.10          0   0001 -3.658166 -3.355125   
                             -67.85          0   0001 -3.660119 -3.796532   
                             -67.60          0   0001 -4.077111 -3.681297   
                             -67.35          0   0001 -4.454064 -3.382469   
                             -67.10          0   0001 -4.781701 -2.624657   

                                               d2m         t2m         msl  \
valid_time          latitude longitude                                       
2020-01-31 23:00:00 18.9     -68.10     295.558838  299.283569  101750.750   
                             -67.85     295.584229  299.312866  101754.000   
                             -67.60     295.597900  299.314819  101756.500   
                             -67.35     295.543213  299.342163  101759

## Process the fire perimeter data

In [68]:
# Load CSV into DataFrame
wfigs_perims = pd.read_csv("..\\data\\raw_data\\WFIGS Current Interagency Fire Perimeters\\WFIGS_Interagency_Perimeters_-3500393626074286023.csv")

  wfigs_perims = pd.read_csv("..\\data\\raw_data\\WFIGS Current Interagency Fire Perimeters\\WFIGS_Interagency_Perimeters_-3500393626074286023.csv")


In [69]:
# view the data
print(wfigs_perims.columns.tolist())
print(wfigs_perims.head())

['OBJECTID', 'poly_SourceOID', 'poly_IncidentName', 'poly_FeatureCategory', 'poly_MapMethod', 'poly_GISAcres', 'poly_DeleteThis', 'poly_FeatureAccess', 'poly_FeatureStatus', 'poly_IsVisible', 'poly_CreateDate', 'poly_DateCurrent', 'poly_PolygonDateTime', 'poly_IRWINID', 'poly_FORID', 'poly_Acres_AutoCalc', 'poly_SourceGlobalID', 'poly_Source', 'attr_SourceOID', 'attr_ABCDMisc', 'attr_ADSPermissionState', 'attr_CalculatedAcres', 'attr_ContainmentDateTime', 'attr_ControlDateTime', 'attr_CreatedBySystem', 'attr_IncidentSize', 'attr_DiscoveryAcres', 'attr_DispatchCenterID', 'attr_EstimatedCostToDate', 'attr_FinalAcres', 'attr_FFReportApprovedByTitle', 'attr_FFReportApprovedByUnit', 'attr_FFReportApprovedDate', 'attr_FireBehaviorGeneral', 'attr_FireBehaviorGeneral1', 'attr_FireBehaviorGeneral2', 'attr_FireBehaviorGeneral3', 'attr_FireCause', 'attr_FireCauseGeneral', 'attr_FireCauseSpecific', 'attr_FireCode', 'attr_FireDepartmentID', 'attr_FireDiscoveryDateTime', 'attr_FireMgmtComplexity', '

In [73]:
willow_fires = wfigs_perims[wfigs_perims["poly_IncidentName"] == "Willow"]
print(willow_fires["attr_UniqueFireIdentifier"].nunique())
print(willow_fires["attr_UniqueFireIdentifier"].unique())


26
['2021-AZSCA-001676' '2021-CAMDF-000577' '2021-CALPF-001493'
 '2020-AZTNF-002514' '2020-WYCOX-200312' '2022-MIHMF-220119'
 '2021-COSJF-001044' '2020-AZASF-000689' '2022-NVWID-020365'
 '2020-IDNCF-000069' '2020-IDBOD-000518' '2021-CACDD-006676'
 '2023-SDSDS-230189' '2023-MN3SS-000019' '2023-AKMID-000293'
 '2023-AZASF-000170' '2023-AZASF-000611' '2024-WIWIS-FIRE122391'
 '2024-WIWIS-FIRE121832' '2024-FLBCP-001015' '2024-AZTNF-001334'
 '2024-WVNRP-000359' '2025-WIWIS-FIRE123428' '2025-WIWIS-FIRE123459'
 '2025-NDFTA-000265' '2025-WIWIS-FIRE123935']


In [77]:
# Convert polygon datetime column to datetime type (adjust column name if needed)
wfigs_perims['date'] = pd.to_datetime(wfigs_perims['poly_PolygonDateTime'], errors='coerce').dt.date

willow_fires = wfigs_perims[wfigs_perims["poly_IncidentName"] == "Willow"]

unique_fire_ids = willow_fires["attr_UniqueFireIdentifier"].unique()

for fire_id in unique_fire_ids:
    print(f"--- Fire ID: {fire_id} ---")
    single_fire = willow_fires[willow_fires["attr_UniqueFireIdentifier"] == fire_id].sort_values(by="date")
    print(single_fire[["date", "poly_GISAcres"]])
    print("\n")



--- Fire ID: 2021-AZSCA-001676 ---
            date  poly_GISAcres
1253  2021-06-29            5.0


--- Fire ID: 2021-CAMDF-000577 ---
     date  poly_GISAcres
2802  NaT            NaN


--- Fire ID: 2021-CALPF-001493 ---
     date  poly_GISAcres
5426  NaT       2877.869


--- Fire ID: 2020-AZTNF-002514 ---
            date  poly_GISAcres
5727  2020-08-18            3.0


--- Fire ID: 2020-WYCOX-200312 ---
     date  poly_GISAcres
5918  NaT            NaN


--- Fire ID: 2022-MIHMF-220119 ---
            date  poly_GISAcres
8479  2022-08-05            0.1


--- Fire ID: 2021-COSJF-001044 ---
            date  poly_GISAcres
8860  2021-08-07            0.1


--- Fire ID: 2020-AZASF-000689 ---
            date  poly_GISAcres
9090  2021-01-05            NaN


--- Fire ID: 2022-NVWID-020365 ---
             date  poly_GISAcres
12083  2022-08-02           0.31


--- Fire ID: 2020-IDNCF-000069 ---
      date  poly_GISAcres
12117  NaT            NaN


--- Fire ID: 2020-IDBOD-000518 ---
       

In [None]:
# Step 1: Parse polygon datetime and extract date
wfigs_perims["poly_PolygonDateTime"] = wfigs_perims["date"] = wfigs_perims["poly_PolygonDateTime"].dt.datepd.to_datetime(wfigs_perims["poly_PolygonDateTime"], errors='coerce')


In [58]:
# Step 2: Sort by fire name, date, and descending acreage to keep largest per day
wfigs_perims_sorted = wfigs_perims.sort_values(
    by=["poly_IncidentName", "date", "poly_GISAcres"],
    ascending=[True, True, False]
)

In [59]:
# Step 3: Keep largest perimeter per fire per day
wfigs_perims_daily_largest = wfigs_perims_sorted.drop_duplicates(
    subset=["poly_IncidentName", "date"]
).copy()

In [60]:
# Step 4: Calculate daily growth in acres per fire
wfigs_perims_daily_largest["daily_growth_acres"] = wfigs_perims_daily_largest.groupby("poly_IncidentName")["poly_GISAcres"].diff().fillna(0)


In [61]:
# Step 5: Prepare output columns and sort
wfigs_perims_output = wfigs_perims_daily_largest[["poly_IncidentName", "date", "poly_GISAcres", "daily_growth_acres"]].rename(
    columns={"poly_GISAcres": "cumulative_acres"}
).sort_values(by=["poly_IncidentName", "date"]).reset_index(drop=True)

In [63]:
# Step 6: Print example output
print(wfigs_perims_output.head(10))

     poly_IncidentName        date  cumulative_acres  daily_growth_acres
0            \tMA07038  2022-12-14           0.39855                 0.0
1              0573 CS  2021-07-14           0.10000                 0.0
2   235 West Mullan Rd  2021-05-21               NaN                 0.0
3                 7637  2021-11-13          10.00000                 0.0
4          Anniversary  2022-08-30           0.10000                 0.0
5            Ashe Lake  2023-02-22           1.00000                 0.0
6             Brewster  2021-07-08          10.00000                 0.0
7    Bridger Foothills  2020-09-05           1.00000                 0.0
8              Bullard  2021-06-23           1.00000                 0.0
9            Burntside  2022-10-24           1.50000                 0.0


In [66]:
willow_fire = wfigs_perims_output[wfigs_perims_output["poly_IncidentName"] == "Willow"]
willow_fire_sorted = willow_fire.sort_values(by="date", ascending=True)
print(willow_fire_sorted.head(10))


      poly_IncidentName        date  cumulative_acres  daily_growth_acres
28324            Willow  2020-08-18            3.0000              0.0000
28325            Willow  2020-09-14          134.5707            131.5707
28326            Willow  2021-01-05               NaN              0.0000
28327            Willow  2021-04-21           31.7700              0.0000
28328            Willow  2021-06-29            5.0000            -26.7700
28329            Willow  2021-08-07            0.1000             -4.9000
28330            Willow  2022-08-02            0.3100              0.2100
28331            Willow  2022-08-05            0.1000             -0.2100
28332            Willow  2023-05-23            0.2000              0.1000
28333            Willow  2023-08-11            0.1000             -0.1000


In [53]:
import pandas as pd

# Parse the polygon datetime as datetime type
wfigs_perims['poly_PolygonDateTime'] = pd.to_datetime(wfigs_perims['poly_PolygonDateTime'], errors='coerce')

# Count unique dates per fire
fire_date_counts = wfigs_perims.groupby('poly_IncidentName')['poly_PolygonDateTime'].nunique().reset_index(name='unique_dates')

# Find fires with more than one date (multiple snapshots)
multiple_dates = fire_date_counts[fire_date_counts['unique_dates'] > 1]

print(f"Number of fires with multiple polygon dates: {len(multiple_dates)}")
print(multiple_dates)

# Sort by 'unique_dates' descending
fire_date_counts_sorted = fire_date_counts.sort_values(by='unique_dates', ascending=False)

print(fire_date_counts_sorted)  # Show top 10 fires with most polygon dates


Number of fires with multiple polygon dates: 2110
      poly_IncidentName  unique_dates
64               000003             2
65               000004             2
66               000005             3
70               000014             2
89               000067             2
...                 ...           ...
23776      Yellowjacket             2
23779              Yeti             2
23796             Young             2
23808             Yucca             2
23816              ZINK             2

[2110 rows x 2 columns]
          poly_IncidentName  unique_dates
23455                Willow            22
5856             Cottonwood            21
18400            Rock Creek            17
18208                 Ridge            16
16708                  Pine            16
...                     ...           ...
14415                Moffat             0
8889                 Gilman             0
19957  Shupac Lake Rd. Fire             0
14418               Mojave              0
23989  