# Data Processing Code

## Install libraries

In [1]:
# install libraries
! pip install xarray netcdf4 pandas geopandas geodatasets contextily



## Import libraries

In [2]:
# import libraries
import xarray as xr
import pandas as pd
import geopandas as gpd
from shapely import wkt
from datetime import datetime
import matplotlib.pyplot as plt
from shapely.geometry import Point
import geodatasets
import contextily as ctx  # for basemap tiles
import folium
import math

  "cipher": algorithms.TripleDES,
  "class": algorithms.Blowfish,
  "class": algorithms.TripleDES,


## Playground

The playground provides a space for users to play around with code and experiment with the data

In [6]:
# example era5 data importation
era5_play = xr.open_dataset("..\\data\\raw_data\\era5_daily_downloads\\era5_us_2020_01.nc", engine = "netcdf4")

In [7]:
# convert era5 to dataframe
era5_play = era5_play.to_dataframe()

In [8]:
# Show the first few rows of era5
era5_play

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,u10,number,v10,d2m,t2m,msl,sp,lai_hv,lai_lv,tp,ssr
valid_time,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-01,71.4,-179.10,4.972603,0,-2.577962,242.021194,244.760880,102903.710938,101508.523438,0.0,0.947937,0.000276,8.639994e-11
2020-01-01,71.4,-178.85,5.263740,0,-3.247558,242.217972,244.979797,102880.132812,101528.226562,0.0,0.687561,0.000292,8.639994e-11
2020-01-01,71.4,-178.60,5.823148,0,-4.172892,242.952682,245.845520,102852.476562,101849.226562,0.0,0.628174,0.000264,8.639994e-11
2020-01-01,71.4,-178.35,6.135038,0,-5.028238,243.359741,246.287003,102827.625000,101989.773438,0.0,0.387695,0.000286,8.639994e-11
2020-01-01,71.4,-178.10,6.363186,0,-6.020182,244.015915,246.945938,102803.562500,102291.562500,0.0,0.228210,0.000339,8.639994e-11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-31,18.9,-68.10,-2.222520,0,1.156516,295.373871,298.954163,101698.164062,101727.101562,0.0,0.000000,0.000037,1.872556e+07
2020-01-31,18.9,-67.85,-2.143805,0,0.542787,295.267731,298.951813,101702.210938,101720.812500,0.0,0.000000,0.000033,1.887270e+07
2020-01-31,18.9,-67.60,-1.916490,0,-0.041726,295.137451,298.910614,101705.289062,101703.273438,0.0,0.000000,0.000269,1.879559e+07
2020-01-31,18.9,-67.35,-1.743883,0,-0.479307,294.999603,298.997864,101707.125000,101695.351562,0.0,0.000000,0.000269,1.854739e+07


In [41]:
era5_play.describe()

Unnamed: 0,number,u10,v10,d2m,t2m,msl,sp,lai_hv,lai_lv,tp,ssr
count,70485816.0,70485820.0,70485820.0,70485820.0,70485820.0,70485820.0,70485820.0,70485820.0,70485820.0,70485820.0,70485820.0
mean,0.0,0.4598129,-0.3319026,268.5918,272.6914,101655.4,98488.03,0.7406679,0.4847267,9.558734e-05,220292.2
std,0.0,5.077495,4.308384,88.31491,88.37498,45217.23,45217.23,1.036144,0.6659786,0.0003393506,449240.5
min,0.0,-23.29198,-23.68954,211.234,214.9206,95492.19,66116.31,0.0,0.0,0.0,3.599998e-12
25%,0.0,-2.554642,-3.084194,253.7292,256.5464,101021.0,97547.69,0.0,0.0,0.0,3.599998e-12
50%,0.0,0.3912659,-0.3978004,271.1414,276.1951,101772.9,100601.1,0.0,0.0,4.768372e-06,3.599999e-12
75%,0.0,3.6418,2.215988,284.4717,288.7805,102370.6,101900.7,1.372681,0.7565918,4.36306e-05,132288.0
max,0.0,26.74963,22.9808,299.554,309.3477,105330.7,104505.2,6.454468,3.974731,0.02860641,3063168.0


## Process the fire perimeter data

In [12]:
# Load CSV into DataFrame
wfigs_perims = pd.read_csv("..\\data\\raw_data\\WFIGS Current Interagency Fire Perimeters\\WFIGS_Interagency_Perimeters_-3500393626074286023.csv")

  wfigs_perims = pd.read_csv("..\\data\\raw_data\\WFIGS Current Interagency Fire Perimeters\\WFIGS_Interagency_Perimeters_-3500393626074286023.csv")


In [None]:
# Pick the more accurate acreage field
# We'll prefer poly_Acres_AutoCalc if it exists and is not null, else poly_GISAcres
wfigs_perims['used_acres'] = wfigs_perims['poly_Acres_AutoCalc'].fillna(wfigs_perims['poly_GISAcres'])

# Prepare data
wfigs_perims['poly_PolygonDateTime'] = pd.to_datetime(wfigs_perims['poly_PolygonDateTime'], errors='coerce')
wfigs_perims_2024 = wfigs_perims[wfigs_perims['poly_PolygonDateTime'].dt.year == 2024].copy()
wfigs_perims_2024 = wfigs_perims_2024.dropna(subset=['attr_InitialLatitude', 'attr_InitialLongitude', 'used_acres'])

# Create GeoDataFrame in EPSG:4326 (lat/lon)
geometry = [Point(xy) for xy in zip(wfigs_perims_2024['attr_InitialLongitude'], wfigs_perims_2024['attr_InitialLatitude'])]
gdf_fires = gpd.GeoDataFrame(wfigs_perims_2024, geometry=geometry, crs="EPSG:4326")

# Initialize folium map centered on continental USA
m = folium.Map(location=[39.5, -98.35], zoom_start=4, tiles='OpenStreetMap')

# Normalize radius so that 1 acre = radius that matches a reasonable size on screen
# We choose a scale factor to convert acres to pixels radius.
# Since area ~ acres, radius ~ sqrt(acres), so radius = scale * sqrt(acres)

max_acres = gdf_fires['used_acres'].max()
min_radius = 2   # Minimum radius in pixels
max_radius = 25  # Max radius in pixels

def scale_radius(acres):
    # sqrt scaling for radius proportionality to area (acres)
    # Normalize sqrt(acres) to max_radius range
    radius = min_radius + (math.sqrt(acres) / math.sqrt(max_acres)) * (max_radius - min_radius)
    return radius

# Add points to the map
for idx, row in gdf_fires.iterrows():
    folium.CircleMarker(
        location=[row['attr_InitialLatitude'], row['attr_InitialLongitude']],
        radius=scale_radius(row['used_acres']),
        color='red',
        fill=True,
        fill_opacity=0.6,
        popup=(
            f"Fire Name: {row.get('poly_FireName', 'N/A')}<br>"
            f"Acreage: {row['used_acres']:.1f} acres<br>"
            f"Date: {row['poly_PolygonDateTime'].date()}"
        )
    ).add_to(m)

# Save and display map
# m.save("wildfires_2024_usa_area_proportional.html")
m


In [None]:
# view the data
print(wfigs_perims.columns.tolist())
print(wfigs_perims.head())

['OBJECTID', 'poly_SourceOID', 'poly_IncidentName', 'poly_FeatureCategory', 'poly_MapMethod', 'poly_GISAcres', 'poly_DeleteThis', 'poly_FeatureAccess', 'poly_FeatureStatus', 'poly_IsVisible', 'poly_CreateDate', 'poly_DateCurrent', 'poly_PolygonDateTime', 'poly_IRWINID', 'poly_FORID', 'poly_Acres_AutoCalc', 'poly_SourceGlobalID', 'poly_Source', 'attr_SourceOID', 'attr_ABCDMisc', 'attr_ADSPermissionState', 'attr_CalculatedAcres', 'attr_ContainmentDateTime', 'attr_ControlDateTime', 'attr_CreatedBySystem', 'attr_IncidentSize', 'attr_DiscoveryAcres', 'attr_DispatchCenterID', 'attr_EstimatedCostToDate', 'attr_FinalAcres', 'attr_FFReportApprovedByTitle', 'attr_FFReportApprovedByUnit', 'attr_FFReportApprovedDate', 'attr_FireBehaviorGeneral', 'attr_FireBehaviorGeneral1', 'attr_FireBehaviorGeneral2', 'attr_FireBehaviorGeneral3', 'attr_FireCause', 'attr_FireCauseGeneral', 'attr_FireCauseSpecific', 'attr_FireCode', 'attr_FireDepartmentID', 'attr_FireDiscoveryDateTime', 'attr_FireMgmtComplexity', '

In [None]:
willow_fires = wfigs_perims[wfigs_perims["poly_IncidentName"] == "Willow"]
print(willow_fires["attr_UniqueFireIdentifier"].nunique())
print(willow_fires["attr_UniqueFireIdentifier"].unique())


26
['2021-AZSCA-001676' '2021-CAMDF-000577' '2021-CALPF-001493'
 '2020-AZTNF-002514' '2020-WYCOX-200312' '2022-MIHMF-220119'
 '2021-COSJF-001044' '2020-AZASF-000689' '2022-NVWID-020365'
 '2020-IDNCF-000069' '2020-IDBOD-000518' '2021-CACDD-006676'
 '2023-SDSDS-230189' '2023-MN3SS-000019' '2023-AKMID-000293'
 '2023-AZASF-000170' '2023-AZASF-000611' '2024-WIWIS-FIRE122391'
 '2024-WIWIS-FIRE121832' '2024-FLBCP-001015' '2024-AZTNF-001334'
 '2024-WVNRP-000359' '2025-WIWIS-FIRE123428' '2025-WIWIS-FIRE123459'
 '2025-NDFTA-000265' '2025-WIWIS-FIRE123935']


In [None]:
# Convert polygon datetime column to datetime type (adjust column name if needed)
wfigs_perims['date'] = pd.to_datetime(wfigs_perims['poly_PolygonDateTime'], errors='coerce').dt.date

willow_fires = wfigs_perims[wfigs_perims["poly_IncidentName"] == "Willow"]

unique_fire_ids = willow_fires["attr_UniqueFireIdentifier"].unique()

for fire_id in unique_fire_ids:
    print(f"--- Fire ID: {fire_id} ---")
    single_fire = willow_fires[willow_fires["attr_UniqueFireIdentifier"] == fire_id].sort_values(by="date")
    print(single_fire[["date", "poly_GISAcres"]])
    print("\n")



--- Fire ID: 2021-AZSCA-001676 ---
            date  poly_GISAcres
1253  2021-06-29            5.0


--- Fire ID: 2021-CAMDF-000577 ---
     date  poly_GISAcres
2802  NaT            NaN


--- Fire ID: 2021-CALPF-001493 ---
     date  poly_GISAcres
5426  NaT       2877.869


--- Fire ID: 2020-AZTNF-002514 ---
            date  poly_GISAcres
5727  2020-08-18            3.0


--- Fire ID: 2020-WYCOX-200312 ---
     date  poly_GISAcres
5918  NaT            NaN


--- Fire ID: 2022-MIHMF-220119 ---
            date  poly_GISAcres
8479  2022-08-05            0.1


--- Fire ID: 2021-COSJF-001044 ---
            date  poly_GISAcres
8860  2021-08-07            0.1


--- Fire ID: 2020-AZASF-000689 ---
            date  poly_GISAcres
9090  2021-01-05            NaN


--- Fire ID: 2022-NVWID-020365 ---
             date  poly_GISAcres
12083  2022-08-02           0.31


--- Fire ID: 2020-IDNCF-000069 ---
      date  poly_GISAcres
12117  NaT            NaN


--- Fire ID: 2020-IDBOD-000518 ---
       

In [None]:
# Step 1: Parse polygon datetime and extract date
wfigs_perims["poly_PolygonDateTime"] = wfigs_perims["date"] = wfigs_perims["poly_PolygonDateTime"].dt.datepd.to_datetime(wfigs_perims["poly_PolygonDateTime"], errors='coerce')


In [None]:
# Step 2: Sort by fire name, date, and descending acreage to keep largest per day
wfigs_perims_sorted = wfigs_perims.sort_values(
    by=["poly_IncidentName", "date", "poly_GISAcres"],
    ascending=[True, True, False]
)

In [None]:
# Step 3: Keep largest perimeter per fire per day
wfigs_perims_daily_largest = wfigs_perims_sorted.drop_duplicates(
    subset=["poly_IncidentName", "date"]
).copy()

In [None]:
# Step 4: Calculate daily growth in acres per fire
wfigs_perims_daily_largest["daily_growth_acres"] = wfigs_perims_daily_largest.groupby("poly_IncidentName")["poly_GISAcres"].diff().fillna(0)


In [61]:
# Step 5: Prepare output columns and sort
wfigs_perims_output = wfigs_perims_daily_largest[["poly_IncidentName", "date", "poly_GISAcres", "daily_growth_acres"]].rename(
    columns={"poly_GISAcres": "cumulative_acres"}
).sort_values(by=["poly_IncidentName", "date"]).reset_index(drop=True)

In [63]:
# Step 6: Print example output
print(wfigs_perims_output.head(10))

     poly_IncidentName        date  cumulative_acres  daily_growth_acres
0            \tMA07038  2022-12-14           0.39855                 0.0
1              0573 CS  2021-07-14           0.10000                 0.0
2   235 West Mullan Rd  2021-05-21               NaN                 0.0
3                 7637  2021-11-13          10.00000                 0.0
4          Anniversary  2022-08-30           0.10000                 0.0
5            Ashe Lake  2023-02-22           1.00000                 0.0
6             Brewster  2021-07-08          10.00000                 0.0
7    Bridger Foothills  2020-09-05           1.00000                 0.0
8              Bullard  2021-06-23           1.00000                 0.0
9            Burntside  2022-10-24           1.50000                 0.0


In [66]:
willow_fire = wfigs_perims_output[wfigs_perims_output["poly_IncidentName"] == "Willow"]
willow_fire_sorted = willow_fire.sort_values(by="date", ascending=True)
print(willow_fire_sorted.head(10))


      poly_IncidentName        date  cumulative_acres  daily_growth_acres
28324            Willow  2020-08-18            3.0000              0.0000
28325            Willow  2020-09-14          134.5707            131.5707
28326            Willow  2021-01-05               NaN              0.0000
28327            Willow  2021-04-21           31.7700              0.0000
28328            Willow  2021-06-29            5.0000            -26.7700
28329            Willow  2021-08-07            0.1000             -4.9000
28330            Willow  2022-08-02            0.3100              0.2100
28331            Willow  2022-08-05            0.1000             -0.2100
28332            Willow  2023-05-23            0.2000              0.1000
28333            Willow  2023-08-11            0.1000             -0.1000


In [53]:
import pandas as pd

# Parse the polygon datetime as datetime type
wfigs_perims['poly_PolygonDateTime'] = pd.to_datetime(wfigs_perims['poly_PolygonDateTime'], errors='coerce')

# Count unique dates per fire
fire_date_counts = wfigs_perims.groupby('poly_IncidentName')['poly_PolygonDateTime'].nunique().reset_index(name='unique_dates')

# Find fires with more than one date (multiple snapshots)
multiple_dates = fire_date_counts[fire_date_counts['unique_dates'] > 1]

print(f"Number of fires with multiple polygon dates: {len(multiple_dates)}")
print(multiple_dates)

# Sort by 'unique_dates' descending
fire_date_counts_sorted = fire_date_counts.sort_values(by='unique_dates', ascending=False)

print(fire_date_counts_sorted)  # Show top 10 fires with most polygon dates


Number of fires with multiple polygon dates: 2110
      poly_IncidentName  unique_dates
64               000003             2
65               000004             2
66               000005             3
70               000014             2
89               000067             2
...                 ...           ...
23776      Yellowjacket             2
23779              Yeti             2
23796             Young             2
23808             Yucca             2
23816              ZINK             2

[2110 rows x 2 columns]
          poly_IncidentName  unique_dates
23455                Willow            22
5856             Cottonwood            21
18400            Rock Creek            17
18208                 Ridge            16
16708                  Pine            16
...                     ...           ...
14415                Moffat             0
8889                 Gilman             0
19957  Shupac Lake Rd. Fire             0
14418               Mojave              0
23989  