## Part 2: Dataset Acquisition

This notebook is a streamlined version of the data exploration in Part 1 and mainly contains the following steps:

* Set up parameters for both data acquisition and image exportation.
* Filter fire events by GIS-calculated acreage (100 ~ 10,000 acres).
* Compute each region's bounds and pad them so that the exported image is at least 224 * 224 pixels (regions that are already larger are left unpadded so they can be cropped later).
* Acquire pre- and post-fire event satellite images based on the time window (60 days before fire and 30 days after the fire).
* Fill the mask (1 stands for burned area) with fire perimeters using region geometry.
* Select bands from the Sentinel-2 collection (RGB, near infrared and short-wavelength infrared).
* Compute the Normalized Burn Ratio for the pre- and post-fire dates.
* Calculate the NBR difference (dNBR) by subtracting the post-fire NBR from the pre-fire NBR.
* Export the pre- and post-fire satellite imagery with five bands (RGB, near infrared, short-wavelength infrared), the fire mask and the NBR difference.

In [None]:
import re

import pandas as pd
import geopandas as gpd
import ee, geemap

In [2]:
# Authenticate against Google Earth Engine, then initialize the client library
# so that the `ee.*` calls in the following cells can reach the service.
ee.Authenticate()
ee.Initialize()

In [None]:
### ==============================================================================================
### Setup parameters
### ==============================================================================================
# Parameters for data acquisition
# Scenes are kept only when their CLOUDY_PIXEL_PERCENTAGE is strictly below this value.
CLOUD_THRESHOLD = 20
PRE_FIRE_DAYS = 60  # Days before the alarm date to include
POST_FIRE_DAYS = 30  # Days after the containment date to include


# Parameters for export
TARGET_PIXELS = 224  # Minimum side length (in pixels) used by expand_region
SCALE = 20  # Meters per pixel
# Approximate meters per degree at the equator; expand_region applies it to
# both the longitude and the latitude span of a region.
# We prefer an image that is too large, since it can be cropped afterwards.
METERS_PER_DEGREE = 111320

In [None]:
### ==============================================================================================
### Read GeoJSON file and preprocessing
### ==============================================================================================
in_file = "../../data/California_Historic_Fire_Perimeters.geojson"
fire_gdf = gpd.read_file(in_file)
fire_df = pd.DataFrame(fire_gdf)

# Re-express the alarm and containment timestamps as ISO-8601 strings in UTC.
# Each source column is parsed with the same format and written to "<name>_ISO".
for raw_col, iso_col in (("ALARM_DATE", "ALARM_DATE_ISO"), ("CONT_DATE", "CONT_DATE_ISO")):
    parsed = pd.to_datetime(fire_df[raw_col], format='%a, %d %b %Y %H:%M:%S %Z')
    fire_df[iso_col] = parsed.dt.tz_convert('UTC').dt.strftime('%Y-%m-%dT%H:%M:%SZ')

In [None]:
### ==============================================================================================
### Filter fire events
### ==============================================================================================
# Medium fires: 1,000 <= acres < 10,000 in the "2020-January 2025" bucket, with
# both the alarm and the containment date available.
# The lower bound is inclusive on purpose: the small-fire subset stops at
# `GIS_ACRES < 1000`, so a strict `> 1000` here would silently drop any fire
# of exactly 1,000 acres from both subsets.
medium_fires = fire_df[
    (fire_df.GIS_ACRES < 10000) &
    (fire_df.GIS_ACRES >= 1000) &
    (fire_df.DECADES == "2020-January 2025") &
    (fire_df.ALARM_DATE_ISO.notna()) &
    (fire_df.CONT_DATE_ISO.notna()) ]

In [11]:
# Small fires: strictly between 100 and 1,000 acres in the "2020-January 2025"
# bucket, with both the alarm and the containment date available.
acres = fire_df.GIS_ACRES
in_small_range = (acres < 1000) & (acres > 100)
in_recent_bucket = fire_df.DECADES == "2020-January 2025"
has_both_dates = fire_df.ALARM_DATE_ISO.notna() & fire_df.CONT_DATE_ISO.notna()
small_fires = fire_df[in_small_range & in_recent_bucket & has_both_dates]

In [13]:
# Stack the two subsets row-wise (small fires first), keeping the original row labels,
# and show how many fire events were selected.
fire_events = pd.concat([small_fires, medium_fires])
fire_events.shape[0]

494

In [None]:
### ==============================================================================================
### Add metadata for the chosen region
### ==============================================================================================
# One plain dict per selected fire, holding only the fields the export cell reads.
analysis_regions = [
    {
        'name': row['FIRE_NAME'],
        'geometry': row['geometry'],
        'alarm_date': row['ALARM_DATE_ISO'],
        'containment_date': row['CONT_DATE_ISO'],
        'acres_burned': row['GIS_ACRES'],
    }
    for _, row in fire_events.iterrows()
]

In [None]:
def expand_region(region_geom):
    """Return the bounding rectangle of a region, padded to a minimum size.

    The bounding box of ``region_geom`` is fetched from Earth Engine and its
    longitude/latitude spans are converted to meters with METERS_PER_DEGREE.
    Any span shorter than ``SCALE * TARGET_PIXELS`` meters is padded
    symmetrically so that it reaches that length; longer spans are returned
    unchanged.

    Args:
        region_geom: An object exposing ``bounds().coordinates().getInfo()``
            (an ``ee.Geometry`` in this notebook), with coordinates in degrees.

    Returns:
        ``ee.Geometry.Rectangle`` built from ``[xmin, ymin, xmax, ymax]``.

    Note:
        ``getInfo()`` is a blocking call, so each invocation waits for the
        bounds to be returned.
    """
    # Outer ring of the bounding polygon as a list of [x, y] pairs.
    ring = region_geom.bounds().coordinates().getInfo()[0]

    # Take the extrema over every vertex instead of trusting that the ring
    # starts at the lower-left corner and has the upper-right corner third.
    xs = [vertex[0] for vertex in ring]
    ys = [vertex[1] for vertex in ring]
    xmin, xmax = min(xs), max(xs)
    ymin, ymax = min(ys), max(ys)

    # Current extent in meters and the minimum extent required for the crop.
    width_m = (xmax - xmin) * METERS_PER_DEGREE
    height_m = (ymax - ymin) * METERS_PER_DEGREE
    min_size_m = SCALE * TARGET_PIXELS

    # Padding (in degrees, per side) needed when the extent is below the minimum.
    pad_x = max(0, (min_size_m - width_m) / METERS_PER_DEGREE / 2)
    pad_y = max(0, (min_size_m - height_m) / METERS_PER_DEGREE / 2)

    # Expand the bounds
    new_bounds = ee.Geometry.Rectangle([xmin - pad_x, ymin - pad_y, xmax + pad_x, ymax + pad_y])
    return new_bounds

In [16]:
# Base name of the Google Drive folder; the export cell appends "/<subfolder>" to it.
# NOTE(review): Earth Engine documents Drive folder names that contain separators
# as literal names rather than nested paths - confirm the resulting Drive layout.
output_folder = "dataset"

In [None]:
### ==============================================================================================
### Run and export image to Google Drive
### ==============================================================================================
# Bands written to the pre-/post-fire GeoTIFFs (RGB, near infrared, short-wavelength infrared).
EXPORT_BANDS = ['B2', 'B3', 'B4', 'B8', 'B12']
# Earth Engine task descriptions are limited in length and character set
# (letters, digits and . , : ; _ -); anything else is replaced below.
MAX_DESCRIPTION_LENGTH = 100
DISALLOWED_DESCRIPTION_CHARS = re.compile(r'[^A-Za-z0-9.,:;_-]')


def _start_drive_export(image, description, subfolder, region):
    """Start a GeoTIFF export of ``image`` to ``<output_folder>/<subfolder>`` and return the task."""
    task = ee.batch.Export.image.toDrive(
        image=image,
        description=description,
        folder=f'{output_folder}/{subfolder}',
        scale=SCALE,
        region=region,
        maxPixels=1e10,
        fileFormat='GeoTIFF'
    )
    task.start()
    return task


for region in analysis_regions:
    region_gdf = gpd.GeoDataFrame([region], geometry='geometry', crs='EPSG:4326')
    region_ee = geemap.geopandas_to_ee(region_gdf)
    region_geom = region_ee.geometry()
    print(f"Processing region: {region['name']}")

    # Calculate time window for imagery using ISO
    alarm_date = ee.Date(region['alarm_date'])
    containment_date = ee.Date(region['containment_date'])
    pre_fire_start = alarm_date.advance(-PRE_FIRE_DAYS, 'day')
    post_fire_end = containment_date.advance(POST_FIRE_DAYS, 'day')

    # Get Sentinel-2 collection for this region and time period
    expand_regions = expand_region(region_geom=region_geom)
    collection = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterBounds(expand_regions)
        .filterDate(pre_fire_start, post_fire_end)
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', CLOUD_THRESHOLD))
        .select(['B2', 'B3', 'B4', 'B8', 'B11', 'B12'])
    )
    # Create pre-fire and post-fire median composites
    # Clip to expand regions to ensure the minimum crop size
    pre_fire_img = collection.filterDate(pre_fire_start, alarm_date).median().clip(expand_regions)
    post_fire_img = collection.filterDate(containment_date, post_fire_end).median().clip(expand_regions)

    # Calculate NBR = (B8 - B12) / (B8 + B12) and dNBR = pre-fire NBR - post-fire NBR
    pre_fire_nbr = pre_fire_img.normalizedDifference(['B8', 'B12']).rename('NBR')
    post_fire_nbr = post_fire_img.normalizedDifference(['B8', 'B12']).rename('NBR')
    dnbr = pre_fire_nbr.subtract(post_fire_nbr).rename('dNBR')

    # Create fire mask: 1 inside the fire perimeter, 0 elsewhere
    fire_mask = ee.Image(0).byte().paint(region_geom, 1).rename("mask").clip(expand_regions)

    # Build the shared task description / file name: <sanitized fire name>_<YYYYMMDD>.
    # Spaces and slashes still become "_" as before; other characters that are not
    # allowed in a task description are replaced as well, and a missing fire name
    # falls back to a placeholder instead of raising on `.replace`.
    raw_name = region['name'] if isinstance(region['name'], str) and region['name'] else 'UNNAMED'
    clean_name = DISALLOWED_DESCRIPTION_CHARS.sub('_', raw_name)
    alarm_date_short = region['alarm_date'][:10].replace('-', '')
    name_budget = MAX_DESCRIPTION_LENGTH - len(alarm_date_short) - 1
    description = f'{clean_name[:name_budget]}_{alarm_date_short}'

    # Export the four products into separate sub-folders; the identical file name
    # in each folder is what ties the products of one fire together.
    pre_fire_visual = pre_fire_img.select(EXPORT_BANDS)
    task1 = _start_drive_export(pre_fire_visual, description, 'pre_fire', expand_regions)

    post_fire_visual = post_fire_img.select(EXPORT_BANDS)
    task2 = _start_drive_export(post_fire_visual, description, 'post_fire', expand_regions)

    task3 = _start_drive_export(fire_mask, description, 'masks', expand_regions)

    task4 = _start_drive_export(dnbr, description, 'dnbr', expand_regions)