<a href="https://colab.research.google.com/github/SashaNasonova/burnSeverity/blob/main/BARC_ImageSearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Burn Severity Mapping Notebook - Image Search
This notebook is intended for small-scale, interactive burn severity mapping of individual fires, in conjunction with the BARC_SBS notebook. For large-scale, semi-automated mapping please refer to the main Python scripts (https://github.com/SashaNasonova/burnSeverity).

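This notebook checks for all available pre- and post-fire imagery for a fire perimeter (user-defined or downloaded from BC Data Catalogue) and calculates, for each sensor and acquisition date, the percent image coverage and percent cloud cover within the perimeter (plus the mean AOT for Sentinel-2).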

Steps:
1. Clone repository, install packages, and define functions
2. Authenticate Google Earth Engine and initialize a Google Cloud project
3. Import fire perimeters from https://pub.data.gov.bc.ca/datasets/cdfc2d7b-c046-4bf0-90ac-4897232619e1/prot_current_fire_polys.zip, or upload your own perimeters as a shapefile named '/content/perims.shp' together with its auxiliary files.
4. Select an individual fire number
5. Define which sensors to search
6. Search for pre-fire imagery by defining a time range (T1 and T2) and maximum scene cloud cover (e.g. less than 70%) and save outputs to a csv.
7. Visualize pre-fire imagery in table format
8. Search for post-fire imagery by defining a time range (T1 and T2) and maximum scene cloud cover (e.g. less than 70%) and save outputs to a csv.
9. Visualize post-fire imagery in a table

All outputs are saved to a folder named by date and time (e.g. /content/scene_availability_eval_2025-08-12_17-01-33). Image availability tables are named by fire number (e.g. /content/scene_availability_eval_2025-08-12_18-56-07/V71145_post_all_mosaicMetadata.csv).



In [None]:
# Clone github repository to be able to access the test data and provincial extent vector data
# (the `!` prefix runs the line as a shell command; the repository is cloned
# into a `burnSeverity` folder in the current working directory)
!git clone https://github.com/SashaNasonova/burnSeverity.git

In [None]:
# Install the libraries
# geemap is pinned to an exact version; the remaining packages are installed
# at whatever version pip resolves.
%pip install geemap==0.32.1 #Stable version is (0.32.1) from 15-Jul-2024
%pip install pycrs rasterio python-pptx cartopy requests

In [None]:
# Import the libraries
import ee
import geemap
import os, json, shutil
import geopandas
from osgeo import gdal
from google.colab import files
import requests, zipfile
from datetime import datetime
import numpy as np
import pandas as pd
from pathlib import Path
import warnings


In [None]:
# Processing function
def eval(firenumber=None,dattypes=None,poly=None,T1=None,T2=None,dattype_info=None,outfolder=None,timing=None,max_cloud=None):
    """Summarise image availability over a fire perimeter, per sensor and date.

    For every requested sensor the scenes intersecting the perimeter in the
    search interval are mosaicked by acquisition date, and for each mosaic
    the percent coverage and percent cloud cover inside the perimeter are
    computed (plus the mean AOT for Sentinel-2). The combined table is
    written to ``<outfolder>/<firenumber>_<timing>_all_mosaicMetadata.csv``.

    NOTE: the name shadows the built-in ``eval``; it is kept because the
    search cells of this notebook call the function by this name.

    Args:
        firenumber: string, unique fire perimeter identifier from BC Wildfire
            (only used for messages and the output file name).
        dattypes: list of strings, one or more keys of ``dattype_info``.
        poly: ee.FeatureCollection, fire perimeter.
        T1: string, first date of search interval (eg. 2025-06-01 is June 1st, 2025).
        T2: string, last date of search interval, not inclusive.
        dattype_info: dictionary keyed by sensor type; each entry provides
            'collection_id', 'cld_field' and 'bands'.
        outfolder: string, output root folder (must already exist).
        timing: string, 'pre' or 'post' (only used in the output file name).
        max_cloud: number, optional. Scenes are kept only when their
            ``cld_field`` value is strictly below this threshold. When
            omitted, the notebook-level variable ``cld`` is used, which is
            how the search cells have always supplied it.

    Returns:
        pandas.DataFrame with one row per sensor and mosaic date (columns
        percent_coverage, percent_cc, percent_cc_scene, sensor, mean_aot).
        An empty DataFrame is returned, and no CSV is written, when no
        sensors were requested or no scenes were found.
    """
    warnings.simplefilter(action='ignore', category=FutureWarning) #silencing future warnings

    print('Evaluating',firenumber)

    # Nothing to search: return early instead of failing later in pd.concat
    if not dattypes:
        print('   -No sensor types were requested')
        return pd.DataFrame()

    # Backward compatibility: the search cells set a notebook-level `cld`
    # instead of passing the threshold as an argument.
    if max_cloud is None:
        max_cloud = cld

    # Raster mask (perimeter bounding box buffered by 500 m) used to reduce the
    # extent of the image collections. Built from `poly` so the function does
    # not depend on a mask prepared in another cell.
    aoi_mask = ee.Image.constant(1).clip(poly.geometry().buffer(500).bounds()).selfMask()
    aoi_geom = poly.geometry()

    def aoionly(img):
        return img.updateMask(aoi_mask)

    #Helper function must be nested within processing function
    def getDate(im):
        return ee.Image(im).date().format("YYYY-MM-dd")

    def mosaicByDate(indate):
        d = ee.Date(indate)
        # `col` is the current sensor's collection (assigned in the loop below);
        # note it is not filtered by the scene cloud threshold.
        im = col.filterBounds(poly).filterDate(d, d.advance(1, "day")).mosaic()
        return im.set("system:time_start", d.millis(), "system:index", d.format("YYYY-MM-dd"))

    def runDateMosaic(col_list):
        #get a sorted list of unique dates within the list
        udates = sorted(set(col_list.map(getDate).getInfo()))
        #mosaic images by unique date
        return ee.ImageCollection(ee.List(udates).map(mosaicByDate))

    #Landsat cloud mask from metadata
    ## Check this!!!
    def get_cloud(img1):
        ### Change as of Oct 24, 2023: cloud shadow is too inaccurate, remove
        ### Though it is picking up topographic shadow. Questions!
        # Bit 3 of QA_PIXEL is cloud; cloudmsk is 1 where the cloud bit is NOT set
        cloudBitMask = (1 << 3)
        qa = img1.select('QA_PIXEL')
        clouds = qa.bitwiseAnd(cloudBitMask).eq(0).rename('cloudmsk')
        return img1.addBands(clouds)

    def strDate(string):
        # second '_'-separated token, up to the 'T', reformatted as YYYY-MM-DD
        u_str = string.rsplit('_')[1].rsplit('T')[0]
        return u_str[0:4] + '-' + u_str[4:6] + '-' + u_str[6:8]

    def reduce_over_aoi(image, reducer):
        #Calculate statistics, if the image is too big this may fail.
        #This step causes problems sometimes due to maxPixels limits
        return image.reduceRegion(reducer=reducer,geometry=aoi_geom,maxPixels=100000000000,scale=30).getInfo()

    def as_column(stats, name):
        # {band name: value} dictionary -> one-column table indexed by band name
        frame = pd.DataFrame([stats]).T
        frame.columns = [name]
        return frame

    def strip_band_suffix(index):
        # band names look like '<mosaic id>_<band>'; keep the mosaic id (date)
        return [s.rsplit('_')[0] for s in index]

    df_list = []
    for dattype in dattypes:
        print(' -Searching',dattype)
        info = dattype_info[dattype]
        cld_field = info['cld_field']
        is_s2 = dattype.startswith('S2')
        # Surface-reflectance Landsat products prefix their bands with 'SR_';
        # Sentinel-2 and the TOA Landsat products use plain 'B2'/'B3'/'B4'.
        vis_bands = ['SR_B2','SR_B3','SR_B4'] if 'SR_B2' in info['bands'] else ['B2','B3','B4']

        col = ee.ImageCollection(info['collection_id']).map(aoionly).select(info['bands'])
        colfilt = col.filterDate(T1,T2).filterBounds(poly).filter(ee.Filter.lt(cld_field,max_cloud))
        colfilt_list = colfilt.toList(10000)

        if colfilt_list.size().getInfo() == 0:
            print('   -Zero scenes were found for',dattype)
            continue

        # Create before mosaics
        mosaic_col = runDateMosaic(colfilt_list)

        # Ask server for individual scene metadata and turn it into table format
        features = colfilt.getInfo()['features']
        meta_df = pd.DataFrame([{'id': f['id'], **f['properties']} for f in features])

        #add date column
        if is_s2:
            meta_df['date'] = meta_df['DATATAKE_IDENTIFIER'].apply(strDate)
        else:
            meta_df['date'] = meta_df['DATE_ACQUIRED']

        # Classify to get coverage and cloud extent, fix this to check if any bands are equal to 0
        def classify_extent(img1):
            b2, b3, b4 = vis_bands
            return img1.expression("((B2 + B3 + B4) !=0) ? 1 : 0",
                                   {'B2': img1.select(b2),
                                    'B3': img1.select(b3),
                                    'B4': img1.select(b4)}).rename('c').clip(poly)

        def classify_cc(img1):
            if is_s2:
                return img1.expression("(MSK_CLDPRB > 30) ? 1 : 0",
                                       {'MSK_CLDPRB': img1.select('MSK_CLDPRB')}).rename('c').clip(poly)
            # cloudmsk == 1 means clear, so invert it to flag cloudy pixels
            return img1.expression("(cloudmsk == 1) ? 0 : 1",
                                   {'cloudmsk': img1.select('cloudmsk')}).rename('c').clip(poly)

        mosaic_extent = mosaic_col.map(classify_extent).toBands()

        if is_s2:
            mosaic_cc = mosaic_col.map(classify_cc).toBands()
            aot = mosaic_col.select('AOT').toBands().divide(1000)
            reduced_mean_aot = reduce_over_aoi(aot, ee.Reducer.mean())
        else:
            mosaic_cc = mosaic_col.map(get_cloud).map(classify_cc).toBands()

        reduced_sum = reduce_over_aoi(mosaic_extent, ee.Reducer.sum())
        reduced_count = reduce_over_aoi(mosaic_extent, ee.Reducer.count())

        reduced_sum_cc = reduce_over_aoi(mosaic_cc, ee.Reducer.sum())
        reduced_count_cc = reduce_over_aoi(mosaic_cc, ee.Reducer.count())

        print('   -Image statistics calculated')

        #Rearrange and calculate percent coverage and percent cloud cover
        #extent
        df_perc = as_column(reduced_sum, 'sum').join(as_column(reduced_count, 'count'))
        df_perc['percent_coverage'] = (df_perc['sum']/df_perc['count'])*100

        #cloud cover
        df_perc_cc = as_column(reduced_sum_cc, 'sum_cc').join(as_column(reduced_count_cc, 'count_cc'))
        df_perc_cc['percent_cc'] = (df_perc_cc['sum_cc']/df_perc_cc['count_cc'])*100

        #join extent and cc, then get rid of the band suffix
        meta_df_ext = df_perc.join(df_perc_cc)
        meta_df_ext.index = strip_band_suffix(meta_df_ext.index)

        if is_s2:
            #aot
            df_mean_aot = as_column(reduced_mean_aot, 'mean_aot')
            df_mean_aot.index = strip_band_suffix(df_mean_aot.index)
            meta_df_ext = meta_df_ext.join(df_mean_aot)

        #get average scene cloud cover and join to mosaic metadata
        meta_scenes_cld = meta_df.groupby('date')[cld_field].mean().rename('percent_cc_scene').to_frame()
        meta_df_ext = meta_df_ext.join(meta_scenes_cld)

        meta_df_ext_out = meta_df_ext.round(2).drop(columns=['sum','count','sum_cc','count_cc'])
        meta_df_ext_out['sensor'] = dattype

        if 'mean_aot' not in meta_df_ext_out.columns:
            meta_df_ext_out['mean_aot'] = None

        df_list.append(meta_df_ext_out)
        print('   -Dataframe prepared')

    # pd.concat raises ValueError on an empty list, so handle "nothing found" explicitly
    if not df_list:
        print('   -No scenes were found for any sensor, nothing was saved')
        return pd.DataFrame()

    #Output final dataframe and save
    df_all = pd.concat(df_list)
    outpath2 = os.path.join(outfolder,firenumber+'_'+timing+'_all_mosaicMetadata.csv')
    df_all.to_csv(outpath2)
    print('   -Final dataframe saved to',outpath2)
    return df_all

In [None]:
# Authenticate gee
# Must run before ee.Initialize() in the next cell.
ee.Authenticate()

In [None]:
# Initialize with a google cloud project
# NOTE(review): 'wlbr-2025' is presumably the author's project ID - replace it
# with a Google Cloud project you have access to.
project = 'wlbr-2025'
ee.Initialize(project=project)

In [None]:
# Get fire perimeter file: use the user-supplied shapefile if it exists,
# otherwise download the current fire perimeters from BC Wildfire.
fires_shp = '/content/perims.shp'
if os.path.exists(fires_shp):
  print('Using user specified perimeter file')
else:
  print('Downloading BC Wildfire current fire perimeter file')
  fires_shp = '/content/prot_current_fire_polys/prot_current_fire_polys.shp'

  url = 'https://pub.data.gov.bc.ca/datasets/cdfc2d7b-c046-4bf0-90ac-4897232619e1/prot_current_fire_polys.zip'
  # A timeout keeps the cell from hanging forever if the server stops responding
  response = requests.get(url, timeout=120)

  # Stop here on a failed download: carrying on would either crash on a missing
  # archive or silently extract a stale one left over from an earlier run.
  if response.status_code != 200:
      raise RuntimeError(f"Failed to download file. Status code: {response.status_code}")

  with open("prot_current_fire_poly.zip", 'wb') as file:
      file.write(response.content)
  print("File downloaded successfully")

  with zipfile.ZipFile("prot_current_fire_poly.zip", 'r') as zip_ref:
      zip_ref.extractall('/content/prot_current_fire_polys')
  print('Fire perimeter file: ',fires_shp)

In [None]:
from google.colab import data_table

# Switch on Colab's interactive rendering for DataFrames
data_table.enable_dataframe_formatter()

# Load the perimeters twice: as a GeoDataFrame for browsing and as an
# Earth Engine object
fires_df = geopandas.read_file(fires_shp)
fires = geemap.shp_to_ee(fires_shp)

# Attribute table only (geometry column removed) for display
fires_df_tbl = fires_df.drop(columns=['geometry'])
#fires_df_tbl = fires_df_tbl[(fires_df_tbl['FIRE_STAT']=='Out') & (fires_df_tbl['FIRE_SZ_HA']>=100)] #uncomment for fires that are out and >= 100 ha
fires_df_tbl

In [None]:
# Select fire number
# Must match a value in the perimeter file's unique fire number field; the
# next cell raises a ValueError if it is not found.
firenumber = 'V71498'

In [None]:
# Pick out the single perimeter to work on (the test data holds only one)
fieldname = 'FIRE_NUM' #unique firenumber field, change if needed

# Make sure the requested fire number is present before doing anything else
firelist = fires_df[fieldname].tolist()
if firenumber not in firelist:
  print('Selected fire number:',firenumber)
  print('Available fire numbers: ',firelist)
  raise ValueError('Fire number not in fire list. Typo?')

# Time-stamped output folder with a 'vectors' subfolder for the perimeter copy
outfolder = 'scene_availability_eval_'+ datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
vector_folder = os.path.join(outfolder,'vectors')
os.makedirs(vector_folder, exist_ok=True)  # creates outfolder as well

# Write the selected perimeter to its own shapefile ...
outshp = os.path.join(vector_folder, f'{firenumber}.shp')
fires_df_sub = fires_df.loc[fires_df[fieldname] == firenumber]
fires_df_sub.to_file(outshp, driver='ESRI Shapefile')

# ... and read it back as an Earth Engine object
poly = geemap.shp_to_ee(outshp)

# Raster mask (perimeter bounding box buffered by 500 m) used by aoionly to
# reduce the extent of the image collections
poly_buf = poly.geometry().buffer(500).bounds()
poly_mask = ee.Image.constant(1).clip(poly_buf).selfMask()

In [None]:
# Define collections
# Every entry names the Earth Engine collection, the scene-level cloud cover
# property used for filtering, and the bands to keep.
def _landsat_entry(collection_id, band_prefix=''):
    # All Landsat entries share the cloud field and differ only in the
    # collection id and the band-name prefix.
    return {
        'collection_id': collection_id,
        'cld_field': 'CLOUD_COVER',
        'bands': [f'{band_prefix}B{n}' for n in (2, 3, 4)] + ['QA_PIXEL'],
    }


dattype_info = {
    'S2': {
        'collection_id': 'COPERNICUS/S2_SR_HARMONIZED',
        'cld_field': 'CLOUDY_PIXEL_PERCENTAGE',
        'bands': ['B2', 'B3', 'B4', 'MSK_CLDPRB', 'AOT'],
    },
    'L9': _landsat_entry('LANDSAT/LC09/C02/T1_L2', 'SR_'),
    'L8': _landsat_entry('LANDSAT/LC08/C02/T1_L2', 'SR_'),
    'L8_TOA': _landsat_entry('LANDSAT/LC08/C02/T1_TOA'),
    'L9_TOA': _landsat_entry('LANDSAT/LC09/C02/T1_TOA'),
}

In [None]:
# Select which sensors we want to assess
# Each name must be a key of dattype_info (defined in the previous cell).
dattypes = ['S2','L8','L9']

In [None]:
## Look for pre-fire imagery
# timing: label used in the output csv name
# T1/T2: search interval as YYYY-MM-DD strings (T2 is not inclusive)
# cld: maximum scene cloud cover; scenes are kept only when strictly below it.
#      It is not passed in the call below - eval reads it from the notebook
#      namespace, so it must be set before eval is called.
timing = 'pre'
T1 = '2024-07-01'
T2 = '2024-09-01'
cld = 40
pre_df = eval(firenumber=firenumber,dattypes=dattypes,poly=poly,T1=T1,T2=T2,
              dattype_info=dattype_info,outfolder=outfolder,timing=timing)
# Display the resulting table
pre_df

In [None]:
## Look for post-fire imagery
# timing: label used in the output csv name
# T1/T2: search interval as YYYY-MM-DD strings (T2 is not inclusive)
# cld: maximum scene cloud cover; scenes are kept only when strictly below it.
#      It is not passed in the call below - eval reads it from the notebook
#      namespace, so it must be set before eval is called.
timing = 'post'
T1 = '2025-08-01'
T2 = '2025-09-01'
cld = 100
post_df = eval(firenumber=firenumber,dattypes=dattypes,poly=poly,T1=T1,T2=T2,
              dattype_info=dattype_info,outfolder=outfolder,timing=timing)
# Display the resulting table
post_df

In [None]:
# Zip the output folder (image availability tables and the saved fire
# perimeter) and download the archive through the browser.
zipped = outfolder + '.zip'

# {zipped} and {outfolder} are replaced with the Python variables' values
# before the shell command runs
!zip -r {zipped} {outfolder}
files.download(zipped)