The Notebook explores and showcases Global Wildfire events

In [1]:
import os
import glob
import pandas as pd
import geopandas as gpd
import dask_geopandas as dgpd
import dask.dataframe as dd


In [2]:
wildfire_path = "/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023"
shapefile_paths = glob.glob(os.path.join(wildfire_path, "*.shp"))

filtered_shapefiles = [
    path for path in shapefile_paths
    if 2014 <= int(os.path.basename(path).split('_')[-1].split('.')[0]) <= 2023
]

filtered_shapefiles

['/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2022.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2021.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2020.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2019.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2018.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2023.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2015.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2016.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2014.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original

In [3]:
list_of_ddfs = [dgpd.read_file(shp, chunksize=2048) for shp in filtered_shapefiles]
dask_gdf = dgpd.from_dask_dataframe(dd.concat(list_of_ddfs))
dask_gdf.head()

   _uid_          id initialdat  finaldate     area_ha  \
0    1.0  25688592.0 2022-08-23 2022-08-30  364.626801   
1    2.0  25688596.0 2022-08-26 2022-08-26   21.448625   
2    3.0  25688597.0 2022-08-19 2022-08-19   21.448625   
3    4.0  25688598.0 2022-08-12 2022-08-17   85.794575   
4    5.0  25688599.0 2022-08-28 2022-08-28   21.448625   

                                            geometry  
0  POLYGON ((28.06051 -31.58333, 28.06176 -31.587...  
1  POLYGON ((27.5127 -31.58333, 27.51759 -31.5833...  
2  POLYGON ((27.74258 -31.58333, 27.74747 -31.583...  
3  POLYGON ((29.09025 -31.59167, 29.09155 -31.595...  
4  POLYGON ((29.20504 -31.58333, 29.20993 -31.583...  


In [4]:
df = dask_gdf.compute()
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 10123583 entries, 0 to 1016865
Data columns (total 6 columns):
 #   Column      Dtype         
---  ------      -----         
 0   _uid_       float64       
 1   id          float64       
 2   initialdat  datetime64[ms]
 3   finaldate   datetime64[ms]
 4   area_ha     float64       
 5   geometry    geometry      
dtypes: datetime64[ms](2), float64(3), geometry(1)
memory usage: 540.7 MB


In [6]:
df.head()

Unnamed: 0,_uid_,id,initialdat,finaldate,area_ha,geometry
0,1.0,25688592.0,2022-08-23,2022-08-30,364.626801,"POLYGON ((28.06051 -31.58333, 28.06176 -31.587..."
1,2.0,25688596.0,2022-08-26,2022-08-26,21.448625,"POLYGON ((27.5127 -31.58333, 27.51759 -31.5833..."
2,3.0,25688597.0,2022-08-19,2022-08-19,21.448625,"POLYGON ((27.74258 -31.58333, 27.74747 -31.583..."
3,4.0,25688598.0,2022-08-12,2022-08-17,85.794575,"POLYGON ((29.09025 -31.59167, 29.09155 -31.595..."
4,5.0,25688599.0,2022-08-28,2022-08-28,21.448625,"POLYGON ((29.20504 -31.58333, 29.20993 -31.583..."


In [7]:
print("Total number of burn scar events between 2014 and 2023:", len(df))

Total number of burn scar events between 2014 and 2023: 10123583
