The Notebook explores and showcases Global Wildfire events

In [1]:
import os
import glob
import pandas as pd
import geopandas as gpd
import dask_geopandas as dgpd
import dask.dataframe as dd


In [2]:
wildfire_path = "/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023"
shapefile_paths = glob.glob(os.path.join(wildfire_path, "*.shp"))

filtered_shapefiles = [
    path for path in shapefile_paths
    if 2014 <= int(os.path.basename(path).split('_')[-1].split('.')[0]) <= 2023
]

print(filtered_shapefiles)
filtered_shapefiles

['/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2022.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2021.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2020.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2019.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2018.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2023.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2015.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2016.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2014.shp', '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire

['/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2022.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2021.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2020.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2019.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2018.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2023.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2015.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2016.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original_globfire_filtered_2014.shp',
 '/workspace/_output/GLOBFIRE_burned_area_full_dataset_2002_2023/original

In [3]:
list_of_ddfs = [dgpd.read_file(shp, chunksize=2048) for shp in shapefile_paths]
dask_gdf = dgpd.from_dask_dataframe(dd.concat(list_of_ddfs))
print(dask_gdf.head())

   _uid_         id initialdat  finaldate     area_ha  \
0    1.0  8592263.0 2008-01-12 2008-01-12   21.402765   
1    2.0  8592264.0 2008-01-13 2008-01-13   21.402738   
2    3.0  8592129.0 2008-01-02 2008-01-02   21.445638   
3    4.0  8592130.0 2008-01-07 2008-01-07   21.445601   
4    5.0  8592131.0 2008-01-03 2008-01-03  128.669714   

                                            geometry  
0  POLYGON ((-102.64936 19.79583, -102.64493 19.7...  
1  POLYGON ((-102.63513 19.7875, -102.6307 19.787...  
2  POLYGON ((-112.96276 30.91667, -112.9579 30.91...  
3  POLYGON ((-112.95292 30.90833, -112.94806 30.9...  
4  POLYGON ((-112.48573 30.77917, -112.47603 30.7...  


In [4]:
df = dask_gdf.compute()
print(df.info())

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 23726360 entries, 0 to 1016865
Data columns (total 6 columns):
 #   Column      Dtype         
---  ------      -----         
 0   _uid_       float64       
 1   id          float64       
 2   initialdat  datetime64[ms]
 3   finaldate   datetime64[ms]
 4   area_ha     float64       
 5   geometry    geometry      
dtypes: datetime64[ms](2), float64(3), geometry(1)
memory usage: 1.2 GB
None


In [12]:
df['initialdat'] = pd.to_datetime(df['initialdat'])
filtered_df = df[(df['initialdat'].dt.year >= 2014) & (df['initialdat'].dt.year <= 2023)]
filtered_df.sample(10)

Unnamed: 0,_uid_,id,initialdat,finaldate,area_ha,geometry
769608,769609.0,16731114.0,2014-09-24,2014-10-01,171.19625,"POLYGON ((32.92902 -18.7625, 32.92983 -18.7666..."
535424,535425.0,22830737.0,2020-01-02,2020-01-04,42.755015,"POLYGON ((-9.12184 9.375, -9.11761 9.375, -9.1..."
431114,431115.0,20985694.0,2018-06-25,2018-06-30,833.675194,"POLYGON ((18.88025 -8.54167, 18.87604 -8.54167..."
83005,83006.0,20638839.0,2018-01-12,2018-01-12,106.879719,"POLYGON ((0.80011 8.32917, 0.80853 8.32917, 0...."
211806,211807.0,20241563.0,2017-09-01,2017-09-12,1689.938508,"POLYGON ((19.14778 -15.975, 19.14345 -15.975, ..."
378581,378582.0,24389610.0,2021-06-24,2021-06-24,21.38459,"POLYGON ((17.39316 -13.08333, 17.39744 -13.083..."
874235,874236.0,21503776.0,2018-11-09,2018-11-09,21.371943,"POLYGON ((-45.48201 -4.825, -45.47783 -4.825, ..."
163141,163142.0,17350869.0,2015-04-15,2015-04-15,42.937925,"POLYGON ((43.46777 35.97083, 43.47807 35.97083..."
216225,216226.0,16112646.0,2014-03-15,2014-03-20,213.794294,"MULTIPOLYGON (((34.51309 10.51667, 34.52156 10..."
97797,97798.0,23547125.0,2020-08-08,2020-08-08,21.374503,"POLYGON ((25.34547 -7.27083, 25.34967 -7.27083..."


In [13]:
print("Total number of burn scar events between 2014 and 2023:", len(filtered_df))

Total number of burn scar events between 2014 and 2023: 10123583
