# Machine Learning for Assessing Brush Fire Risk in the United States

## Import required packages

In [1]:
# Optional one-time environment setup. Every line is commented out on purpose:
# uncomment only the packages missing from your environment, run the cell once,
# then restart the kernel before running the import cell below.
# Prefer `%pip` over `!pip` so the install targets the running kernel's environment.
# !pip install geopandas shapely
#%pip install xarray
#%pip install zarr
#%pip install fsspec
#%pip install cartopy
#%pip install netCDF4
#%pip install scipy
#%pip install bottleneck
#%pip install gcsfs

In [2]:
#Importing required packages
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
import re
import csv
import xarray as xr
import zarr
import fsspec
import cartopy.crs as ccrs
import glob as glob
import netCDF4 as nc
from netCDF4 import Dataset
from scipy.stats import skew,stats
import bottleneck
import gcsfs
import matplotlib.ticker as mticker
import warnings
warnings.filterwarnings("ignore") 

# import geopandas as gpd
# from shapely.geometry import Point

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

## CMIP6 Data

#### burntFractionAll

In [3]:
# CSV catalogue of the CMIP6 Zarr stores hosted on Google Cloud.
CMIP6_CATALOG_URL = 'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv'

# One row per store; later cells filter this table and read the `zstore` column.
df = pd.read_csv(CMIP6_CATALOG_URL)

In [11]:
# Catalogue rows for burntFractionAll from a single model and ensemble member.
# No experiment filter is applied, so the result can contain several experiments.
burnt_fraction_filter = "variable_id == 'burntFractionAll' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2'"
df_burntFractionAll = df.query(burnt_fraction_filter)

df_burntFractionAll

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
44114,CMIP,CNRM-CERFACS,CNRM-ESM2-1,historical,r1i1p1f2,Lmon,burntFractionAll,gr,gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1...,,20181206
53402,CMIP,CNRM-CERFACS,CNRM-ESM2-1,esm-hist,r1i1p1f2,Lmon,burntFractionAll,gr,gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1...,,20190215
68560,ScenarioMIP,CNRM-CERFACS,CNRM-ESM2-1,ssp245,r1i1p1f2,Lmon,burntFractionAll,gr,gs://cmip6/CMIP6/ScenarioMIP/CNRM-CERFACS/CNRM...,,20190328
376059,ScenarioMIP,CNRM-CERFACS,CNRM-ESM2-1,ssp585,r1i1p1f2,Lmon,burntFractionAll,gr,gs://cmip6/CMIP6/ScenarioMIP/CNRM-CERFACS/CNRM...,,20191021


In [12]:
# Select the historical run explicitly. `df_burntFractionAll` holds one row per
# experiment (historical, esm-hist, ssp245, ssp585), so blindly taking row 0
# is only correct while the catalogue happens to list the historical store first.
historical_rows = df_burntFractionAll[df_burntFractionAll['experiment_id'] == 'historical']
if historical_rows.empty:
    raise ValueError("No historical burntFractionAll store found in the CMIP6 catalogue")

burntFractionAll_store_present = historical_rows['zstore'].values[0]
print(burntFractionAll_store_present)

# Open the remote Zarr store through an fsspec mapper, using consolidated metadata.
mapper = fsspec.get_mapper(burntFractionAll_store_present)
burntFractionAll_present = xr.open_zarr(mapper, consolidated=True)

burntFractionAll_present

gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Lmon/burntFractionAll/gr/v20181206/


Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,30.94 kiB
Shape,"(1980, 2)","(1980, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 30.94 kiB 30.94 kiB Shape (1980, 2) (1980, 2) Dask graph 1 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",2  1980,

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,30.94 kiB
Shape,"(1980, 2)","(1980, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,247.50 MiB,75.00 MiB
Shape,"(1980, 128, 256)","(600, 128, 256)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 247.50 MiB 75.00 MiB Shape (1980, 128, 256) (600, 128, 256) Dask graph 4 chunks in 2 graph layers Data type float32 numpy.ndarray",256  128  1980,

Unnamed: 0,Array,Chunk
Bytes,247.50 MiB,75.00 MiB
Shape,"(1980, 128, 256)","(600, 128, 256)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [13]:
# Extremes of burntFractionAll taken over every dimension of the array.
burnt_fraction = burntFractionAll_present['burntFractionAll']
min_value, max_value = burnt_fraction.min(), burnt_fraction.max()

print(f"Minimum burntFractionAll: {min_value.values}")
print(f"Maximum burntFractionAll: {max_value.values}")

Minimum burntFractionAll: 0.0
Maximum burntFractionAll: 1.2239598035812378


In [14]:
# Cut-off on burntFractionAll: values strictly above it are labelled as fire.
fire_threshold = 0.3

# Binary target stored next to the raw variable: 1 above the cut-off, 0 otherwise.
# NOTE(review): NaN compares False, so any missing cells would also get label 0 — confirm this is intended.
# NOTE(review): the maximum printed earlier exceeds 1 — confirm the units of burntFractionAll before trusting 0.3.
burnt_fraction = burntFractionAll_present['burntFractionAll']
is_fire = burnt_fraction > fire_threshold
burntFractionAll_present['fire_label'] = is_fire.astype(int)

In [15]:
# Class balance check: flatten the label array into a table and count each class.
fire_labels = burntFractionAll_present['fire_label']
label_df = fire_labels.to_dataframe()
label_counts = label_df.fire_label.value_counts()

# Printed as plain text, one line per label value.
print(label_counts)

0    64708175
1      172465
Name: fire_label, dtype: int64


In [16]:
# Long-format table of the fire labels, indexed by (time, lat, lon).
burntFractionAll_combined = burntFractionAll_present['fire_label']
burntFractionAll_df = burntFractionAll_combined.to_dataframe()

burntFractionAll_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,type,fire_label
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1
1850-01-16 12:00:00,-88.927735,0.00000,b'typeburnt',0
1850-01-16 12:00:00,-88.927735,1.40625,b'typeburnt',0
1850-01-16 12:00:00,-88.927735,2.81250,b'typeburnt',0
1850-01-16 12:00:00,-88.927735,4.21875,b'typeburnt',0
1850-01-16 12:00:00,-88.927735,5.62500,b'typeburnt',0
...,...,...,...,...
2014-12-16 12:00:00,88.927735,352.96875,b'typeburnt',0
2014-12-16 12:00:00,88.927735,354.37500,b'typeburnt',0
2014-12-16 12:00:00,88.927735,355.78125,b'typeburnt',0
2014-12-16 12:00:00,88.927735,357.18750,b'typeburnt',0


#### pr

In [9]:
# Catalogue entry for `pr`: same model and member, historical experiment, Amon table.
pr_filter = "variable_id == 'pr' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'"
df_pr = df.query(pr_filter)
pr_store_present = df_pr['zstore'].values[0]

# Open the remote Zarr store using its consolidated metadata.
mapper = fsspec.get_mapper(pr_store_present)
pr_present = xr.open_zarr(mapper, consolidated=True)

# Flatten the `pr` variable into a table indexed by (time, lat, lon).
pr_combined = pr_present['pr']
pr_df = pr_combined.to_dataframe()
pr_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pr
time,lat,lon,Unnamed: 3_level_1
1850-01-16 12:00:00,-88.927735,0.00000,0.000001
1850-01-16 12:00:00,-88.927735,1.40625,0.000001
1850-01-16 12:00:00,-88.927735,2.81250,0.000001
1850-01-16 12:00:00,-88.927735,4.21875,0.000001
1850-01-16 12:00:00,-88.927735,5.62500,0.000001
...,...,...,...
2014-12-16 12:00:00,88.927735,352.96875,0.000002
2014-12-16 12:00:00,88.927735,354.37500,0.000002
2014-12-16 12:00:00,88.927735,355.78125,0.000002
2014-12-16 12:00:00,88.927735,357.18750,0.000002


#### sfcWind

In [8]:
# Catalogue entry for `sfcWind`: same model and member, historical experiment, Amon table.
sfcwind_filter = "variable_id == 'sfcWind' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'"
df_sfcWind = df.query(sfcwind_filter)

df_sfcWind

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
44186,CMIP,CNRM-CERFACS,CNRM-ESM2-1,historical,r1i1p1f2,Amon,sfcWind,gr,gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1...,,20181206


In [10]:
# First (and, per the catalogue query, only) matching store for sfcWind.
sfcWind_store_present = df_sfcWind['zstore'].values[0]

# Open the remote Zarr store using its consolidated metadata.
mapper = fsspec.get_mapper(sfcWind_store_present)
sfcWind_present = xr.open_zarr(mapper, consolidated=True)

# Flatten the `sfcWind` variable into a table indexed by (time, lat, lon).
sfcWind_combined = sfcWind_present['sfcWind']
sfcWind_df = sfcWind_combined.to_dataframe()
sfcWind_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,height,sfcWind
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1
1850-01-16 12:00:00,-88.927735,0.00000,10.0,3.439427
1850-01-16 12:00:00,-88.927735,1.40625,10.0,3.439427
1850-01-16 12:00:00,-88.927735,2.81250,10.0,3.439427
1850-01-16 12:00:00,-88.927735,4.21875,10.0,3.439427
1850-01-16 12:00:00,-88.927735,5.62500,10.0,3.439427
...,...,...,...,...
2014-12-16 12:00:00,88.927735,352.96875,10.0,6.243606
2014-12-16 12:00:00,88.927735,354.37500,10.0,6.243606
2014-12-16 12:00:00,88.927735,355.78125,10.0,6.243606
2014-12-16 12:00:00,88.927735,357.18750,10.0,6.243606


#### hurs

In [17]:
# Catalogue entry for `hurs`: same model and member, historical experiment, Amon table.
# The DataFrame keeps the name `df_hur` even though the variable id is `hurs`.
hurs_filter = "variable_id == 'hurs' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'"
df_hur = df.query(hurs_filter)

df_hur

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
44259,CMIP,CNRM-CERFACS,CNRM-ESM2-1,historical,r1i1p1f2,Amon,hurs,gr,gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1...,,20181206


In [18]:
# Resolve the store path for hurs and echo it for provenance.
hur_store_present = df_hur['zstore'].values[0]
print(hur_store_present)

# Open the remote Zarr store using its consolidated metadata.
mapper_hur = fsspec.get_mapper(hur_store_present)
hur_present = xr.open_zarr(mapper_hur, consolidated=True)

hur_present

gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Amon/hurs/gr/v20181206/


Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,30.94 kiB
Shape,"(1980, 2)","(1980, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray
"Array Chunk Bytes 30.94 kiB 30.94 kiB Shape (1980, 2) (1980, 2) Dask graph 1 chunks in 2 graph layers Data type datetime64[ns] numpy.ndarray",2  1980,

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,30.94 kiB
Shape,"(1980, 2)","(1980, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,datetime64[ns] numpy.ndarray,datetime64[ns] numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,247.50 MiB,75.00 MiB
Shape,"(1980, 128, 256)","(600, 128, 256)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 247.50 MiB 75.00 MiB Shape (1980, 128, 256) (600, 128, 256) Dask graph 4 chunks in 2 graph layers Data type float32 numpy.ndarray",256  128  1980,

Unnamed: 0,Array,Chunk
Bytes,247.50 MiB,75.00 MiB
Shape,"(1980, 128, 256)","(600, 128, 256)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [20]:
# Flatten the `hurs` variable into a table indexed by (time, lat, lon).
hur_combined = hur_present['hurs']
hur_df = hur_combined.to_dataframe()

hur_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,height,hurs
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1
1850-01-16 12:00:00,-88.927735,0.00000,2.0,90.531197
1850-01-16 12:00:00,-88.927735,1.40625,2.0,90.531197
1850-01-16 12:00:00,-88.927735,2.81250,2.0,90.531197
1850-01-16 12:00:00,-88.927735,4.21875,2.0,90.531197
1850-01-16 12:00:00,-88.927735,5.62500,2.0,90.531197
...,...,...,...,...
2014-12-16 12:00:00,88.927735,352.96875,2.0,89.186096
2014-12-16 12:00:00,88.927735,354.37500,2.0,89.186096
2014-12-16 12:00:00,88.927735,355.78125,2.0,89.186096
2014-12-16 12:00:00,88.927735,357.18750,2.0,89.186096


#### tas

In [23]:
# Catalogue entry for `tas`: same model and member, historical experiment, Amon table.
# Names keep the `ta` prefix even though the variable id is `tas`.
tas_filter = "variable_id == 'tas' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'"
df_ta = df.query(tas_filter)

# Resolve the store path and echo it for provenance.
ta_store_present = df_ta['zstore'].values[0]
print(ta_store_present)

# Open the remote Zarr store using its consolidated metadata.
mapper = fsspec.get_mapper(ta_store_present)
ta_present = xr.open_zarr(mapper, consolidated=True)

# Flatten the `tas` variable into a table indexed by (time, lat, lon).
ta_combined = ta_present['tas']
ta_df = ta_combined.to_dataframe()
ta_df


gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Amon/tas/gr/v20181206/


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,height,tas
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1
1850-01-16 12:00:00,-88.927735,0.00000,2.0,242.565491
1850-01-16 12:00:00,-88.927735,1.40625,2.0,242.565491
1850-01-16 12:00:00,-88.927735,2.81250,2.0,242.565491
1850-01-16 12:00:00,-88.927735,4.21875,2.0,242.565491
1850-01-16 12:00:00,-88.927735,5.62500,2.0,242.565491
...,...,...,...,...
2014-12-16 12:00:00,88.927735,352.96875,2.0,248.361481
2014-12-16 12:00:00,88.927735,354.37500,2.0,248.361481
2014-12-16 12:00:00,88.927735,355.78125,2.0,248.361481
2014-12-16 12:00:00,88.927735,357.18750,2.0,248.361481


#### Combining Datasets