# Machine Learning for Assessing Brush Fire Risk in The United States

## Import required packages

In [3]:
# !pip install geopandas shapely
#%pip install xarray
#%pip install zarr
#%pip install fsspec
#%pip install cartopy
#%pip install netCDF4
#%pip install scipy
#%pip install bottleneck
#%pip install gcsfs

In [40]:
#Importing required packages
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
import re
import csv
import xarray as xr
import zarr
import fsspec
import cartopy.crs as ccrs
import glob as glob
import netCDF4 as nc
from netCDF4 import Dataset
from scipy.stats import skew,stats
import bottleneck
import gcsfs
import matplotlib.ticker as mticker
import warnings
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore") 

# import geopandas as gpd
# from shapely.geometry import Point

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

## CMIP6 Data

#### burntFractionAll

In [5]:
#Loading CMIP6 data stored on google cloud
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')

#subselect for surface temparature and the two experiments:
# df_pr = df.query("activity_id=='HighResMIP' & (variable_id == 'pr' ) & (experiment_id == 'highresSST-present'|experiment_id == 'highresSST-future') & source_id=='MRI-AGCM3-2-S' & table_id == 'Amon'")


df_burntFractionAll = df.query("variable_id == 'burntFractionAll'  & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Lmon'")

df_burntFractionAll

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
44114,CMIP,CNRM-CERFACS,CNRM-ESM2-1,historical,r1i1p1f2,Lmon,burntFractionAll,gr,gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1...,,20181206


In [6]:
burntFractionAll_store_present = df_burntFractionAll.zstore.values[0]
mapper = fsspec.get_mapper(burntFractionAll_store_present)
burntFractionAll_present = xr.open_zarr(mapper, consolidated=True)

#burntFractionAll_present

In [7]:
burntFractionAll_store_present

'gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Lmon/burntFractionAll/gr/v20181206/'

In [8]:
min_value = burntFractionAll_present['burntFractionAll'].min()
max_value = burntFractionAll_present['burntFractionAll'].max()

print(f"Minimum burntFractionAll: {min_value.values}")
print(f"Maximum burntFractionAll: {max_value.values}")

Minimum burntFractionAll: 0.0
Maximum burntFractionAll: 1.2239598035812378


In [62]:
'''
# Define a threshold for burnt fraction to classify as fire
fire_threshold = 0.1

burntFractionAll_present['fire_label'] = (burntFractionAll_present['burntFractionAll'] > fire_threshold).astype(int)
bfa_combined = burntFractionAll_present.sel(time=slice('1940-01-16T12:00:00' , '1940-2-16T12:00:00'))
bfa_df = bfa_combined['fire_label'].to_dataframe()
bfa_df = bfa_df.drop(columns='type')
bfa_df
'''

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fire_label
time,lat,lon,Unnamed: 3_level_1
1940-01-16 12:00:00,-88.927735,0.00000,0
1940-01-16 12:00:00,-88.927735,1.40625,0
1940-01-16 12:00:00,-88.927735,2.81250,0
1940-01-16 12:00:00,-88.927735,4.21875,0
1940-01-16 12:00:00,-88.927735,5.62500,0
...,...,...,...
1940-02-15 12:00:00,88.927735,352.96875,0
1940-02-15 12:00:00,88.927735,354.37500,0
1940-02-15 12:00:00,88.927735,355.78125,0
1940-02-15 12:00:00,88.927735,357.18750,0


In [103]:

bfa_combined = burntFractionAll_present.sel(time=slice('1940-01-16T12:00:00' , '1940-2-16T12:00:00'))
bfa_df = bfa_combined.burntFractionAll.to_dataframe()
bfa_df = bfa_df.drop(columns='type')
#bfa_df
label_counts = bfa_df.value_counts()

# Display the value counts
print(label_counts)


burntFractionAll
0.000000            9432
0.000274             512
0.000164             182
0.000027              60
0.000274              26
                    ... 
0.000291               1
0.000291               1
0.000291               1
0.000290               1
0.754695               1
Length: 12548, dtype: int64


In [102]:

# Use value_counts on the DataFrame
label_counts = bfa_df['fire_label'].value_counts()

# Display the value counts
print(label_counts)

KeyError: 'fire_label'

#### pr

In [93]:
df_pr = df.query("variable_id == 'pr' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'")

pr_store_present = df_pr.zstore.values[0]

#pr_store_future = df_pr.zstore.values[1]
mapper = fsspec.get_mapper(pr_store_present)
pr_present = xr.open_zarr(mapper, consolidated=True)
#pr_present_split = pr_present.sel(time=slice('2012-01-16T12:00:00' , '2014-12-16T12:00:00'))

#pr_combined = xr.concat([pr_present_split, pr_future_split], dim='time')
pr_combined = pr_present.pr.sel(time=slice('1940-01-16T12:00:00' , '1940-2-16T12:00:00'))
pr_df = pr_combined.to_dataframe()
pr_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pr
time,lat,lon,Unnamed: 3_level_1
1940-01-16 12:00:00,-88.927735,0.00000,4.780071e-07
1940-01-16 12:00:00,-88.927735,1.40625,4.780071e-07
1940-01-16 12:00:00,-88.927735,2.81250,4.780071e-07
1940-01-16 12:00:00,-88.927735,4.21875,4.780071e-07
1940-01-16 12:00:00,-88.927735,5.62500,4.780071e-07
...,...,...,...
1940-02-15 12:00:00,88.927735,352.96875,1.903327e-06
1940-02-15 12:00:00,88.927735,354.37500,1.903327e-06
1940-02-15 12:00:00,88.927735,355.78125,1.903327e-06
1940-02-15 12:00:00,88.927735,357.18750,1.903327e-06


#### sfcWind

In [33]:
df_sfcWind = df.query("variable_id == 'sfcWind' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'")

sfcWind_store_present = df_sfcWind.zstore.values[0]
mapper = fsspec.get_mapper(sfcWind_store_present)
sfcWind_present = xr.open_zarr(mapper, consolidated=True)

#sfcWind_present_split = sfcWind_present.sel(time=slice('2012-01-16T12:00:00' , '2014-12-16T12:00:00'))
#sfcWind_present_split

#sfcWind_combined = xr.concat([sfcWind_present_split, sfcWind_future_split], dim='time')
sfcWind_combined = sfcWind_present.sfcWind.sel(time=slice('1940-01-16T12:00:00' , '1940-2-16T12:00:00'))
sfcWind_df = sfcWind_combined.to_dataframe()
sfcWind_df = sfcWind_df.drop(columns='height')
sfcWind_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,sfcWind
time,lat,lon,Unnamed: 3_level_1
1940-01-16 12:00:00,-88.927735,0.00000,2.963656
1940-01-16 12:00:00,-88.927735,1.40625,2.963656
1940-01-16 12:00:00,-88.927735,2.81250,2.963656
1940-01-16 12:00:00,-88.927735,4.21875,2.963656
1940-01-16 12:00:00,-88.927735,5.62500,2.963656
...,...,...,...
1940-02-15 12:00:00,88.927735,352.96875,6.793680
1940-02-15 12:00:00,88.927735,354.37500,6.793680
1940-02-15 12:00:00,88.927735,355.78125,6.793680
1940-02-15 12:00:00,88.927735,357.18750,6.793680


#### hur

In [77]:
df_hurs = df.query("variable_id == 'hurs' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'")

hurs_store_present = df_hurs.zstore.values[0]
mapper = fsspec.get_mapper(hurs_store_present)
hurs_present = xr.open_zarr(mapper, consolidated=True)

hurs_combined = hurs_present.hurs.sel(time=slice('1940-01-16T12:00:00' , '1940-2-16T12:00:00'))

hurs_df = hurs_combined.to_dataframe()
hurs_df = hurs_df.drop(columns='height')
hurs_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,hurs
time,lat,lon,Unnamed: 3_level_1
1940-01-16 12:00:00,-88.927735,0.00000,94.292168
1940-01-16 12:00:00,-88.927735,1.40625,94.292168
1940-01-16 12:00:00,-88.927735,2.81250,94.292168
1940-01-16 12:00:00,-88.927735,4.21875,94.292168
1940-01-16 12:00:00,-88.927735,5.62500,94.292168
...,...,...,...
1940-02-15 12:00:00,88.927735,352.96875,74.388908
1940-02-15 12:00:00,88.927735,354.37500,74.388908
1940-02-15 12:00:00,88.927735,355.78125,74.388908
1940-02-15 12:00:00,88.927735,357.18750,74.388908


#### ta

In [14]:
df_tas = df.query("variable_id == 'tas' & source_id == 'CNRM-ESM2-1' & member_id == 'r1i1p1f2' & experiment_id == 'historical' & table_id == 'Amon'")

tas_store_present = df_tas.zstore.values[0]
mapper = fsspec.get_mapper(tas_store_present)
tas_present = xr.open_zarr(mapper, consolidated=True)

tas_combined = tas_present.tas.sel(time=slice('1940-01-16T12:00:00' , '1940-2-16T12:00:00'))

tas_df = tas_combined.to_dataframe()
tas_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,height,tas
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1
1940-01-16 12:00:00,-88.927735,0.00000,2.0,243.316696
1940-01-16 12:00:00,-88.927735,1.40625,2.0,243.316696
1940-01-16 12:00:00,-88.927735,2.81250,2.0,243.316696
1940-01-16 12:00:00,-88.927735,4.21875,2.0,243.316696
1940-01-16 12:00:00,-88.927735,5.62500,2.0,243.316696
...,...,...,...,...
1940-02-15 12:00:00,88.927735,352.96875,2.0,236.381638
1940-02-15 12:00:00,88.927735,354.37500,2.0,236.381638
1940-02-15 12:00:00,88.927735,355.78125,2.0,236.381638
1940-02-15 12:00:00,88.927735,357.18750,2.0,236.381638


In [15]:
pr_store_present

'gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Amon/pr/gr/v20181206/'

In [16]:
sfcWind_store_present

'gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Amon/sfcWind/gr/v20181206/'

In [17]:
hur_store_present

'gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Amon/hur/gr/v20181206/'

In [18]:
tas_store_present

'gs://cmip6/CMIP6/CMIP/CNRM-CERFACS/CNRM-ESM2-1/historical/r1i1p1f2/Amon/tas/gr/v20181206/'

In [112]:
frames = [tas_df, sfcWind_df, hurs_df, pr_df, bfa_df]

result = pd.concat(frames, axis=1)
result = result.dropna() 
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,height,tas,sfcWind,hurs,pr,burntFractionAll
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1940-01-16 12:00:00,-88.927735,0.00000,2.0,243.316696,2.963656,94.292168,4.780071e-07,0.0
1940-01-16 12:00:00,-88.927735,1.40625,2.0,243.316696,2.963656,94.292168,4.780071e-07,0.0
1940-01-16 12:00:00,-88.927735,2.81250,2.0,243.316696,2.963656,94.292168,4.780071e-07,0.0
1940-01-16 12:00:00,-88.927735,4.21875,2.0,243.316696,2.963656,94.292168,4.780071e-07,0.0
1940-01-16 12:00:00,-88.927735,5.62500,2.0,243.316696,2.963656,94.292168,4.780071e-07,0.0
...,...,...,...,...,...,...,...,...
1940-02-15 12:00:00,83.342596,337.50000,2.0,235.123871,4.151989,63.164009,2.961700e-06,0.0
1940-02-15 12:00:00,83.342596,338.90625,2.0,235.123871,4.151989,63.164009,2.961700e-06,0.0
1940-02-15 12:00:00,83.342596,340.31250,2.0,235.123871,4.151989,63.164009,2.961700e-06,0.0
1940-02-15 12:00:00,83.342596,341.71875,2.0,235.123871,4.151989,63.164009,2.961700e-06,0.0


In [113]:
X = result.iloc[:, :-1] 
y = result.iloc[:, -1] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
'''
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
conf_matrix = confusion_matrix(y_test, predictions)

print("precision", precision_score(y_test, predictions))
print("accuracy", accuracy_score(y_test, predictions))
print("recall", recall_score(y_test, predictions))

# Extract values from the confusion matrix
tn, fp, fn, tp = conf_matrix.ravel()
print("tn:", tn)
print("tp:", tp)
print("fn:", fn)
print("fp:", fp)

print(predictions)
'''

'\nclf = tree.DecisionTreeClassifier()\nclf = clf.fit(X_train, y_train)\npredictions = clf.predict(X_test)\nconf_matrix = confusion_matrix(y_test, predictions)\n\nprint("precision", precision_score(y_test, predictions))\nprint("accuracy", accuracy_score(y_test, predictions))\nprint("recall", recall_score(y_test, predictions))\n\n# Extract values from the confusion matrix\ntn, fp, fn, tp = conf_matrix.ravel()\nprint("tn:", tn)\nprint("tp:", tp)\nprint("fn:", fn)\nprint("fp:", fp)\n\nprint(predictions)\n'

In [114]:

from sklearn.metrics import mean_squared_error
clf = tree.DecisionTreeRegressor()
clf = clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print(mse)
print(np.unique(predictions))

0.0017426428637326071
[0.00000000e+00 2.00719110e-12 1.62090585e-11 ... 5.58807254e-01
 5.75998485e-01 7.54694998e-01]
