# Machine Learning for Assessing Brush Fire Risk in the United States

## Import required packages

In [2]:
!pip install geopandas
!pip install reverse_geocoder

Collecting geopandas
  Obtaining dependency information for geopandas from https://files.pythonhosted.org/packages/27/27/2687abaa2ac02b5814e2929a5033da1e5d132c19a904dc56f77f63fd6eb9/geopandas-0.14.0-py3-none-any.whl.metadata
  Downloading geopandas-0.14.0-py3-none-any.whl.metadata (1.5 kB)
Collecting fiona>=1.8.21 (from geopandas)
  Obtaining dependency information for fiona>=1.8.21 from https://files.pythonhosted.org/packages/7f/27/b24c1610c7ae5716709321f04d38d7b8b71ed531f80df4f697b9ad99cfc3/fiona-1.9.5-cp311-cp311-win_amd64.whl.metadata
  Downloading fiona-1.9.5-cp311-cp311-win_amd64.whl.metadata (51 kB)
     ---------------------------------------- 0.0/51.1 kB ? eta -:--:--
     --------------- ---------------------- 20.5/51.1 kB 640.0 kB/s eta 0:00:01
     ------------------------------ ------- 41.0/51.1 kB 487.6 kB/s eta 0:00:01
     -------------------------------------- 51.1/51.1 kB 524.0 kB/s eta 0:00:00
Collecting click-plugins>=1.0 (from fiona>=1.8.21->geopandas)
  Downloadin

Collecting reverse_geocoder
  Downloading reverse_geocoder-1.5.1.tar.gz (2.2 MB)
     ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
      --------------------------------------- 0.0/2.2 MB 1.3 MB/s eta 0:00:02
     - -------------------------------------- 0.1/2.2 MB 871.5 kB/s eta 0:00:03
     -- ------------------------------------- 0.2/2.2 MB 1.0 MB/s eta 0:00:03
     ----- ---------------------------------- 0.3/2.2 MB 1.6 MB/s eta 0:00:02
     --------- ------------------------------ 0.5/2.2 MB 2.3 MB/s eta 0:00:01
     ------------ --------------------------- 0.7/2.2 MB 2.5 MB/s eta 0:00:01
     -------------- ------------------------- 0.8/2.2 MB 2.5 MB/s eta 0:00:01
     ----------------------- ---------------- 1.3/2.2 MB 3.7 MB/s eta 0:00:01
     ------------------------------------- -- 2.1/2.2 MB 5.0 MB/s eta 0:00:01
     ---------------------------------------- 2.2/2.2 MB 5.1 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (set

In [3]:
#Importing required packages
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import os
import re
import csv
import xarray as xr
import zarr
import fsspec
import cartopy.crs as ccrs
import glob as glob
import netCDF4 as nc
from netCDF4 import Dataset
from scipy.stats import skew,stats
import bottleneck
import gcsfs
import matplotlib.ticker as mticker
import warnings
warnings.filterwarnings("ignore") 

import geopandas as gpd
from shapely.geometry import Point
import reverse_geocoder as rg

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

## Loading data

In [4]:
# Directory containing the yearly MODIS CSV files
directory = './'

# Per-year dataframes keyed by the four-digit year string (e.g. '2012'),
# plus the same frames collected in a list for concatenation.
modis_data = {}
all_dataframes = []

# The whole filename has to match: the dot is escaped and fullmatch() rejects
# trailing text, so stray files such as "modis_2012_United_States.csv.bak"
# are not loaded (and cannot overwrite the real entry for that year).
modis_filename = re.compile(r'modis_(\d{4})_United_States\.csv')

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the combined frame reproducible between runs and machines.
for filename in sorted(os.listdir(directory)):
    match = modis_filename.fullmatch(filename)
    if match:
        # Extract the year from the matched filename
        year = match.group(1)
        # Load the CSV file and register it under its year
        df = pd.read_csv(os.path.join(directory, filename))
        modis_data[year] = df
        all_dataframes.append(df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when no input file was found.
if not all_dataframes:
    raise FileNotFoundError(
        "No files named 'modis_<year>_United_States.csv' found in "
        f"{os.path.abspath(directory)!r}"
    )

all_in_one_data = pd.concat(all_dataframes, ignore_index=True)

In [5]:
# Bind each year's dataframe to its own name. The keys of `modis_data` are
# four-digit year strings, hence the str() conversion; a missing year raises
# KeyError.
(
    modis_2012, modis_2013, modis_2014, modis_2015,
    modis_2016, modis_2017, modis_2018, modis_2019,
    modis_2020, modis_2021, modis_2022,
) = (modis_data[str(year)] for year in range(2012, 2023))

In [8]:
# Print the first record and the container type, then let the notebook
# render the first five rows as the cell's output.
first_record = all_in_one_data.iloc[0]
print(first_record)
print(type(all_in_one_data))
all_in_one_data.head(5)

latitude         33.0156
longitude       -97.0675
brightness         325.4
scan                 1.1
track                1.1
acq_date      2012-01-01
acq_time             426
satellite          Terra
instrument         MODIS
confidence           100
version              6.2
bright_t31         285.5
frp                 30.5
daynight               N
type                   0
Name: 0, dtype: object
<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,33.0156,-97.0675,325.4,1.1,1.1,2012-01-01,426,Terra,MODIS,100,6.2,285.5,30.5,N,0
1,36.3413,-96.521,313.3,1.0,1.0,2012-01-01,427,Terra,MODIS,87,6.2,279.3,15.9,N,0
2,36.3396,-96.5321,320.3,1.0,1.0,2012-01-01,427,Terra,MODIS,100,6.2,279.1,21.8,N,0
3,36.3306,-96.53,326.9,1.0,1.0,2012-01-01,427,Terra,MODIS,100,6.2,280.2,28.8,N,0
4,36.122,-96.0743,302.7,1.0,1.0,2012-01-01,427,Terra,MODIS,51,6.2,279.4,9.0,N,0


In [None]:
# Function to get state names using reverse_geocoder
def get_state(lat, lon):
    """Return the 'admin1' field of the first reverse_geocoder match for one point.

    Parameters
    ----------
    lat, lon : coordinates of the point, passed to ``rg.search`` as a
        ``(lat, lon)`` tuple.

    Returns
    -------
    The ``'admin1'`` entry of the first search result.
    """
    results = rg.search((lat, lon))
    return results[0]['admin1']


def get_states(latitudes, longitudes):
    """Return the 'admin1' field for many points using one batched lookup.

    ``rg.search`` accepts a list of ``(lat, lon)`` tuples, so all points are
    resolved in a single call instead of one search per row, which is what
    made the row-wise ``DataFrame.apply`` version impractically slow.

    Parameters
    ----------
    latitudes, longitudes : iterables of equal length, paired element-wise.

    Returns
    -------
    list with one ``'admin1'`` value per input point, in input order
    (empty when no points are given).
    """
    coordinates = list(zip(latitudes, longitudes))
    # rg.search indexes into its argument, so an empty list must be
    # short-circuited here.
    if not coordinates:
        return []
    return [result['admin1'] for result in rg.search(coordinates)]


# Resolve every detection's state in one batched query
all_in_one_data['state'] = get_states(all_in_one_data['latitude'], all_in_one_data['longitude'])

print(all_in_one_data[['latitude', 'longitude', 'state']])

Loading formatted geocoded file...


## Model

In [41]:
# Load the CMIP6 catalog (one row per zarr store) hosted on Google Cloud
cmip6_catalog_url = 'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv'
df = pd.read_csv(cmip6_catalog_url)

# Subselect the `pr` variable (precipitation in the CMIP6 vocabulary) from the
# 'Amon' table of the MRI-AGCM3-2-S HighResMIP runs, for the two experiments
# highresSST-present and highresSST-future.
pr_filter = (
    "activity_id=='HighResMIP'"
    " & (variable_id == 'pr' )"
    " & (experiment_id == 'highresSST-present'|experiment_id == 'highresSST-future')"
    " & source_id=='MRI-AGCM3-2-S'"
    " & table_id == 'Amon'"
)
df_pr = df.query(pr_filter)
df_pr

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
241884,HighResMIP,MRI,MRI-AGCM3-2-S,highresSST-present,r1i1p1f1,Amon,pr,gn,gs://cmip6/CMIP6/HighResMIP/MRI/MRI-AGCM3-2-S/...,,20190711
255157,HighResMIP,MRI,MRI-AGCM3-2-S,highresSST-future,r1i1p1f1,Amon,pr,gn,gs://cmip6/CMIP6/HighResMIP/MRI/MRI-AGCM3-2-S/...,,20190820
