# About this notebook

This notebook integrates previously acquired gridMet data for San Diego County with historical fire perimeters. It adds three new features to the gridMet dataframe that indicate (1) whether a fire occurred at the location and date specified by the record, (2) the acres burned by that fire if so, and (3) the name of that fire.

In [1]:
%matplotlib inline
import pandas as pd
import geopandas as gpd
import numpy as np

In [2]:
%%time
df = pd.read_parquet('gridMet.parquet.gz').reset_index()
df = df[~(df.date.isna() | df.precipitation_amount_mm.isna())]
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
df.drop(['latitude', 'longitude'], axis=1, inplace=True)

Wall time: 59.8 s


In [3]:
%%time
# Expand the latitude and longitude from a point to a rectangle.
# The extent of the rectangle is 1/24th of a degree in both directions,
# which is the displacement between points of the original gridMet data.
from shapely.geometry import Polygon
dx, dy = 1/24, 1/24
def to_box(p):
    poly = Polygon([(p.x,      p.y),
                    (p.x + dx, p.y),
                    (p.x + dx, p.y + dy),
                    (p.x,      p.y + dy),
                   ])
    return poly
df = df.set_geometry(df.geometry.apply(to_box))
df.head()

Wall time: 10min 8s


Unnamed: 0,date,precipitation_amount_mm,relative_humidity_%,specific_humidity_kg/kg,surface_downwelling_shortwave_flux_in_air_W m-2,wind_from_direction_Degrees Clockwise from north,wind_speed_m/s,max_air_temperature_K,min_air_temperature_K,burning_index_g_Unitless,dead_fuel_moisture_100hr_Percent,dead_fuel_moisture_1000hr_Percent,energy_release_component-g_Unitless,potential_evapotranspiration_mm,mean_vapor_pressure_deficit_kPa,geometry
0,1999-01-01,0.0,40.3,0.00589,138.0,123.0,1.6,293.1,281.1,24.0,16.0,15.5,34.0,1.7,0.74,"POLYGON ((-117.97500 33.56667, -117.93333 33.5..."
1,1999-01-01,0.0,39.8,0.0059,137.2,123.0,1.6,293.1,281.2,24.0,16.0,15.5,35.0,1.7,0.74,"POLYGON ((-117.93333 33.56667, -117.89167 33.5..."
2,1999-01-01,0.0,38.2,0.0058,137.2,123.0,1.7,293.2,281.2,26.0,15.2,14.9,37.0,1.7,0.77,"POLYGON ((-117.89167 33.56667, -117.85000 33.5..."
3,1999-01-01,0.0,36.4,0.00567,137.3,49.0,1.8,293.3,280.3,27.0,15.0,14.8,38.0,1.8,0.76,"POLYGON ((-117.85000 33.56667, -117.80833 33.5..."
4,1999-01-01,0.0,33.8,0.00538,137.3,49.0,1.9,293.2,279.9,29.0,13.3,13.6,43.0,1.9,0.8,"POLYGON ((-117.80833 33.56667, -117.76667 33.5..."


In [4]:
# Summarize the GeoDataFrame: index range, row count and per-column dtypes.
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 6826300 entries, 0 to 10676520
Data columns (total 16 columns):
 #   Column                                            Dtype         
---  ------                                            -----         
 0   date                                              datetime64[ns]
 1   precipitation_amount_mm                           float64       
 2   relative_humidity_%                               float64       
 3   specific_humidity_kg/kg                           float64       
 4   surface_downwelling_shortwave_flux_in_air_W m-2   float64       
 5   wind_from_direction_Degrees Clockwise from north  float64       
 6   wind_speed_m/s                                    float64       
 7   max_air_temperature_K                             float64       
 8   min_air_temperature_K                             float64       
 9   burning_index_g_Unitless                          float64       
 10  dead_fuel_moisture_100hr_Percent 

In [5]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the numeric weather columns.
df.describe()

Unnamed: 0,precipitation_amount_mm,relative_humidity_%,specific_humidity_kg/kg,surface_downwelling_shortwave_flux_in_air_W m-2,wind_from_direction_Degrees Clockwise from north,wind_speed_m/s,max_air_temperature_K,min_air_temperature_K,burning_index_g_Unitless,dead_fuel_moisture_100hr_Percent,dead_fuel_moisture_1000hr_Percent,energy_release_component-g_Unitless,potential_evapotranspiration_mm,mean_vapor_pressure_deficit_kPa
count,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0,6826300.0
mean,0.8304122,30.00426,0.00650333,238.5361,226.6884,2.980093,297.6084,283.4156,37.14322,12.01792,12.75536,50.26444,4.325072,1.354994
std,4.286879,17.69667,0.002908315,79.89067,77.85217,1.255808,7.771557,6.103147,19.10992,4.536604,4.035236,20.9661,1.928999,0.9730105
min,0.0,0.1,0.00024,4.5,0.0,0.4,268.6,257.5,0.0,1.9,3.4,0.0,0.2,0.0
25%,0.0,15.4,0.00431,164.9,197.0,2.2,292.1,279.1,27.0,8.1,9.6,34.0,2.7,0.63
50%,0.0,26.7,0.0062,245.6,241.0,2.7,297.0,283.3,37.0,12.1,12.9,47.0,4.2,1.09
75%,0.0,42.6,0.00843,312.9,277.0,3.5,303.1,287.9,50.0,15.4,15.6,66.0,5.7,1.83
max,287.1,100.0,0.02392,377.7,360.0,16.2,322.7,307.4,151.0,30.6,30.8,110.0,13.2,7.17


In [6]:
# Preview the first records; the geometry column now holds the grid-cell polygons.
df.head()

Unnamed: 0,date,precipitation_amount_mm,relative_humidity_%,specific_humidity_kg/kg,surface_downwelling_shortwave_flux_in_air_W m-2,wind_from_direction_Degrees Clockwise from north,wind_speed_m/s,max_air_temperature_K,min_air_temperature_K,burning_index_g_Unitless,dead_fuel_moisture_100hr_Percent,dead_fuel_moisture_1000hr_Percent,energy_release_component-g_Unitless,potential_evapotranspiration_mm,mean_vapor_pressure_deficit_kPa,geometry
0,1999-01-01,0.0,40.3,0.00589,138.0,123.0,1.6,293.1,281.1,24.0,16.0,15.5,34.0,1.7,0.74,"POLYGON ((-117.97500 33.56667, -117.93333 33.5..."
1,1999-01-01,0.0,39.8,0.0059,137.2,123.0,1.6,293.1,281.2,24.0,16.0,15.5,35.0,1.7,0.74,"POLYGON ((-117.93333 33.56667, -117.89167 33.5..."
2,1999-01-01,0.0,38.2,0.0058,137.2,123.0,1.7,293.2,281.2,26.0,15.2,14.9,37.0,1.7,0.77,"POLYGON ((-117.89167 33.56667, -117.85000 33.5..."
3,1999-01-01,0.0,36.4,0.00567,137.3,49.0,1.8,293.3,280.3,27.0,15.0,14.8,38.0,1.8,0.76,"POLYGON ((-117.85000 33.56667, -117.80833 33.5..."
4,1999-01-01,0.0,33.8,0.00538,137.3,49.0,1.9,293.2,279.9,29.0,13.3,13.6,43.0,1.9,0.8,"POLYGON ((-117.80833 33.56667, -117.76667 33.5..."


In [7]:
# Load the fire perimeters and parse their final date.
perimeters = gpd.read_file('../../data/geoMAC/sd_fire_final_dedupe.shp')
perimeters['date'] = pd.to_datetime(perimeters.final_date, format="%Y-%m-%d")

# Keep only the columns needed for the join and for the new features.
perimeters = perimeters.drop(
    columns=perimeters.columns.difference(['date', 'geometry', 'acres', 'fire_name'])
)

# Declare the source CRS with an authority string; the older
# {'init': 'epsg:3857'} dict form is deprecated and emits a pyproj FutureWarning.
# NOTE(review): this overrides whatever CRS the shapefile itself declares;
# assumes the stored coordinates are Web Mercator -- confirm.
perimeters.crs = 'EPSG:3857'

# Reproject to lon/lat degrees so the perimeters line up with the gridMet cells.
perimeters = perimeters.to_crs(epsg=4326)
print(perimeters.shape)
perimeters.head()

  return _prepare_from_string(" ".join(pjargs))


(557, 4)


Unnamed: 0,fire_name,acres,geometry,date
0,67,43.394566,"POLYGON ((-116.95196 32.93964, -116.95149 32.9...",2006-08-11
1,67,43.691695,"POLYGON ((-116.95196 32.93964, -116.95149 32.9...",2006-08-11
2,76-2,11.91,"POLYGON ((-117.22833 33.28174, -117.22824 33.2...",2010-06-23
3,76-2,11.912337,"POLYGON ((-117.22967 33.27916, -117.22970 33.2...",2010-06-23
4,78 #4,99.97422,"POLYGON ((-116.93929 33.08838, -116.93909 33.0...",2007-07-09


In [8]:
%%time
fire_occurred = gpd.sjoin(df[['date', 'geometry']], perimeters)
fire_occurred = fire_occurred[fire_occurred.date_left == fire_occurred.date_right]
fire_occurred

  "(%s != %s)" % (left_df.crs, right_df.crs)


Wall time: 31min 38s


Unnamed: 0,date_left,geometry,index_right,fire_name,acres,date_right
2598868,2004-02-11,"POLYGON ((-117.80833 33.56667, -117.76667 33.5...",220,EL MORO,4.767303,2004-02-11
1770628,2002-06-26,"POLYGON ((-117.80833 33.56667, -117.76667 33.5...",289,LAGUNA,83.299934,2002-06-26
1770629,2002-06-26,"POLYGON ((-117.76667 33.56667, -117.72500 33.5...",289,LAGUNA,83.299934,2002-06-26
1770676,2002-06-26,"POLYGON ((-117.80833 33.52500, -117.76667 33.5...",289,LAGUNA,83.299934,2002-06-26
1770677,2002-06-26,"POLYGON ((-117.76667 33.52500, -117.72500 33.5...",289,LAGUNA,83.299934,2002-06-26
...,...,...,...,...,...,...
4031066,2006-12-05,"POLYGON ((-116.89167 32.52500, -116.85000 32.5...",70,BORDER 30,63.240620,2006-12-05
4031066,2006-12-05,"POLYGON ((-116.89167 32.52500, -116.85000 32.5...",71,BORDER 30,63.665359,2006-12-05
6801146,2012-05-17,"POLYGON ((-116.89167 32.52500, -116.85000 32.5...",73,BORDER 6,11.372368,2012-05-17
6830378,2012-06-07,"POLYGON ((-116.89167 32.52500, -116.85000 32.5...",133,Border 6,11.448854,2012-06-07


In [9]:
# Number of matched (grid cell, date, perimeter) rows per fire name.
fire_occurred.fire_name.value_counts()

Witch         557
Harris        209
Poomacha      182
CEDAR          96
Border 3       88
             ... 
EL MORO         1
HARBISON        1
BORDER #34      1
BONITA          1
Border 12       1
Name: fire_name, Length: 315, dtype: int64

In [10]:
# Flag the gridMet records that matched a fire and attach that fire's details.
#
# A record can intersect several perimeters on the same day, so the index of
# `fire_occurred` may repeat a label. Assigning such a Series through .loc
# leaves the choice of the winning row to pandas internals, so collapse to
# exactly one perimeter per record first: the one with the largest acreage.
# Rows with missing acreage are sorted first so they only win when the record
# has no other match; mergesort keeps ties in a stable, repeatable order.
fire_per_record = fire_occurred.sort_values('acres', na_position='first', kind='mergesort')
fire_per_record = fire_per_record[~fire_per_record.index.duplicated(keep='last')]

# Rebuild all three columns from scratch so that re-running this cell always
# yields the same result; records without a fire get False / NaN.
df['fire_occurred'] = df.index.isin(fire_per_record.index)
df['acres_burned'] = fire_per_record['acres'].reindex(df.index)
df['fire_name'] = fire_per_record['fire_name'].reindex(df.index)
df.head()

Unnamed: 0,date,precipitation_amount_mm,relative_humidity_%,specific_humidity_kg/kg,surface_downwelling_shortwave_flux_in_air_W m-2,wind_from_direction_Degrees Clockwise from north,wind_speed_m/s,max_air_temperature_K,min_air_temperature_K,burning_index_g_Unitless,dead_fuel_moisture_100hr_Percent,dead_fuel_moisture_1000hr_Percent,energy_release_component-g_Unitless,potential_evapotranspiration_mm,mean_vapor_pressure_deficit_kPa,geometry,fire_occurred,acres_burned,fire_name
0,1999-01-01,0.0,40.3,0.00589,138.0,123.0,1.6,293.1,281.1,24.0,16.0,15.5,34.0,1.7,0.74,"POLYGON ((-117.97500 33.56667, -117.93333 33.5...",False,,
1,1999-01-01,0.0,39.8,0.0059,137.2,123.0,1.6,293.1,281.2,24.0,16.0,15.5,35.0,1.7,0.74,"POLYGON ((-117.93333 33.56667, -117.89167 33.5...",False,,
2,1999-01-01,0.0,38.2,0.0058,137.2,123.0,1.7,293.2,281.2,26.0,15.2,14.9,37.0,1.7,0.77,"POLYGON ((-117.89167 33.56667, -117.85000 33.5...",False,,
3,1999-01-01,0.0,36.4,0.00567,137.3,49.0,1.8,293.3,280.3,27.0,15.0,14.8,38.0,1.8,0.76,"POLYGON ((-117.85000 33.56667, -117.80833 33.5...",False,,
4,1999-01-01,0.0,33.8,0.00538,137.3,49.0,1.9,293.2,279.9,29.0,13.3,13.6,43.0,1.9,0.8,"POLYGON ((-117.80833 33.56667, -117.76667 33.5...",False,,


In [11]:
# Preview the first records again, now including the fire_occurred, acres_burned and fire_name columns.
df.head()

Unnamed: 0,date,precipitation_amount_mm,relative_humidity_%,specific_humidity_kg/kg,surface_downwelling_shortwave_flux_in_air_W m-2,wind_from_direction_Degrees Clockwise from north,wind_speed_m/s,max_air_temperature_K,min_air_temperature_K,burning_index_g_Unitless,dead_fuel_moisture_100hr_Percent,dead_fuel_moisture_1000hr_Percent,energy_release_component-g_Unitless,potential_evapotranspiration_mm,mean_vapor_pressure_deficit_kPa,geometry,fire_occurred,acres_burned,fire_name
0,1999-01-01,0.0,40.3,0.00589,138.0,123.0,1.6,293.1,281.1,24.0,16.0,15.5,34.0,1.7,0.74,"POLYGON ((-117.97500 33.56667, -117.93333 33.5...",False,,
1,1999-01-01,0.0,39.8,0.0059,137.2,123.0,1.6,293.1,281.2,24.0,16.0,15.5,35.0,1.7,0.74,"POLYGON ((-117.93333 33.56667, -117.89167 33.5...",False,,
2,1999-01-01,0.0,38.2,0.0058,137.2,123.0,1.7,293.2,281.2,26.0,15.2,14.9,37.0,1.7,0.77,"POLYGON ((-117.89167 33.56667, -117.85000 33.5...",False,,
3,1999-01-01,0.0,36.4,0.00567,137.3,49.0,1.8,293.3,280.3,27.0,15.0,14.8,38.0,1.8,0.76,"POLYGON ((-117.85000 33.56667, -117.80833 33.5...",False,,
4,1999-01-01,0.0,33.8,0.00538,137.3,49.0,1.9,293.2,279.9,29.0,13.3,13.6,43.0,1.9,0.8,"POLYGON ((-117.80833 33.56667, -117.76667 33.5...",False,,


In [12]:
# Class balance of the new flag: records with vs. without a matching fire.
df.fire_occurred.value_counts()

False    6824133
True        2167
Name: fire_occurred, dtype: int64

In [13]:
# Recover each record's original grid point from its cell polygon: the first
# exterior vertex is the (x, y) pair the polygon was built from.
first_vertices = df.geometry.apply(lambda cell: cell.exterior.coords[0])
df['longitude'] = first_vertices.apply(lambda xy: xy[0])
df['latitude'] = first_vertices.apply(lambda xy: xy[1])

# Preview the table as it will be exported (without the geometry column).
df.drop(columns='geometry').head()

Unnamed: 0,date,precipitation_amount_mm,relative_humidity_%,specific_humidity_kg/kg,surface_downwelling_shortwave_flux_in_air_W m-2,wind_from_direction_Degrees Clockwise from north,wind_speed_m/s,max_air_temperature_K,min_air_temperature_K,burning_index_g_Unitless,dead_fuel_moisture_100hr_Percent,dead_fuel_moisture_1000hr_Percent,energy_release_component-g_Unitless,potential_evapotranspiration_mm,mean_vapor_pressure_deficit_kPa,fire_occurred,acres_burned,fire_name,longitude,latitude
0,1999-01-01,0.0,40.3,0.00589,138.0,123.0,1.6,293.1,281.1,24.0,16.0,15.5,34.0,1.7,0.74,False,,,-117.975,33.566667
1,1999-01-01,0.0,39.8,0.0059,137.2,123.0,1.6,293.1,281.2,24.0,16.0,15.5,35.0,1.7,0.74,False,,,-117.933333,33.566667
2,1999-01-01,0.0,38.2,0.0058,137.2,123.0,1.7,293.2,281.2,26.0,15.2,14.9,37.0,1.7,0.77,False,,,-117.891667,33.566667
3,1999-01-01,0.0,36.4,0.00567,137.3,49.0,1.8,293.3,280.3,27.0,15.0,14.8,38.0,1.8,0.76,False,,,-117.85,33.566667
4,1999-01-01,0.0,33.8,0.00538,137.3,49.0,1.9,293.2,279.9,29.0,13.3,13.6,43.0,1.9,0.8,False,,,-117.808333,33.566667


In [14]:
# Write the integrated table to a local gzip-compressed parquet file.
# The geometry column is left out of the export; the longitude/latitude
# columns carry the location instead.
df.drop(columns='geometry').to_parquet("integratedData.parquet.gz", compression="gzip")

# Uploading to S3

In [15]:
# Write the same table (geometry column excluded) straight to the team S3 bucket.
s3_url = "s3://dse-cohort5-group5/wildfire_capstone/integratedData.parquet.gz"
df.drop(columns='geometry').to_parquet(s3_url, compression="gzip")

In [16]:
import boto3

# List the bucket contents to check the upload.
# NOTE: `s3_url` is rebound here from the full object URL to the bare bucket name.
# NOTE(review): the recorded response has 'IsTruncated': True, so this single
# call does not return every key in the bucket; use a paginator if a complete
# listing is needed.
s3 = boto3.client("s3")
s3_url = 'dse-cohort5-group5'
all_objects = s3.list_objects(Bucket=s3_url)
all_objects

{'ResponseMetadata': {'RequestId': 'EF90F9405B1915FC',
  'HostId': '5jGryo7JHg02wFuhikJWpgPRmT4SOV1JAsKKeV/UtglZaEVZN0AYrKBFk9wqyfH6y7PC7Nw9SVo=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '5jGryo7JHg02wFuhikJWpgPRmT4SOV1JAsKKeV/UtglZaEVZN0AYrKBFk9wqyfH6y7PC7Nw9SVo=',
   'x-amz-request-id': 'EF90F9405B1915FC',
   'date': 'Sun, 10 May 2020 01:05:47 GMT',
   'x-amz-bucket-region': 'us-west-1',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 1},
 'IsTruncated': True,
 'Marker': '',
 'Contents': [{'Key': 'test/',
   'LastModified': datetime.datetime(2020, 2, 14, 2, 26, 50, tzinfo=tzutc()),
   'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
   'Size': 0,
   'StorageClass': 'STANDARD',
   'Owner': {'DisplayName': 'kcoakley+cohort5group5',
    'ID': 'fe3a1755292b139ad5397ba537aa0aff32ef6ebba07fe45dcbe486d81e1ccc11'}},
  {'Key': 'wildfire_capstone/GeomacHashes.parquet',
   'LastModified': datetime.datetime(2020,