In [1]:
import sqlite3
import pandas as pd
import numpy as np
import pickle
import geopandas
from shapely.geometry import Polygon
import uuid

In [2]:
cnx = sqlite3.connect('../us_wildfire_dataset/FPA_FOD_20170508.sqlite')
df = pd.read_sql_query("SELECT DISCOVERY_DATE, CONT_DATE, LATITUDE, LONGITUDE, STATE, FIRE_NAME, FIRE_SIZE_CLASS, FIRE_SIZE,STAT_CAUSE_DESCR FROM fires", cnx)

In [3]:
def filter_raw_data(df, state='CA', min_class=None):

    # drop states
    df_filt = df[df.STATE == 'CA']
    df_filt = df_filt.drop(['STATE'], axis=1)

    # drop fire classes
    if min_class is not None:
        df_filt.FIRE_SIZE_CLASS = df_filt.FIRE_SIZE_CLASS.apply(ord)
        df_filt = df_filt[df_filt.FIRE_SIZE_CLASS >= ord(min_class)]

    # reformat dates
    df_filt.DISCOVERY_DATE = pd.to_datetime(df['DISCOVERY_DATE'], unit='D', origin='julian')
    df_filt.CONT_DATE = pd.to_datetime(df['CONT_DATE'], unit='D', origin='julian')

    # convert coordinates
    df_filt = geopandas.GeoDataFrame(df_filt, geometry=geopandas.points_from_xy(
        df_filt.LONGITUDE, df_filt.LATITUDE))
    df_filt = df_filt.drop(['LONGITUDE'], axis=1)
    df_filt = df_filt.drop(['LATITUDE'], axis=1)
    df_filt.insert(2, 'COORD', df_filt.pop('geometry'))

    # remove missing values
    df_filt = df_filt.dropna()

    # reformat head
    df_filt.columns = [
        'start_date', 'end_date', 'geometry',
        'name', 'size_class', 'size', 'cause'
    ]

    # sort by start dates
    df_filt = df_filt.sort_values(by='start_date')
    df_filt = df_filt.reset_index()

    return df_filt

In [4]:
def extract_geo_fires(df, area):

    # return fires within polygon
    res = df[df.within(area)]
    return res


def build_geo_grid(df, grid_area, square_size, verbose=False):
    bounds = grid_area.bounds

    # calculate number of grids in lat/long directions
    long_steps = int((bounds[2] - bounds[0]) / square_size)
    lat_steps = int((bounds[3] - bounds[1]) / square_size)

    grid_df = []
    prog, total = 0, long_steps * lat_steps
    for i in range(long_steps):
        for j in range(lat_steps):

            # update progress
            prog += 1
            if verbose and prog % 10 == 0:
                print('Progress: {}/{}'.format(prog, total), flush=True)
            
            # get south-east grid square corner
            c_lon = bounds[0] + i * square_size
            c_lat = bounds[1] + j * square_size

            # create grid square
            grid = Polygon([
                (c_lon, c_lat), 
                (c_lon + square_size, c_lat), 
                (c_lon, c_lat + square_size), 
                (c_lon + square_size, c_lat + square_size)
            ])

            fires = extract_geo_fires(df, grid)
            grid_df.append([uuid.uuid4(), grid, fires.index])

    # build grid df
    grid_df = geopandas.GeoDataFrame(grid_df)
    grid_df.columns = ['grid_id', 'grid_square', 'fire_indices']
    return grid_df

In [5]:
# coordinate-square north of San Bernardino/Riverside 
p = Polygon([(-118, 34), (-118, 36), (-116, 34), (-116, 36)])

# filter raw data from dataset and build sexy format
df_filt = filter_raw_data(df)

# df_filt is ordered by start_date with the following columns:
#   'start_date', 'end_date', 'geometry', 'name', 'size_class', 'size', 'cause'

# build df with each grid square area (from grid area `p` with grid square size 0.1) and corresponding fires indices (which indexes complete list of fires in df_filt)
grid_df = build_geo_grid(df_filt, p, 0.5, verbose=True)

# grid_df has the following columns:
#    'grid_id', 'grid_square', 'fire_indices'
#    grid_id: a random uuid to identify the grid square for later
#    grid_square: polygon object of the grid square
#    fire_indices: list of indices of the fires from df_filt within the grid_square

Progress: 10/16


In [6]:
def label_grid_square(df, grid_df, start_date='1992-01', end_date='2015-12', verbose=False):
    
    # build date range (in months)
    date_range = [str(d) for d in np.arange(
        start_date, 
        end_date, 
        dtype='datetime64[M]'
    )]    

    # iterate over all grid squares 
    label_df = []
    prog, total = 0, grid_df.shape[0]
    for i in range(grid_df.shape[0]):
        
        # update progress
        prog += 1
        if verbose and prog % 10 == 0:
            print('Progress: {}/{}'.format(prog, total), flush=True)
                
        # get grid square fires
        id = grid_df.loc[i, 'grid_id']
        grid_square = grid_df.loc[i, 'grid_square']
        fire_indices = list(grid_df.loc[i, 'fire_indices'])
        fires = df.loc[fire_indices, :]
        
        # collect all months in fire date range
        months = []
        for _, row in fires.iterrows(): 
            start = row.start_date.date()
            end = (row.end_date + pd.DateOffset(months=1)).date()
            months.extend([str(d) for d in np.arange(start, end, dtype='datetime64[M]')])
        fire_months = list(set(months))
        
        # label fire months
        month_labels = []
        for month in date_range:
            if month in fire_months: month_labels.append(1)
            else: month_labels.append(0)
        
        # add labels
        labels = [id, grid_square] + month_labels
        label_df.append(labels)
        
    # build label df
    label_df = pd.DataFrame(label_df)
    label_df.columns = ['grid_id', 'grid_square'] + date_range
    return label_df

In [7]:
    
# labels whether there was a fire in each grid square 
label_df = label_grid_square(df_filt, grid_df, verbose=True)

# label_df has the following columns:
#    'grid_id', 'grid_square', 'months ...'
#    grid_id: a random uuid to identify the grid square for later
#    grid_square: polygon object of the grid square
#    indices: the rest of the columns are a label for each month (0 if there was no fire in the grid square during them onth and 1 otherwise)

Progress: 10/16


In [8]:
print(label_df)

grid_id  \
0   8f6e2515-fbb8-4e85-bc53-252efc4c17ef   
1   caea15cb-4bae-434a-a050-ca65c11601b0   
2   6bd7d797-df2a-48b4-b85d-260bea0916f6   
3   ad5b6c7f-a7e4-4a2d-8279-244262f3afe8   
4   1ae13da5-6bb8-4ec2-a2e6-68cbd5c86667   
5   2f6a6c10-4561-4769-9ca2-fd7e422ad953   
6   04289e2a-6490-490c-af8b-c74ddbae98c7   
7   80a4514f-18bf-494d-95f9-aa6e023a155d   
8   2f253ae3-3b69-4114-b0dd-a1bca7a16eab   
9   bf5330c2-0167-42e7-8489-340a6bc3d400   
10  c2dc7892-4775-4eb3-b178-9e2aefda4232   
11  0b6962c9-c171-45e5-996c-be610e814ed2   
12  19cef0f6-2617-4c05-adac-8508c36eb5bb   
13  b9e916df-ecad-441d-a7e5-7ecf0d454105   
14  dd9d37d2-f8ef-4460-bc1a-55313738cae0   
15  4660b52a-13e3-4682-bd0f-7d60e74868b0   

                                          grid_square  1992-01  1992-02  \
0   POLYGON ((-118 34, -117.5 34, -118 34.5, -117....        0        0   
1   POLYGON ((-118 34.5, -117.5 34.5, -118 35, -11...        0        0   
2   POLYGON ((-118 35, -117.5 35, -118 35.5, -117....      

In [21]:
output = []

for index, row in label_df.iterrows():
    for rowIndex, col in enumerate(row[2:-1]):
        # if not on fire
        if col == 0:
            # include implicit conversion to tuple of bounds instead of polygon shape; will make working with earth engine easier
            output.append((row[0], row[1].bounds, label_df.columns[rowIndex + 2], row[rowIndex + 1 + 2]))

In [23]:
# save the corresponding data structure to disk
import pickle
with open("../us_wildfire_dataset/labelled_temporal_polygons.pkl", "wb") as f:
    pickle.dump(output, f, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
import ee

ee.Initialize()

In [14]:
for item in output:
    # we want to grab the polygon, and download the associated image files
    

116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -116.5 35.5, -116 35.5, -116.5 35))
POLYGON ((-116.5 35, -116 35, -1