In [66]:
import sqlite3
import pandas as pd
import numpy as np
import pickle
import geopandas
from shapely.geometry import Polygon

In [101]:
# open the wildfire SQLite database and pull the fire attributes used below
db_path = '../us_wildfire_dataset/FPA_FOD_20170508.sqlite'
fires_query = "SELECT DISCOVERY_DATE, CONT_DATE, LATITUDE, LONGITUDE, STATE, FIRE_NAME, FIRE_SIZE_CLASS, FIRE_SIZE,STAT_CAUSE_DESCR FROM fires"

cnx = sqlite3.connect(db_path)
df = pd.read_sql_query(fires_query, cnx)

In [124]:
def filter_raw_data(df, state='CA', min_class=None):
    """Reduce the raw fire table to one state and reshape it for geo queries.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw fire records containing the columns DISCOVERY_DATE, CONT_DATE,
        LATITUDE, LONGITUDE, STATE, FIRE_NAME, FIRE_SIZE_CLASS, FIRE_SIZE and
        STAT_CAUSE_DESCR.
    state : str, default 'CA'
        Value of the STATE column to keep.
    min_class : str or None, default None
        Single-character fire size class; rows whose class sorts below it are
        dropped. When given, the size class column is replaced by the
        character codes (``ord``) of the classes, so ``size_class`` is
        integer-valued in the result. When None, no class filter is applied
        and the classes are left unchanged.

    Returns
    -------
    geopandas.GeoDataFrame
        Rows without missing values, sorted by ``start_date``, with the
        columns ``start_date``, ``end_date``, ``geometry``, ``name``,
        ``size_class``, ``size`` and ``cause``. The original row index is kept.

    Notes
    -----
    Points are built from the *absolute* values of longitude and latitude, so
    any polygon used to query the result must use the same convention.
    """
    # keep only the requested state; the column is constant afterwards
    df_filt = df[df.STATE == state].drop(columns=['STATE'])

    # drop fire classes below the requested minimum
    if min_class is not None:
        # the class column is deliberately left as character codes here
        df_filt = df_filt.assign(
            FIRE_SIZE_CLASS=df_filt.FIRE_SIZE_CLASS.apply(ord))
        df_filt = df_filt[df_filt.FIRE_SIZE_CLASS >= ord(min_class)]

    # reformat dates from Julian day numbers; only retained rows are converted
    df_filt = df_filt.assign(
        DISCOVERY_DATE=pd.to_datetime(
            df_filt['DISCOVERY_DATE'], unit='D', origin='julian'),
        CONT_DATE=pd.to_datetime(
            df_filt['CONT_DATE'], unit='D', origin='julian'),
    )

    # convert coordinates into point geometries (absolute values, see Notes)
    points = geopandas.points_from_xy(
        np.abs(df_filt.LONGITUDE), np.abs(df_filt.LATITUDE))
    df_filt = geopandas.GeoDataFrame(
        df_filt.drop(columns=['LONGITUDE', 'LATITUDE']), geometry=points)

    # reformat head: rename by name (not by position) and fix the column order
    df_filt = df_filt.rename(columns={
        'DISCOVERY_DATE': 'start_date',
        'CONT_DATE': 'end_date',
        'FIRE_NAME': 'name',
        'FIRE_SIZE_CLASS': 'size_class',
        'FIRE_SIZE': 'size',
        'STAT_CAUSE_DESCR': 'cause',
    })
    df_filt = df_filt[[
        'start_date', 'end_date', 'geometry',
        'name', 'size_class', 'size', 'cause'
    ]]

    # remove missing values
    df_filt = df_filt.dropna()

    # sort by start dates
    return df_filt.sort_values(by='start_date')

In [138]:
def extract_geo_fires(df, area):
    """Return the rows of `df` for which `df.within(area)` is true."""
    inside_mask = df.within(area)
    return df[inside_mask]

def build_geo_grid(df, grid_area, square_size):
    """Tile the bounding box of `grid_area` with squares and list the fires in each.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Fire records; passed unchanged to `extract_geo_fires` for every cell.
    grid_area : shapely geometry
        Only its ``bounds`` (min x, min y, max x, max y) are used.
    square_size : float
        Edge length of one grid square, in the same units as the bounds.
        Must be positive.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per grid square with the columns ``grid`` (the square as a
        Polygon) and ``fire_indices`` (index labels of the rows of `df`
        selected for that square). Squares are emitted column by column,
        starting at the minimum-x / minimum-y corner. Only whole squares that
        fit inside the bounds are produced.

    Raises
    ------
    ValueError
        If `square_size` is not positive.
    """
    if square_size <= 0:
        raise ValueError("square_size must be positive")

    min_lon, min_lat, max_lon, max_lat = grid_area.bounds

    # calculate number of grids in lat/long directions; the small epsilon keeps
    # float round-off (e.g. 0.3 / 0.1 == 2.999...) from dropping a row/column
    eps = 1e-9
    long_steps = int((max_lon - min_lon) / square_size + eps)
    lat_steps = int((max_lat - min_lat) / square_size + eps)

    rows = []
    for i in range(long_steps):
        for j in range(lat_steps):

            # corner of the square with the smallest coordinates
            c_lon = min_lon + i * square_size
            c_lat = min_lat + j * square_size

            # create grid square; vertices must follow the ring around the
            # square, otherwise the polygon self-intersects (bowtie shape)
            grid = Polygon([
                (c_lon, c_lat),
                (c_lon + square_size, c_lat),
                (c_lon + square_size, c_lat + square_size),
                (c_lon, c_lat + square_size),
            ])

            fires = extract_geo_fires(df, grid)
            rows.append([grid, fires.index])

    # build grid df; passing the column names here also covers an empty grid
    return geopandas.GeoDataFrame(rows, columns=['grid', 'fire_indices'])

# coordinate-square north of San Bernardino/Riverside.
# Longitudes are written as positive numbers because filter_raw_data builds
# its points from np.abs(LONGITUDE). Vertices are listed in ring order so the
# polygon is a valid square (same bounds as before: 116..118, 34..36).
p = Polygon([(118, 34), (118, 36), (116, 36), (116, 34)])

df_filt = filter_raw_data(df)
grid_df = build_geo_grid(df_filt, p, 0.1)