In [53]:
import pandas as pd  # provides interface for interacting with tabular data
import geopandas as gpd  # combines the capabilities of pandas and shapely for geospatial operations
from shapely.geometry import Point, Polygon, MultiPolygon  # for manipulating text data into geospatial shapes
from shapely import wkt  # stands for "well known text," allows for interchange across GIS programs
import rtree  # supports geospatial join
import os
import fnmatch
import numpy as np
import matplotlib.pyplot as plt
import descartes
import sys
import sklearn
from datetime import datetime as dt
sys.path.append('/Users/saraprice/Documents/NYU/Fall_2020/DS_GA_1001/final_project/wildfires-1001/code/functions/')
from gis_processing import *

In [3]:
# Root folder that holds the project's data files (everything below is read
# relative to it). The default is the original author's local checkout; set the
# WILDFIRES_DATA_DIR environment variable to run the notebook on another machine
# without editing this cell.
data_dir = os.environ.get(
    'WILDFIRES_DATA_DIR',
    '/Users/saraprice/Documents/NYU/Fall_2020/DS_GA_1001/wildfires-1001/data',
)

## Read in fire target data

In [10]:
# Load the three pickled target chunks (target_full_1.pkl ... target_full_3.pkl),
# keeping each one addressable by its chunk number in `target_df`.
# NOTE(review): unpickling can execute arbitrary code -- only load files that
# this project produced itself.
target_df = {
    i: pd.read_pickle(os.path.join(data_dir, f'clean_data/target_full_{i}.pkl'))
    for i in range(1, 4)
}

# Stack all chunks with a single concat. Growing a frame with `.append` inside a
# loop re-copies the accumulated rows on every iteration, and `DataFrame.append`
# no longer exists in pandas >= 2.0. As with the original append, the chunks'
# own index labels are kept.
# NOTE(review): the accumulator used to be seeded with an empty GeoDataFrame;
# the result now takes its type from the loaded chunks -- confirm nothing
# downstream relies on `full_target_data` being a GeoDataFrame.
full_target_data = pd.concat(list(target_df.values()))

In [27]:
# Cast the identifier columns to plain integers so they can be matched against
# the integer keys of the tables merged in below.
for key_col in ('COUNTYFP', 'GRID_ID'):
    full_target_data[key_col] = full_target_data[key_col].astype(int)

# Derive the calendar year of each row from its `date` value; YEAR is one of the
# keys used for the demographics merge.
full_target_data['YEAR'] = full_target_data['date'].apply(lambda day: day.year)

## Read in demographic data

In [45]:
# Load the demographics/arson master table; the preview below shows it carries
# NAME, COUNTYFP, GRID_ID and YEAR alongside the demographic, arson,
# unemployment and income columns.
demogs_path = os.path.join(data_dir, 'clean_data/CA_demogs/demogs_arson_master.csv')
ca_demogs = pd.read_csv(demogs_path)
ca_demogs.head()

Unnamed: 0,NAME,COUNTYFP,GRID_ID,YEAR,MEDIAN_AGE_TOT,MEDIAN_AGE_MALE,MEDIAN_AGE_FEM,AGEUNDER13_TOT,AGE1424_TOT,AGE2544_TOT,...,POPDENSITY,POPDENSITY_MALE,POPDENSITY_FEM,Structure Arsons,Mobile Arsons,Other Arsons,Total Arsons,Total Arsons Cleared,Unemployment,medianHHI2018
0,Plumas,63,49,2008,49.6,49.2,50.1,0.230043,0.211169,0.312208,...,1.732209,0.866061,0.866148,0,0,0,0,0,0.176,55079
1,Plumas,63,49,2009,49.6,49.2,50.1,0.230043,0.211169,0.312208,...,1.732209,0.866061,0.866148,1,1,0,2,0,0.176,55079
2,Plumas,63,49,2010,49.8,49.3,50.2,0.228572,0.209697,0.309784,...,1.724157,0.861039,0.863117,1,0,0,1,0,0.176,55079
3,Plumas,63,49,2011,50.3,49.6,50.9,0.227446,0.205022,0.303463,...,1.707273,0.855671,0.851602,2,0,5,7,6,0.176,55079
4,Plumas,63,49,2012,50.9,50.1,51.5,0.222684,0.194892,0.298701,...,1.677663,0.838615,0.839048,0,0,0,0,0,0.161,55079


In [48]:
## Attach the demographic columns to the target rows.
## A left join keeps every target row; the shapes printed before and after
## make it easy to see that the row count did not change.
demog_keys = ['NAME', 'COUNTYFP', 'GRID_ID', 'YEAR']

print(full_target_data.shape)
full_target_data1 = pd.merge(
    full_target_data,
    ca_demogs,
    how='left',
    on=demog_keys,
)
print(full_target_data1.shape)
full_target_data1.head()

(1443887, 23)
(1443887, 42)


Unnamed: 0,date,month_id,month_start,month_end,week_id,week_start,week_end,GRID_ID,FIRE_AREA,FIRE_KEY,...,POPDENSITY,POPDENSITY_MALE,POPDENSITY_FEM,Structure Arsons,Mobile Arsons,Other Arsons,Total Arsons,Total Arsons Cleared,Unemployment,medianHHI2018
0,1990-01-01,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,4.996877,2.778235,2.218641,9,1,1,11,2,0.133,48518
1,1990-01-02,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,4.996877,2.778235,2.218641,9,1,1,11,2,0.133,48518
2,1990-01-03,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,4.996877,2.778235,2.218641,9,1,1,11,2,0.133,48518
3,1990-01-04,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,4.996877,2.778235,2.218641,9,1,1,11,2,0.133,48518
4,1990-01-05,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,4.996877,2.778235,2.218641,9,1,1,11,2,0.133,48518


## Infrastructure Data

In [50]:
# Read the grid infrastructure table and keep only the grid identifier plus the
# two road columns that get merged onto the target data.
infrastructure_path = os.path.join(data_dir, 'clean_data/grid_infrastructure/grid_infrastructure.dbf')
infrastructure = gpd.read_file(infrastructure_path)

road_columns = ['GRID_ID', 'total_road', 'road_count']
infrastructure_sub = infrastructure[road_columns]

In [52]:
# Add the road columns to every target row by grid cell (left join keeps all
# target rows, including grid cells with no infrastructure record).
full_target_data2 = full_target_data1.merge(infrastructure_sub, on = ['GRID_ID'], how = 'left')

# Jupyter only displays the value of a cell's last expression, so a bare
# `.shape` in the middle of the cell is silently discarded; print it instead,
# as the demographics merge cell does.
print(full_target_data2.shape)
full_target_data2.head()

Unnamed: 0,date,month_id,month_start,month_end,week_id,week_start,week_end,GRID_ID,FIRE_AREA,FIRE_KEY,...,POPDENSITY_FEM,Structure Arsons,Mobile Arsons,Other Arsons,Total Arsons,Total Arsons Cleared,Unemployment,medianHHI2018,total_road,road_count
0,1990-01-01,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,2.218641,9,1,1,11,2,0.133,48518,456280.569816,79.0
1,1990-01-02,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,2.218641,9,1,1,11,2,0.133,48518,456280.569816,79.0
2,1990-01-03,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,2.218641,9,1,1,11,2,0.133,48518,456280.569816,79.0
3,1990-01-04,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,2.218641,9,1,1,11,2,0.133,48518,456280.569816,79.0
4,1990-01-05,1990_1,1990-01-01,1990-01-31,1990_1,1990-01-01,1990-01-07,0,,,...,2.218641,9,1,1,11,2,0.133,48518,456280.569816,79.0
