In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt
import contextily as ctx
import datetime

In [None]:
# Load the fire perimeter shapefile and print its shape before and after
# discarding rows that have no recorded year.
perimeters = gpd.read_file('./hist/hist.shp')
print(perimeters.shape)
# .copy() detaches the filtered frame from the one read from disk, so the
# column assignment below writes to an independent object rather than to a
# slice (which is what triggers pandas' SettingWithCopyWarning).
perimeters = perimeters[perimeters['year'].notna()].copy()
# Bracket assignment is the unambiguous way to overwrite an existing column.
perimeters['year'] = perimeters['year'].astype(int)
print(perimeters.shape)

In [None]:
# Preview the first five rows of the cleaned perimeter table.
perimeters.head(5)

In [None]:
# Is every value in the `id` column distinct, i.e. one row per id?
perimeters['id'].is_unique

In [None]:
from shapely.geometry import Point
# Bounding box used to clip the perimeters with the .cx coordinate indexer.
# NOTE(review): assumes the source coordinates are longitude/latitude degrees;
# the code labels them EPSG:4326 below -- confirm against the shapefile's CRS.
MIN_LAT, MAX_LAT = 32.4, 33.6
MIN_LONG, MAX_LONG = -118, -116
sd_perimeters = perimeters.cx[MIN_LONG:MAX_LONG, MIN_LAT:MAX_LAT]
print(sd_perimeters.shape)
# Declare the CRS as EPSG:4326 without transforming coordinates. set_crs returns
# a new frame instead of mutating the .cx slice, and allow_override=True keeps
# the original behaviour of replacing whatever CRS the file declared. The
# authority string replaces the deprecated {'init': 'epsg:4326'} dict form.
sd_perimeters = sd_perimeters.set_crs("EPSG:4326", allow_override=True)
# Reproject to EPSG:3857.
sd_perimeters = sd_perimeters.to_crs(epsg=3857)
sd_perimeters.head()

In [None]:
# Keep only rows whose year is strictly greater than 2000, then renumber the
# index from zero.
after_2000 = sd_perimeters['year'].gt(2000)
sd_perimeters = sd_perimeters.loc[after_2000].reset_index(drop=True)
print(sd_perimeters.shape)
sd_perimeters.head()

In [None]:
# Bar chart of the number of perimeter rows per year, in chronological order.
fires_per_year = sd_perimeters['year'].value_counts().sort_index()
year_ax = fires_per_year.plot.bar()
year_ax.set_title("SD Fires Over Time");

In [None]:
# Share of missing values in each column, expressed as a percentage.
sd_perimeters.isna().mean() * 100

In [None]:
# Several columns have a fair amount of missing data; year and acres are the
# reliable ones, so look at the distribution of acres.
sd_perimeters['acres'].plot.kde()

In [None]:
# Use alarm_date where present, otherwise fall back to perimeter_, and parse
# the result as datetimes.
start_date_raw = sd_perimeters['alarm_date'].combine_first(sd_perimeters['perimeter_'])
sd_perimeters['final_date'] = pd.to_datetime(start_date_raw)
# Parse cont_date as datetimes too, so the two columns can be subtracted.
sd_perimeters['cont_date'] = pd.to_datetime(sd_perimeters['cont_date'])

In [None]:
# Bar chart of row counts by calendar month of final_date.
fire_month = sd_perimeters['final_date'].dt.month
fire_month.value_counts().sort_index().plot.bar()

In [None]:
# The cause column appears to hold cause keys; it is not clear that the
# matching key-to-description lookup is available here.
cause_counts = sd_perimeters['cause'].value_counts()
cause_counts.plot.bar()

In [None]:
# Whole days between final_date and cont_date.
burn_duration = sd_perimeters['cont_date'].sub(sd_perimeters['final_date'])
sd_perimeters['burn_days'] = burn_duration.dt.days

In [None]:
# Scatter of burn duration (days) against burned area (acres).
fig, ax = plt.subplots()
ax.scatter(sd_perimeters['burn_days'], sd_perimeters['acres'], s=20)
ax.set_xlabel('Days Burned')
ax.set_ylabel('Acres Burned');

In [None]:
# Show the rows for one specific fire name / acreage combination.
is_vallecito = sd_perimeters['fire_name'].eq('Vallecito Lightning Complex')
is_519_acres = sd_perimeters['acres'].eq(519.4)
sd_perimeters.loc[is_vallecito & is_519_acres]

In [None]:
# Drop the id column and then remove rows that are identical in every
# remaining column.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# makes this cell safe to re-run: a second execution no longer raises KeyError
# because 'id' has already been removed.
sd_perimeters = (
    sd_perimeters
    .drop(columns=['id'], errors='ignore')
    .drop_duplicates()
)

In [None]:
# Keep only rows with more than 1000 acres, then count rows per fire name.
over_1000_acres = sd_perimeters['acres'].gt(1000)
sd_perimeters = sd_perimeters.loc[over_1000_acres]
sd_perimeters['fire_name'].value_counts()

In [None]:
# Name, acreage and date columns only, sorted by those same three columns.
name_acres_date = sd_perimeters[['fire_name', 'acres', 'final_date']]
fire_src = name_acres_date.sort_values(by=['fire_name', 'acres', 'final_date'])

In [None]:
# For each (fire_name, acres) pair, keep the earliest final_date.
per_fire = fire_src.groupby(['fire_name', 'acres'], as_index=False)
fire_lookup = per_fire['final_date'].min()

In [None]:
# Inner-join the lookup back onto the full table so that only rows matching a
# (fire_name, acres, earliest final_date) combination are kept.
fire_final = fire_lookup.merge(
    sd_perimeters,
    how='inner',
    on=['fire_name', 'acres', 'final_date'],
)

In [None]:
# Convert the merge result back to a GeoDataFrame.
# The geometry column and CRS are taken explicitly from sd_perimeters so the
# exported file does not depend on that metadata having been carried through
# the merge implicitly.
fire_finalg = gpd.GeoDataFrame(
    fire_final,
    geometry=sd_perimeters.geometry.name,
    crs=sd_perimeters.crs,
)
# Drop the date/year columns that are not written out; rebinding instead of
# inplace=True keeps the operation free of in-place mutation.
fire_finalg = fire_finalg.drop(columns=['alarm_date', 'cont_date', 'perimeter_', 'year'])
# Store final_date as text before export.
fire_finalg['final_date'] = fire_finalg['final_date'].astype(str)

In [None]:
# Write the de-duplicated perimeters to a shapefile in the working directory.
output_shapefile = "sd_fire_final_dedupe.shp"
fire_finalg.to_file(output_shapefile)