# Wildfire risk by US county

## Introduction

### Obtain all zipcodes for the state

In [1]:
import pandas as pd

# Two-letter state code used to pick the zipcode listing to load.
state_name = 'CA'
filename = 'data/zipcodes_' + state_name + '.csv'

# The CSV is read without a header row; the three columns are named here.
counties = pd.read_csv(
    filename,
    names=['zipcode', 'city', 'county'],
)

# Report how many rows were read and show a truncated preview of the table.
print("%d zipcodes found..." % len(counties))
with pd.option_context('display.max_rows', 10):
    print(counties)

2589 zipcodes found...
      zipcode              city       county
0       90001       Los Angeles  Los Angeles
1       90002       Los Angeles  Los Angeles
2       90003       Los Angeles  Los Angeles
3       90004       Los Angeles  Los Angeles
4       90005       Los Angeles  Los Angeles
...       ...               ...          ...
2584    96157  South Lake Tahoe    El Dorado
2585    96158  South Lake Tahoe    El Dorado
2586    96160           Truckee       Nevada
2587    96161           Truckee       Nevada
2588    96162           Truckee       Nevada

[2589 rows x 3 columns]


### Load census data

In [2]:
from uszipcode import SearchEngine

# NOTE(review): the `simple_zipcode` keyword is kept exactly as the notebook had it;
# confirm it is still accepted by the installed uszipcode release.
search = SearchEngine(simple_zipcode=True)

# Maps each zipcode to a dict of census-derived attributes.
countydata = {}
# Number of zipcodes whose census record was missing a field or otherwise unusable.
skipped_zip = 0

# Iterate over the actual zipcodes. The frame's index is only the row number
# (0, 1, 2, ...), so looking up `counties.index` would query the wrong codes.
for zipcode in counties['zipcode']:
    data = search.by_zipcode(str(zipcode)).to_dict()
    try:
        countydata[zipcode] = {
            "City": data["major_city"] + " " + data["state"],
            "Latitude": data["lat"],
            "Longitude": data["lng"],
            "Population": data["population"],
            "Density": int(data["population_density"]),
            # Occupied share of housing units, truncated to two decimals.
            "Occupancy": int(data["occupied_housing_units"] / data["housing_units"] * 100) / 100,
            # Value and income are rescaled to thousands, truncated to one decimal.
            "Value": int(data["median_home_value"] / 100) / 10,
            "Income": int(data["median_household_income"] / 100) / 10,
        }
    except (KeyError, TypeError, ValueError, ZeroDivisionError):
        # Missing keys, None-valued fields or zero housing units mean the record
        # is incomplete; count it and move on. Anything else is a real error
        # and is allowed to propagate.
        skipped_zip += 1
print("WARNING: missing data in %d out of %d zipcodes" % (skipped_zip, len(counties)))



### Load FEMA data

The data is retrieved from https://www.fema.gov/openfema-dataset-disaster-declarations-summaries-v2

In [55]:
# fema_filename = 'data/DisasterDeclarationsSummaries.csv'
# fire_data = pd.read_csv(fema_filename, index_col=[1],names=["femaDeclarationString",
#                                                             "disasterNumber",
#                                                             "state",
#                                                             "declarationType",
#                                                             "declarationDate",
#                                                             "fyDeclared",
#                                                             "incidentType",
#                                                             "declarationTitle",
#                                                             "ihProgramDeclared",
#                                                             "iaProgramDeclared",
#                                                             "paProgramDeclared",
#                                                             "hmProgramDeclared",
#                                                             "incidentBeginDate",
#                                                             "incidentEndDate",
#                                                             "disasterCloseoutDate",
#                                                             "fipsStateCode",
#                                                             "fipsCountyCode",
#                                                             "placeCode",
#                                                             "designatedArea",
#                                                             "declarationRequestNumber",
#                                                             "hash","lastRefresh"])
# print('Types of incidents filed in the data set...')
# print(set(fire_data['incidentType']))
# for item in list(fire_data.index) : 
#     print(item)
# with pd.option_context('display.max_rows',10) : 
#     print(fire_data)

*Assuming only fire incidents are relevant to this study

### Getting California fire incident data

Scraping the data off of www.fire.ca.gov/incidents

In [4]:
import ast
import re

import requests
from bs4 import BeautifulSoup

# Incident records of the most recently processed year.
table_content = []
# Incident records accumulated over all years.
final_list = []
list_of_avail_years = range(2013, 2020, 1)

for yr in list_of_avail_years:
    url = 'https://www.fire.ca.gov/incidents/' + str(yr) + '/'
    # A timeout keeps the cell from hanging forever on a stalled connection;
    # HTTP error responses are surfaced immediately instead of being parsed.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'html.parser')

    # Locate the <div> holding the map script. The list of divs is computed once;
    # if several divs match, the last one wins.
    html_content = None
    for div in soup.find_all('div'):
        if div.attrs.get('class') == ['section', 'incidents-esri-webmap']:
            html_content = div.text
    if html_content is None:
        raise RuntimeError("No incident map section found at %s" % url)

    # Keep the first line following 'create_map([' and drop its last three characters.
    payload = html_content.partition('create_map([')[2].split("\n")[0][:-3]

    # SECURITY: the payload is remote content, so it is parsed as a literal with
    # ast.literal_eval rather than executed with eval().
    # NOTE(review): if the page ever embeds JSON-only tokens (true/false/null),
    # a JSON parser will be needed here instead.
    table_content = ast.literal_eval(payload)
    if isinstance(table_content, dict):
        # A single record parses to a bare dict rather than a tuple of dicts;
        # wrap it so the record itself is stored, not its keys.
        table_content = (table_content,)
    final_list.extend(table_content)


Constructing the dataframe based on scraped data

In [101]:
# Turn the accumulated scrape results into a table, one row per element of final_list.
fires_df = pd.DataFrame(data=final_list)
# Indexing by 'CountiesList' was tried and left disabled:
# fires_df.set_index('CountiesList',inplace=True)
print(fires_df)

      IncidentId                              UniqueId            Name  \
0            653  7a4a3552-3bea-4428-984c-e2edef13980f      Becks Fire   
1            654  094719ba-a47b-4abb-9ec5-a506b2b9fd23      River Fire   
2            655  99cb21da-9639-4300-9a97-1cadc993189f     Jurupa Fire   
3            656  3513f584-632d-4fd0-a98a-646eb0753248      Butte Fire   
4            657  58f89ff8-bd3e-4355-b1c0-8fa05c747d3f   Fawnskin Fire   
...          ...                                   ...             ...   
1617        2368  dfc32e06-b88e-4d30-ae3d-22bc18792f5e   Hillside Fire   
1618        2369  31852a71-e639-496f-9e7d-0968d1c218b3     Maria Fire    
1619        2370  305f30f5-622d-4c55-8813-45ffbc15aed0  Sobrante Fire    
1620        2371  69d98a94-4980-4dc0-b388-61c37c635f32      Ranch Fire   
1621        2372  600d608d-6279-435d-bc9b-99aacaa5e80f      Eagle Fire   

                                               Location   Latitude  \
0         North of Highway 29, southeast 

Listing the counties affected by fires

In [102]:
# Distinct raw 'CountiesList' values; a value may name several counties separated by commas.
county_list_repeated = set(fires_df['CountiesList'].tolist())
# Incidents with no county information carry an empty string; ignore it.
county_list_repeated.discard("")

# Individual county names, whitespace-normalised. Every fragment is stripped,
# whether it came from a single-county or a multi-county entry, so the same
# county cannot appear under two spellings. Empty fragments left by stray
# commas are dropped.
county_list = []
for item in county_list_repeated:
    for c in item.split(","):
        c = c.strip()
        if c:
            county_list.append(c)

county_set = set(county_list)
print('List of counties affect by fires : \n', county_set)

List of counties affect by fires : 
 {'Yuba', 'Sierra', 'Modoc', 'Marin', 'Solano', 'Kern', 'Shasta', 'Ventura', 'Mono', 'Tehama', 'Butte', 'El Dorado', 'Fresno', 'Santa Barbara', 'Napa', 'State of Oregon', 'San Luis Obispo', 'Kings', 'Trinity', 'San Diego', 'Nevada', 'Amador', 'Sacramento', 'Placer', 'Plumas', 'Riverside', 'Santa Clara', 'Contra Costa', 'Orange', 'Mariposa', 'Sonoma', 'Stanislaus', 'Glenn', 'Monterey', 'Madera', 'Alameda', 'Lake', 'San Mateo', 'San Bernardino', 'Tulare', 'Inyo', 'Mendocino', 'Sutter', 'Lassen', 'San Joaquin', 'Calaveras', 'San Benito', 'Alpine', 'Humboldt', 'Mexico', 'Los Angeles', 'State of Nevada', 'Del Norte', 'Siskiyou', 'Colusa', 'Merced', 'Santa Cruz', 'Yolo', 'Tuolumne'}


In [103]:
# NOTE: county_set is used unfiltered here, so any non-county entry in the fire
# data is counted too; the captured output of this cell reads "59 out 58".
affected_count = len(county_set)
print("Total of %d out 58 total counties in CA affected by fires" % affected_count)

Total of 59 out 58 total counties in CA affected by fires


The county size information was acquired from https://www.counties.org/pod/square-mileage-county on Nov 15, 2019

In [106]:
# County areas, read from a header-less CSV and indexed by county name.
county_size = (
    pd.read_csv('data/counties_size.csv', names=['county', 'size'])
    .set_index('county')
)

# Report the number of counties and show a truncated preview of the table.
print("%d counties found..." % len(county_size))
with pd.option_context('display.max_rows', 10):
    print(county_size)

58 counties found...
                 size
county               
San Bernardino  20057
Inyo            10181
Kern             8132
Riverside        7206
Siskiyou         6278
...               ...
Amador            595
Marin             520
San Mateo         448
Santa Cruz        445
San Francisco      47

[58 rows x 1 columns]


Note, the area is quoted in square miles.

Checking that the county is listed in the manifest : 

In [108]:
# Names of all counties present in the size table.
county_manifest = county_size.index.tolist()

# Names that occur in the fire data but not in the size table.
county_exclude = []
for county in county_set:
    if county in county_manifest:
        continue
    print("ERROR: Unknown County %s in fire data" % county)
    county_exclude.append(county)

ERROR: Unknown County State of Oregon in fire data
ERROR: Unknown County Mexico in fire data
ERROR: Unknown County State of Nevada in fire data


In [215]:
# county_size
def get_area(county, sizes=None):
    """Return the area of ``county`` as a float.

    Parameters
    ----------
    county : str
        County name; must be a label in the index of the size table.
    sizes : pandas.DataFrame, optional
        Table indexed by county name with a ``'size'`` column. Defaults to the
        notebook-level ``county_size`` table (quoted in square miles).

    Returns
    -------
    float
        The value of the ``'size'`` column for ``county``.

    Raises
    ------
    KeyError
        If ``county`` is not in the table's index.
    """
    table = county_size if sizes is None else sizes
    # Select the scalar cell directly; calling float() on a one-element Series
    # (what ``.loc[county]`` alone returns) is deprecated in pandas.
    return float(table.loc[county, 'size'])

# get_area("Ventura") # for testing

1843.0

Restructuring the dataframe to only have one county assigned to a fire

In [257]:
# Keep only incidents that report both a burned acreage and at least one county,
# then separate incidents listing several counties from those listing exactly one.
fires_df = fires_df[fires_df.AcresBurnedDisplay != '']
fires_df = fires_df[fires_df.CountiesList != '']
is_multi_county = fires_df.CountiesList.str.contains(",")
fires_multi_county_df = fires_df[is_multi_county]
fires_no_multi_county = fires_df[~is_multi_county]
# print(fires_multi_county_df)

# One record per (fire, county) pair produced by splitting multi-county fires.
new_rows = []
known_counties = set(county_manifest)
for _, fire in fires_multi_county_df.iterrows():
    fire_counties = [name.strip(' ') for name in fire["CountiesList"].split(",")]

    # A fire is skipped entirely if any of its listed counties is not in the manifest.
    if any(name not in known_counties for name in fire_counties):
        print("WARNING: found county that is not in the manifest")
        continue

    # The burned acreage is shared between the counties in proportion to county area.
    areas = [get_area(name) for name in fire_counties]
    total_area = sum(areas)
    total_burn = float(fire["AcresBurnedDisplay"].replace(',', ''))

    for name, area in zip(fire_counties, areas):
        share = fire.copy()
        share["CountiesList"] = name
        # NOTE: split rows store a float here, while untouched single-county rows
        # keep the original display string.
        share["AcresBurnedDisplay"] = total_burn * area / total_area
        new_rows.append(share)

# Append all split rows in a single concat: DataFrame.append was removed in
# pandas 2.0, and growing a frame row by row is quadratic.
if new_rows:
    fires_no_multi_county = pd.concat(
        [fires_no_multi_county, pd.DataFrame(new_rows).infer_objects()],
        ignore_index=True,
    )
print(fires_no_multi_county.loc[fires_no_multi_county['Name'].str.contains("Grand Fire")])

      IncidentId                              UniqueId        Name  \
1579         670  e0d038e9-0926-4c05-8317-ec7075a04b12  Grand Fire   
1580         670  e0d038e9-0926-4c05-8317-ec7075a04b12  Grand Fire   

                                               Location  Latitude  Longitude  \
1579  South of Frazier Mountain Park Rd, South of Fr...   34.7861  -118.9411   
1580  South of Frazier Mountain Park Rd, South of Fr...   34.7861  -118.9411   

     AcresBurnedDisplay PercentContainedDisplay CountiesList  \
1579            3543.02                    100%         Kern   
1580            802.975                    100%      Ventura   

              UpdatedDate StartedDate  \
1579  2013-05-21 19:45:00  2013-05-15   
1580  2013-05-21 19:45:00  2013-05-15   

                                       AdminUnit IncidentTypeDisplay  \
1579  CAL FIRE / USFS Los Padres National Forest                       
1580  CAL FIRE / USFS Los Padres National Forest                       

              