# FEMA Wildfire Data Analysis for CCEE Whitepaper

### Dataset and Library Imports

In [3]:
import pandas as pd
from utils import *
# School-district table; later cells read its 'County', 'District Name',
# 'Number of Schools' and 'Student Enrollment ' (note the trailing space) columns.
master = pd.read_csv("../datasets/equity.csv")
# FEMA table; later cells read its 'COUNTY' key and 'WFIR_RISKR' wildfire
# risk-rating columns.
fire_risk = pd.read_csv("../datasets/fema.csv")

### Filtering Data for Relevant Columns

Combined the FEMA dataset with the California school district dataset to create a single master dataset. Cleaned the dataset by correcting for missing values and type errors and by selecting the relevant columns. At this point the primary dataset contains 936 rows, one for each California school district. Each column represents either an identifier (e.g. county or district name), a size measure (number of schools or student enrollment), or the corresponding wildfire risk rating.

In [4]:
# Reduce the FEMA table to the county key and its wildfire risk rating.
filtered = fire_risk.loc[:, ["COUNTY", "WFIR_RISKR"]]

# Attach each county's rating to the district rows by county name
# (pandas' default inner join: unmatched counties are dropped).
joined_df = pd.merge(filtered, master, left_on='COUNTY', right_on='County')

# Columns carried into the analysis. The enrollment header really does end
# with a space in the source data.
filtered_wildfires = joined_df[
    [
        "County",
        "WFIR_RISKR",
        "District Name",
        "Number of Schools",
        "Student Enrollment ",
    ]
]

# first 10 rows of filtered FEMA wildfire dataset
filtered_wildfires.head(10)

Unnamed: 0,County,WFIR_RISKR,District Name,Number of Schools,Student Enrollment
0,Alameda,Relatively Moderate,Alameda Unified,19,9155
1,Alameda,Relatively Moderate,Albany City Unified,7,3637
2,Alameda,Relatively Moderate,Berkeley Unified,18,9438
3,Alameda,Relatively Moderate,Castro Valley Unified,16,9454
4,Alameda,Relatively Moderate,Dublin Unified,16,13004
5,Alameda,Relatively Moderate,Emery Unified,3,676
6,Alameda,Relatively Moderate,Fremont Unified,41,32706
7,Alameda,Relatively Moderate,Hayward Unified,34,19171
8,Alameda,Relatively Moderate,Livermore Valley Joint Unified,19,13298
9,Alameda,Relatively Moderate,Mountain House Elementary,1,22


### Split Dataset into High, Medium, and Low Risk

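Split the districts into High, Medium, and Low wildfire-risk groups by matching the text of the FEMA rating: a rating containing "High", "Moderate", or "Low", respectively (so "Medium" here corresponds to FEMA's "Moderate"). For each group we counted the number of counties, and then repeated this for the total number of districts, schools, and students.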

In [5]:
# Bucket districts by the FEMA wildfire rating text in WFIR_RISKR. Matching is
# by substring, so any rating containing the keyword lands in that bucket
# (e.g. 'Relatively Moderate' matches 'Moderate').
# na=False makes a missing rating count as "no match"; without it the mask
# would contain NaN, which boolean indexing rejects.
risk_rating = filtered_wildfires['WFIR_RISKR']

# high risk
high_wild = filtered_wildfires[risk_rating.str.contains('High', na=False)]

# moderate risk
med_wild = filtered_wildfires[risk_rating.str.contains('Moderate', na=False)]

# low risk
low_wild = filtered_wildfires[risk_rating.str.contains('Low', na=False)]
# Drop low-risk rows whose school count contains '-'. The column is addressed
# by name rather than by position (it was iloc[:, 3]) so the filter keeps
# targeting the same data if the column selection is ever reordered.
# NOTE(review): presumably '-' is a missing-value placeholder; this filter is
# applied only to the low bucket — confirm high/moderate do not need it too.
low_wild = low_wild[~low_wild["Number of Schools"].str.contains('-')]

### Print Results for Each Risk Category

In [6]:
# Headline totals for the districts in the high-risk bucket.
print('High FEMA Wildfire Risk:')
print(len(high_wild), 'districts')
print(high_wild["County"].nunique(), 'counties')
# Both count columns are passed through int_fixer (from utils) before summing.
for column, unit in (("Number of Schools", 'schools'), ("Student Enrollment ", 'students')):
    print(sum(int_fixer(high_wild[column])), unit)

High FEMA Wildfire Risk:
649 districts
31 counties
7319 schools
4135548 students


In [7]:
# Headline totals for the districts in the moderate-risk bucket.
print('Med FEMA Wildfire Risk:')
print(len(med_wild), 'districts')
print(med_wild["County"].nunique(), 'counties')
# Both count columns are passed through int_fixer (from utils) before summing.
for column, unit in (("Number of Schools", 'schools'), ("Student Enrollment ", 'students')):
    print(sum(int_fixer(med_wild[column])), unit)

Med FEMA Wildfire Risk:
216 districts
17 counties
2109 schools
989130 students


In [8]:
# Headline totals for the districts in the low-risk bucket.
print('Low FEMA Wildfire Risk:')
print(len(low_wild), 'districts')
print(low_wild["County"].nunique(), 'counties')
# Both count columns are passed through int_fixer (from utils) before summing.
for column, unit in (("Number of Schools", 'schools'), ("Student Enrollment ", 'students')):
    print(sum(int_fixer(low_wild[column])), unit)

Low FEMA Wildfire Risk:
71 districts
10 counties
642 schools
301140 students
