# Converting Fire Stat data for use with Maps

In [8]:
import pandas as pd
import fiona

# Open the geopackage file directly using Fiona to access the attribute data without geometry
with fiona.open('Resources/California_County_Boundaries.gpkg') as county_file:
    county_attributes = [feature['properties'] for feature in county_file]

# Convert the attributes to a DataFrame
county_attributes_df = pd.DataFrame(county_attributes)

# Rename the COUNTY_NAME to County for the join
county_attributes_df = county_attributes_df.rename(columns={'COUNTY_NAME': 'County'})

# Display the merged DataFrame
county_attributes_df.head()




Unnamed: 0,COUNTY_ABBREV,COUNTY_CODE,COUNTY_FIPS,County,COUNTY_NUM,GlobalID,ISLAND
0,ALA,1,1,Alameda,1,{E6F92268-D2DD-4CFB-8B79-5B4B2F07C559},
1,ALP,2,3,Alpine,2,{870479B2-480A-494B-8352-AD60578839C1},
2,AMA,3,5,Amador,3,{4F45B3A6-BE10-461C-8945-6B2AAA7119F6},
3,BUT,4,7,Butte,4,{44FBA680-AECC-4E04-A499-29D69AFFBD4A},
4,CAL,5,9,Calaveras,5,{D11EF739-4A1E-414E-BFD1-E7DCD56CD61E},


In [9]:
# Load the fire data CSV file
file_path = r'Outputs/fires_2008_2022_cleaned.csv'
df = pd.read_csv(file_path)

# Clean the 'Acres' column by removing commas and quotes, then convert to integer
df['Acres'] = df['Acres'].replace({',': '', '"': ''}, regex=True).astype(int)

# Apply the transformation to the "County" column to not have all Caps
df['County'] = df['County'].str.title()

# Save the updated DataFrame back to a CSV file
output_path = 'Outputs/fires_2008_2022_cleaned_lc.csv'
df.to_csv(output_path, index=False)

df.head()

Unnamed: 0,County,Fire Name,Start,Contained,Acres,Deaths_FF,Deaths_Civil,Duration,Strux_Destr,Strux_Dmgd
0,Lake,"CONTROL BURN, GEYSERS",2008-02-13,2008-02-13,400,0,0,1,0,0
1,San Bernardino,Bluff,2008-03-16,2008-03-20,680,0,0,5,0,0
2,Mariposa,WAWONA NW,2008-04-09,2008-04-19,1130,0,0,11,0,0
3,Los Angeles,SANTA ANITA,2008-04-26,2008-05-02,584,0,0,7,0,0
4,Riverside,APACHE,2008-04-29,2008-05-04,769,0,0,6,0,0


In [10]:
# Get the unique county names from both datasets
fire_counties = df['County'].unique()
county_attributes_counties = county_attributes_df['County'].unique()

# Compare the county names to identify discrepancies
fire_counties_set = set(fire_counties)
county_attributes_counties_set = set(county_attributes_counties)

# Counties in the fire data not in the county attributes
missing_in_county_attributes = fire_counties_set - county_attributes_counties_set

# Counties in the county attributes not in the fire data
missing_in_fire_data = county_attributes_counties_set - fire_counties_set

missing_in_county_attributes, missing_in_fire_data


({'Jackson (Or)', 'Washoe (Nv)'}, {'Imperial', 'San Francisco'})

In [12]:
# Extract the year from the 'Start' column and add it as a new column
df['Year'] = pd.to_datetime(df['Start']).dt.year

# Export the updated DataFrame to a CSV file
fire_stats_year_file_path = 'Outputs/fire_stats_year_county.csv'
df.to_csv(fire_stats_year_file_path, index=False)

# Display the updated DataFrame
print(df.head())

           County              Fire Name       Start   Contained  Acres  \
0            Lake  CONTROL BURN, GEYSERS  2008-02-13  2008-02-13    400   
1  San Bernardino                  Bluff  2008-03-16  2008-03-20    680   
2        Mariposa              WAWONA NW  2008-04-09  2008-04-19   1130   
3     Los Angeles            SANTA ANITA  2008-04-26  2008-05-02    584   
4       Riverside                 APACHE  2008-04-29  2008-05-04    769   

   Deaths_FF  Deaths_Civil  Duration  Strux_Destr  Strux_Dmgd  Year  
0          0             0         1            0           0  2008  
1          0             0         5            0           0  2008  
2          0             0        11            0           0  2008  
3          0             0         7            0           0  2008  
4          0             0         6            0           0  2008  


In [13]:
# Sort the single-county dataset by 'Year' and 'County' name
df_sorted = df.sort_values(by=['Year', 'County'])

# Group by 'Year' and 'County' and calculate the sum for 'Acres' through 'Duration' columns
summary_totals = df_sorted.groupby(['Year', 'County'])[['Acres', 'Strux_Destr', 'Strux_Dmgd', 'Deaths_FF', 'Deaths_Civil', 'Duration']].sum()

# Convert the 'Start' and 'Contained' columns to datetime
df['Start'] = pd.to_datetime(df['Start'])
df['Contained'] = pd.to_datetime(df['Contained'])

# Function to generate a set of active days for a fire
def get_active_days(row):
    return pd.date_range(start=row['Start'], end=row['Contained'])

# Apply the function to each row to generate the active days
df['Active_Days'] = df.apply(get_active_days, axis=1)

# Group by 'Year' and 'County' and union all active days
def union_active_days(group):
    all_days = set().union(*group['Active_Days'])
    return len(all_days)

# Apply the union function to get the count of unique fire days
unique_fire_days = df.groupby(['Year', 'County']).apply(union_active_days)

# Add this count to your summary totals
summary_totals['Unique_Fire_Days'] = unique_fire_days

# Display the updated summary totals
summary_totals.head()

# Count the number of fires per year and county
fire_counts = df_sorted.groupby(['Year', 'County']).size()

# Add the count of fires to the summary totals
summary_totals['Total_Fires'] = fire_counts

# Display the summary
summary_totals.head()

# Export the summary totals to a CSV file
summary_totals_file_path = 'Outputs/summary_totals_by_county.csv'
summary_totals.to_csv(summary_totals_file_path)

# Display the final summary
summary_totals




  unique_fire_days = df.groupby(['Year', 'County']).apply(union_active_days)


Unnamed: 0_level_0,Unnamed: 1_level_0,Acres,Strux_Destr,Strux_Dmgd,Deaths_FF,Deaths_Civil,Duration,Unique_Fire_Days,Total_Fires
Year,County,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2008,Butte,91266,0,0,0,0,83,56,14
2008,Fresno,13973,0,0,0,0,121,119,4
2008,Humboldt,49635,0,0,0,0,208,88,5
2008,Kern,37845,0,0,0,0,33,31,3
2008,Lake,12032,0,0,0,0,46,37,3
...,...,...,...,...,...,...,...,...,...
2022,San Diego,5033,30,8,0,0,11,11,2
2022,San Luis Obispo,387,0,0,0,0,6,6,1
2022,Shasta,304,16,4,0,0,6,6,1
2022,Siskiyou,17379,122,26,0,2,72,59,2
