In [26]:
import sys
sys.path.append('../')

from common_imports import *
from common_functions import *

In [27]:
dir_path = "wildfire_data/California_Fire_Incidents.csv"
wildfire_df = pd.read_csv(dir_path)

In [28]:
wildfire_df.shape

(1636, 40)

In [29]:
wildfire_df.columns

Index(['AcresBurned', 'Active', 'AdminUnit', 'AirTankers', 'ArchiveYear',
       'CalFireIncident', 'CanonicalUrl', 'ConditionStatement',
       'ControlStatement', 'Counties', 'CountyIds', 'CrewsInvolved', 'Dozers',
       'Engines', 'Extinguished', 'Fatalities', 'Featured', 'Final',
       'FuelType', 'Helicopters', 'Injuries', 'Latitude', 'Location',
       'Longitude', 'MajorIncident', 'Name', 'PercentContained',
       'PersonnelInvolved', 'Public', 'SearchDescription', 'SearchKeywords',
       'Started', 'Status', 'StructuresDamaged', 'StructuresDestroyed',
       'StructuresEvacuated', 'StructuresThreatened', 'UniqueId', 'Updated',
       'WaterTenders'],
      dtype='object')

In [30]:
wildfire_df["ArchiveYear"].unique()

array([2014, 2015, 2017, 2019, 2018, 2013, 2016])

In [31]:
# Count occurrences of each combination of Counties and ArchiveYear
value_counts = wildfire_df.groupby(['Counties', 'ArchiveYear']).size().reset_index(name='Count')

# Sort the results by Count in descending order and get the top 10
top_counts = value_counts.sort_values(by='Count', ascending=False).head(10)
print(top_counts)


            Counties  ArchiveYear  Count
190        Riverside         2017     52
192        Riverside         2019     33
228  San Luis Obispo         2017     25
191        Riverside         2018     25
54            Fresno         2017     24
78              Kern         2017     24
216        San Diego         2017     20
186        Riverside         2013     19
19             Butte         2017     18
217        San Diego         2018     17


In [32]:
df_filtered = wildfire_df.drop(columns=['PercentContained', 'CanonicalUrl', 
                                        'ConditionStatement','SearchDescription', 
                                        'SearchKeywords', 'Featured', 'Final', 'Status'
                                        ])

In [33]:
# Use format="ISO8601" to parse ISO 8601 date strings
df_filtered['Started'] = pd.to_datetime(df_filtered['Started'], format="ISO8601", errors='coerce').dt.tz_localize(None)
df_filtered['Extinguished'] = pd.to_datetime(df_filtered['Extinguished'], format="ISO8601", errors='coerce').dt.tz_localize(None)

# Calculate the duration and store it in a new column
df_filtered['Duration'] = (df_filtered['Extinguished'] - df_filtered['Started']).dt.days.fillna(0)
# Fill NaNs with 0 for numerical columns
df_filtered.fillna(0, inplace=True)
df_filtered.head(5)

Unnamed: 0,AcresBurned,Active,AdminUnit,AirTankers,ArchiveYear,CalFireIncident,ControlStatement,Counties,CountyIds,CrewsInvolved,...,Public,Started,StructuresDamaged,StructuresDestroyed,StructuresEvacuated,StructuresThreatened,UniqueId,Updated,WaterTenders,Duration
0,12536.0,False,CAL FIRE Mendocino Unit,0.0,2014,True,0,Mendocino,23,0.0,...,True,2014-07-30 06:27:00,0.0,0.0,0.0,0.0,7c6a9617-1c79-4b4d-9833-e76532f59103,2014-09-09T19:30:00Z,0.0,41.0
1,403.0,False,CAL FIRE Mendocino Unit,0.0,2014,True,Black Bart Trail East,Mendocino,23,0.0,...,True,2014-09-13 15:48:00,0.0,11.0,0.0,0.0,60b1ae62-1688-44b3-8447-66785e692e92,2014-09-17T16:10:00Z,0.0,4.0
2,389.0,False,CAL FIRE Butte Unit,4.0,2014,True,0,Butte,4,29.0,...,True,2014-07-28 13:11:00,0.0,0.0,0.0,0.0,69b38608-fab1-4df5-b9e9-0fd28bc505ba,2014-08-01T17:30:00Z,13.0,4.0
3,62.0,False,CAL FIRE Mendocino Unit,0.0,2014,True,0,Mendocino,23,2.0,...,True,2014-09-21 16:13:00,0.0,0.0,0.0,0.0,a44b9e07-680d-4857-9d27-834363373a83,2014-09-22T16:30:00Z,0.0,1.0
4,30.0,False,CAL FIRE Siskiyou Unit,0.0,2014,True,0,Siskiyou,47,0.0,...,True,2014-07-29 13:00:00,0.0,0.0,0.0,0.0,279d9058-7545-488b-8a83-51b12e60cb47,2014-08-02T11:00:00Z,0.0,3.0


In [34]:
df_filtered['Started'] = pd.to_datetime(df_filtered['Started'])
df_filtered['Extinguished'] = pd.to_datetime(df_filtered['Extinguished'])
df_filtered['Longitude'] = pd.to_numeric(df_filtered['Longitude'], errors='coerce')
df_filtered['Latitude'] = pd.to_numeric(df_filtered['Latitude'], errors='coerce')

In [35]:
map = folium.Map(location=(37, -117), zoom_start=6, tiles="Cartodb Positron")
map
year_colors = {
    2013: 'blue',
    2014: 'green',
    2015: 'red',
    2016: 'purple',
    2017: 'black',
    2018: 'orange',
    2019: 'pink',
}
for _, row in df_filtered.iterrows():
    year = row['ArchiveYear']
    color = year_colors.get(year, 'gray')  # Default color if year not in dictionary
    loc_discrpt = row['Location']
    
    folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=3,  # radius of the marker
        color=color,
        fill=True,
        fill_color=color,
        popup=f"Year: {year}, location: {loc_discrpt}"  # Show year on click
    ).add_to(map)

In [36]:
map