In [1]:
import pandas as pd
from pathlib import Path
import calendar

file_path=Path("Resources/fire_data/mapdataall.csv")
df=pd.read_csv(file_path, encoding="utf-8")

#df.head()

In [2]:
#gather list of original columns
#list(df.columns)

In [3]:
#create new data frame with select columns
fire_df = df[['incident_name',
 'incident_administrative_unit',
 'incident_county',
 'incident_acres_burned',
 'incident_longitude',
 'incident_latitude',
 'incident_dateonly_extinguished',
 'incident_dateonly_created'
]]

#rename columns
fire_df = fire_df.rename(columns={
    'incident_name': "name",
     'incident_administrative_unit': "admin unit",
     'incident_county': "county",
     'incident_acres_burned': "acres burned" ,
     'incident_longitude': "lon",
     'incident_latitude': "lat",
     'incident_dateonly_extinguished': "date extinguished",
     'incident_dateonly_created': "date created"
})

#fire_incidents_df.head()
fire_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2198 entries, 0 to 2197
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   name               2198 non-null   object 
 1   admin unit         2186 non-null   object 
 2   county             2188 non-null   object 
 3   acres burned       2148 non-null   float64
 4   lon                2198 non-null   float64
 5   lat                2198 non-null   float64
 6   date extinguished  1975 non-null   object 
 7   date created       2198 non-null   object 
dtypes: float64(3), object(5)
memory usage: 137.5+ KB


In [5]:
#drop rows where county and acres burned columns are null
fire_incidents = fire_df.dropna(subset=['acres burned','county', 'date extinguished'])

#convert to datetime
fire_incidents['date extinguished'] = pd.to_datetime(fire_incidents.loc[:,'date extinguished'])
fire_incidents['date created'] = pd.to_datetime(fire_incidents.loc[:,'date created'])

#calculate duration
fire_incidents['duration (days)'] = fire_incidents.loc[:,'date extinguished'] - fire_incidents.loc[:,'date created']

#add years columns. Note dates are currently in datetime
fire_incidents["year extinguished"]=fire_incidents.loc[:,"date extinguished"].dt.year
fire_incidents["year created"]=fire_incidents.loc[:,"date created"].dt.year

#add months column by name (& number?) do not need extinguished only created
fire_incidents["month created (num)"]=fire_incidents["date created"].dt.month
fire_incidents["month created (name)"]=fire_incidents["month created (num)"].apply(lambda x: calendar.month_name[x])

##***INVESTIGATE HOW TO GET RID OF WARNINGS

fire_incidents.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fire_incidents['date extinguished'] = pd.to_datetime(fire_incidents.loc[:,'date extinguished'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fire_incidents['date created'] = pd.to_datetime(fire_incidents.loc[:,'date created'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fire_incidents['duratio

Unnamed: 0,name,admin unit,county,acres burned,lon,lat,date extinguished,date created,duration (days),year extinguished,year created,month created (num),month created (name)
0,Bridge Fire,Shasta-Trinity National Forest,Shasta,37.0,-122.309,40.774,2018-01-09,2017-10-31,70 days,2018,2017,10,October
1,Pala Fire,CAL FIRE San Diego Unit,San Diego,122.0,1.0,1.0,2009-05-25,2009-05-24,1 days,2009,2009,5,May
2,River Fire,CAL FIRE San Bernardino Unit,Inyo,407.0,-118.01651,36.602575,2013-02-28,2013-02-24,4 days,2013,2013,2,February
3,Fawnskin Fire,San Bernardino National Forest,San Bernardino,30.0,-116.941311,34.288877,2013-04-22,2013-04-20,2 days,2013,2013,4,April
4,Gold Fire,CAL FIRE Madera-Mariposa-Merced Unit,Madera,274.0,-119.635004,37.116295,2013-05-01,2013-04-30,1 days,2013,2013,4,April


In [6]:
fire_incidents.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1943 entries, 0 to 2196
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype          
---  ------                --------------  -----          
 0   name                  1943 non-null   object         
 1   admin unit            1934 non-null   object         
 2   county                1943 non-null   object         
 3   acres burned          1943 non-null   float64        
 4   lon                   1943 non-null   float64        
 5   lat                   1943 non-null   float64        
 6   date extinguished     1943 non-null   datetime64[ns] 
 7   date created          1943 non-null   datetime64[ns] 
 8   duration (days)       1943 non-null   timedelta64[ns]
 9   year extinguished     1943 non-null   int32          
 10  year created          1943 non-null   int32          
 11  month created (num)   1943 non-null   int32          
 12  month created (name)  1943 non-null   object         
dtypes: datet

In [7]:
#county key metrics

#groupby county
county_group = fire_incidents.groupby(["month created (name)","county"]) #ADD GROUP BY MONTH FIRST

#create summary data frame per county
county_summary = county_group.agg(
    #count incidents
    incident_count=("county","count"), 
    
    #total acre burned
    total_acres_burned=("acres burned", "sum"),
    
    #avg acres burned and round to whole number
    avg_acres_burned=("acres burned", "mean"),
    
    #avg duration and only exctract days as an integer  
    avg_duration=("duration (days)", lambda x: x.mean().days)
)

# convert 'total_acres_burned' and 'avg_acres_burned' to whole numbers/integer
county_summary['total_acres_burned'] = county_summary['total_acres_burned'].astype(int)
county_summary['avg_acres_burned'] = county_summary['avg_acres_burned'].round(0).astype(int)

#sort by descending incident count
county_summary = county_summary.sort_values(by=["incident_count"], ascending=False)

#preview
county_summary.head(25)

#county_summary.info()

Unnamed: 0_level_0,Unnamed: 1_level_0,incident_count,total_acres_burned,avg_acres_burned,avg_duration
month created (name),county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
July,Riverside,48,41940,874,64
June,Riverside,40,9124,228,90
May,Riverside,32,12948,405,86
August,Riverside,29,26445,912,64
July,Shasta,23,93212,4053,68
July,San Diego,23,11081,482,65
August,Kern,23,56028,2436,43
June,San Diego,23,11802,513,74
May,San Diego,22,26162,1189,87
June,Butte,21,2306,110,78


In [9]:
# year key metrics

#groupby year created (***SHOULD WE ADD COUNTY****)
year_county_group = fire_incidents.groupby(["year created"])

#create data frame with incident count and acres burned
year_created_summary = year_county_group.agg(
    incident_count=("county","count"), 
    total_acres_burned=("acres burned", "sum")
)

#sort by descending incident count
year_created_summary = year_created_summary.sort_values(by=["year created"], ascending=False)

#preview
year_created_summary.head(25)

Unnamed: 0_level_0,incident_count,total_acres_burned
year created,Unnamed: 1_level_1,Unnamed: 2_level_1
2024,2,250.0
2023,93,310026.0
2022,84,140415.0
2021,157,2289096.0
2020,205,2453742.0
2019,205,200634.0
2018,300,1529897.0
2017,427,1258294.0
2016,155,452101.0
2015,97,412281.0


In [10]:
#month key metrics

#groupby month created 
month_group = fire_incidents.groupby(["month created (name)"])

#create data frame with incident count and acres burned
month_summary = month_group.agg(
    incident_count=("county","count"), 
    total_acres_burned=("acres burned", "sum")
)

#sort by descending incident count
month_summary = month_summary.sort_values(by=["incident_count"], ascending=False)

#preview
month_summary.head(25)

Unnamed: 0_level_0,incident_count,total_acres_burned
month created (name),Unnamed: 1_level_1,Unnamed: 2_level_1
July,478,3749362.0
June,427,412379.0
August,354,3852007.0
September,220,668800.0
May,165,125753.0
October,150,408635.0
April,44,14373.0
November,43,272865.0
December,27,321923.0
February,12,7558.0
