In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [2]:
# Read the monthly fire-count CSV. The text encoding is given explicitly as
# ISO-8859-1; the header contains accented French labels (e.g. 'Année').
num = pd.read_csv(
    '../data/NFD_Number_of_fires_by_month.csv',
    encoding='ISO-8859-1',
)
num.head()

Unnamed: 0,Year,Année,ISO,Jurisdiction,Juridiction,Month,Mois,Number,Data Qualifier,Nombre,Qualificatifs de données
0,1990,1990,AB,Alberta,Alberta,January,Janvier,1.0,a,1.0,a
1,1990,1990,AB,Alberta,Alberta,October,Octobre,34.0,a,34.0,a
2,1990,1990,AB,Alberta,Alberta,November,Novembre,16.0,a,16.0,a
3,1990,1990,AB,Alberta,Alberta,December,Décembre,4.0,a,4.0,a
4,1990,1990,AB,Alberta,Alberta,February,Février,5.0,a,5.0,a


In [3]:
# Show the column labels of the fire-count table.
num.columns

Index(['Year', 'Année', 'ISO', 'Jurisdiction', 'Juridiction', 'Month', 'Mois',
       'Number', 'Data Qualifier', 'Nombre', 'Qualificatifs de données'],
      dtype='object')

In [4]:
# Narrow the table to the four English-language fields used from here on,
# in this order: Year, Month, Jurisdiction, Number.
num = num.loc[:, ['Year', 'Month', 'Jurisdiction', 'Number']]
num.head()

Unnamed: 0,Year,Month,Jurisdiction,Number
0,1990,January,Alberta,1.0
1,1990,October,Alberta,34.0
2,1990,November,Alberta,16.0
3,1990,December,Alberta,4.0
4,1990,February,Alberta,5.0


In [5]:
# Build a national ('Canada') series by summing the fire counts of all
# jurisdictions for each (Year, Month) pair.
#
# Rows already labelled 'Canada' are set aside first, so re-running this cell
# neither folds earlier totals into the new sum nor appends a second copy.
num_by_jurisdiction = num[num['Jurisdiction'] != 'Canada']

# NOTE: sum() skips missing counts, so a (Year, Month) group with no reported
# values gets a total of 0.0 rather than NaN.
total_fires = num_by_jurisdiction.groupby(['Year', 'Month'])['Number'].sum().reset_index()
total_fires['Jurisdiction'] = 'Canada'

# Append the national rows after the per-jurisdiction rows and renumber the index.
num = pd.concat([num_by_jurisdiction, total_fires], ignore_index=True)

num.head(5)

Unnamed: 0,Year,Month,Jurisdiction,Number
0,1990,January,Alberta,1.0
1,1990,October,Alberta,34.0
2,1990,November,Alberta,16.0
3,1990,December,Alberta,4.0
4,1990,February,Alberta,5.0


In [6]:
# for column in num.columns:
#     print(f"Column: {column}")
#     print(f"Unique values: {num[column].unique()}")
#     print(f"Number of unique values: {num[column].nunique()}")
#     # print(f"Value counts:")
#     # print(num[column].value_counts())

In [7]:
# Remove rows whose Month is the placeholder 'Unspecified': it is not a month
# name, so it could not be parsed with the "%B %Y" format used for the Date column.
num = num.loc[num['Month'].ne('Unspecified')]

In [8]:
# Add a 'Date' column parsed from "<Month name> <Year>" (e.g. "January 1990",
# which becomes 1990-01-01), then order the rows chronologically.
num = (
    num
    .assign(
        Date=lambda frame: pd.to_datetime(
            frame['Month'] + ' ' + frame['Year'].astype(str),
            format="%B %Y",
        )
    )
    .sort_values('Date')
)

num.head(5)

Unnamed: 0,Year,Month,Jurisdiction,Number,Date
0,1990,January,Alberta,1.0,1990-01-01
3200,1990,January,Canada,1.0,1990-01-01
74,1990,January,Prince Edward Island,0.0,1990-01-01
73,1990,February,Prince Edward Island,0.0,1990-02-01
4,1990,February,Alberta,5.0,1990-02-01


In [9]:
# # Create a figure and an axis
# plt.figure(figsize=(12, 6))

# # Loop over each unique jurisdiction
# for jurisdiction in num['Jurisdiction'].unique():
#     # Create a new dataframe for each jurisdiction
#     jurisdiction_df = num[num['Jurisdiction'] == jurisdiction]
#     # Plot 'Number' over time for the current jurisdiction
#     plt.plot(jurisdiction_df['Date'], jurisdiction_df['Number'], label=jurisdiction)

# # Set the title and labels
# plt.title('Number of fires over Time')
# plt.xlabel('Date')
# plt.ylabel('Number')

# # Add a legend
# plt.legend()

# # Show the plot
# plt.show()

In [10]:
# Line chart of monthly fire counts: one trace per jurisdiction, added in the
# order the jurisdictions first appear in the table.
fig_num = go.Figure()

for jurisdiction in num['Jurisdiction'].unique():
    # Rows belonging to the current jurisdiction only
    rows = num.loc[num['Jurisdiction'] == jurisdiction]
    trace = go.Scatter(x=rows['Date'], y=rows['Number'], mode='lines', name=jurisdiction)
    fig_num.add_trace(trace)

# Chart title and axis captions
fig_num.update_layout(
    title='Number of Fires over Time',
    xaxis_title='Date',
    yaxis_title='Number',
)

# Render the interactive figure
fig_num.show()

In [11]:
# Save the prepared fire-count table as a pickle in the same data directory as
# the source CSV. The row index is written as-is (it is not reset after sorting).
num.to_pickle('../data/NFD_Number_of_fires_by_month.pkl')

In [12]:
# Print dtypes and non-null counts for the prepared fire-count table.
num.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3473 entries, 0 to 3096
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Year          3473 non-null   int64         
 1   Month         3473 non-null   object        
 2   Jurisdiction  3473 non-null   object        
 3   Number        3434 non-null   float64       
 4   Date          3473 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1), object(2)
memory usage: 162.8+ KB


In [13]:
# Summary statistics for the Year, Number and Date columns.
num.describe()

Unnamed: 0,Year,Number,Date
count,3473.0,3434.0,3473
mean,2004.709185,127.317997,2005-03-08 17:42:16.481428224
min,1990.0,0.0,1990-01-01 00:00:00
25%,1997.0,5.0,1997-07-01 00:00:00
50%,2005.0,26.0,2005-04-01 00:00:00
75%,2012.0,105.0,2012-09-01 00:00:00
max,2020.0,3285.0,2020-12-01 00:00:00
std,8.888393,306.609901,


In [14]:
# Read the monthly area-burned CSV. The text encoding is given explicitly as
# ISO-8859-1; the header contains accented French labels (e.g. 'Année').
area = pd.read_csv(
    '../data/NFD_Area_burned_by_month.csv',
    encoding='ISO-8859-1',
)
area.head()

Unnamed: 0,Year,Année,ISO,Jurisdiction,Juridiction,Month,Mois,Area (hectares),Data Qualifier,Superficie (en hectare),Qualificatifs de données
0,1990,1990,AB,Alberta,Alberta,January,Janvier,0.1,a,0.1,a
1,1990,1990,AB,Alberta,Alberta,October,Octobre,182.4,a,182.4,a
2,1990,1990,AB,Alberta,Alberta,November,Novembre,42.5,a,42.5,a
3,1990,1990,AB,Alberta,Alberta,December,Décembre,0.4,a,0.4,a
4,1990,1990,AB,Alberta,Alberta,February,Février,3.3,a,3.3,a


In [15]:
# Show the column labels of the area-burned table.
area.columns

Index(['Year', 'Année', 'ISO', 'Jurisdiction', 'Juridiction', 'Month', 'Mois',
       'Area (hectares)', 'Data Qualifier', 'Superficie (en hectare)',
       'Qualificatifs de données'],
      dtype='object')

In [16]:
# Narrow the table to the four English-language fields used from here on,
# in this order: Year, Month, Jurisdiction, Area (hectares).
area = area.loc[:, ['Year', 'Month', 'Jurisdiction', 'Area (hectares)']]
area.head()

Unnamed: 0,Year,Month,Jurisdiction,Area (hectares)
0,1990,January,Alberta,0.1
1,1990,October,Alberta,182.4
2,1990,November,Alberta,42.5
3,1990,December,Alberta,0.4
4,1990,February,Alberta,3.3


In [17]:
# Build a national ('Canada') series by summing the burned area of all
# jurisdictions for each (Year, Month) pair.
#
# Rows already labelled 'Canada' are set aside first, so re-running this cell
# neither folds earlier totals into the new sum nor appends a second copy.
area_by_jurisdiction = area[area['Jurisdiction'] != 'Canada']

# NOTE: sum() skips missing values, so a (Year, Month) group with no reported
# area gets a total of 0.0 rather than NaN.
total_area = area_by_jurisdiction.groupby(['Year', 'Month'])['Area (hectares)'].sum().reset_index()
total_area['Jurisdiction'] = 'Canada'

# Append the national rows after the per-jurisdiction rows and renumber the index.
area = pd.concat([area_by_jurisdiction, total_area], ignore_index=True)

area.head(5)

Unnamed: 0,Year,Month,Jurisdiction,Area (hectares)
0,1990,January,Alberta,0.1
1,1990,October,Alberta,182.4
2,1990,November,Alberta,42.5
3,1990,December,Alberta,0.4
4,1990,February,Alberta,3.3


In [18]:
# Remove rows whose Month is the placeholder 'Unspecified': it is not a month
# name, so it could not be parsed with the "%B %Y" format used for the Date column.
area = area.loc[area['Month'].ne('Unspecified')]

In [19]:
# Add a 'Date' column parsed from "<Month name> <Year>" (e.g. "January 1990",
# which becomes 1990-01-01), then order the rows chronologically.
area = (
    area
    .assign(
        Date=lambda frame: pd.to_datetime(
            frame['Month'] + ' ' + frame['Year'].astype(str),
            format="%B %Y",
        )
    )
    .sort_values('Date')
)

area.head(5)

Unnamed: 0,Year,Month,Jurisdiction,Area (hectares),Date
0,1990,January,Alberta,0.1,1990-01-01
3200,1990,January,Canada,0.1,1990-01-01
74,1990,January,Prince Edward Island,0.0,1990-01-01
73,1990,February,Prince Edward Island,0.0,1990-02-01
4,1990,February,Alberta,3.3,1990-02-01


In [20]:
# Line chart of monthly area burned: one trace per jurisdiction, added in the
# order the jurisdictions first appear in the table.
fig_area = go.Figure()

for jurisdiction in area['Jurisdiction'].unique():
    # Rows belonging to the current jurisdiction only
    rows_area = area.loc[area['Jurisdiction'] == jurisdiction]
    trace_area = go.Scatter(
        x=rows_area['Date'],
        y=rows_area['Area (hectares)'],
        mode='lines',
        name=jurisdiction,
    )
    fig_area.add_trace(trace_area)

# Chart title and axis captions
fig_area.update_layout(
    title='Area Burned by Month',
    xaxis_title='Date',
    yaxis_title='Area (hectares)',
)

# Render the interactive figure
fig_area.show()

In [21]:
# Save the prepared area-burned table as a pickle in the same data directory as
# the source CSV. The row index is written as-is (it is not reset after sorting).
area.to_pickle('../data/NFD_Area_burned_by_month.pkl')

In [22]:
# Print dtypes and non-null counts for the prepared area-burned table.
area.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3473 entries, 0 to 3096
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Year             3473 non-null   int64         
 1   Month            3473 non-null   object        
 2   Jurisdiction     3473 non-null   object        
 3   Area (hectares)  3427 non-null   float64       
 4   Date             3473 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1), object(2)
memory usage: 162.8+ KB


In [23]:
# Read the property-loss CSV. The text encoding is given explicitly as
# ISO-8859-1; the header contains accented French labels (e.g. 'Année').
# NOTE(review): the path contains a space after 'NFD' - presumably this matches
# the file name on disk; confirm before "fixing" it.
loss = pd.read_csv(
    '../data/NFD _Property_losses.csv',
    encoding='ISO-8859-1',
)
loss.head()

Unnamed: 0,Year,Année,ISO,Jurisdiction,Juridiction,Protection zone,Zone de protection,Dollars (En),Data qualifier,Dollars (Fr),Qualificatifs de données
0,1970,1970,AB,Alberta,Alberta,Intensive,Intensive,0.0,a,0.0,a
1,1970,1970,AB,Alberta,Alberta,Intensive,Intensive,,n,,n
2,1970,1970,BC,British Columbia,Colombie-Britannique,Intensive,Intensive,344684.0,a,344684.0,a
3,1970,1970,BC,British Columbia,Colombie-Britannique,Intensive,Intensive,,n,,n
4,1970,1970,MB,Manitoba,Manitoba,Intensive,Intensive,2002.0,a,2002.0,a


In [24]:
# Show the column labels of the property-loss table.
loss.columns

Index(['Year', 'Année', 'ISO', 'Jurisdiction', 'Juridiction',
       'Protection zone', 'Zone de protection', 'Dollars (En)',
       'Data qualifier', 'Dollars (Fr)', 'Qualificatifs de données'],
      dtype='object')

In [25]:
# Narrow the table to the three fields used from here on:
# Year, Jurisdiction and the English dollar-amount column.
loss = loss.loc[:, ['Year', 'Jurisdiction', 'Dollars (En)']]
loss.head()

Unnamed: 0,Year,Jurisdiction,Dollars (En)
0,1970,Alberta,0.0
1,1970,Alberta,
2,1970,British Columbia,344684.0
3,1970,British Columbia,
4,1970,Manitoba,2002.0


In [26]:
# Shorten the dollar column's label; rename() returns a new frame by default.
loss = loss.rename(columns={'Dollars (En)': 'Dollars'})

In [27]:
# Build a national ('Canada') series by summing the property losses of all
# jurisdictions for each Year.
#
# Rows already labelled 'Canada' are set aside first, so re-running this cell
# neither folds earlier totals into the new sum nor appends a second copy.
loss_by_jurisdiction = loss[loss['Jurisdiction'] != 'Canada']

# NOTE: sum() skips missing values, so a year in which no jurisdiction reported
# a dollar amount gets a total of 0.0 rather than NaN.
total_loss = loss_by_jurisdiction.groupby(['Year'])['Dollars'].sum().reset_index()
total_loss['Jurisdiction'] = 'Canada'

# Append the national rows after the per-jurisdiction rows and renumber the index.
loss = pd.concat([loss_by_jurisdiction, total_loss], ignore_index=True)

loss.tail(5)

Unnamed: 0,Year,Jurisdiction,Dollars
1167,2016,Canada,2355100.0
1168,2017,Canada,14396450.0
1169,2018,Canada,6421667.0
1170,2019,Canada,345002.0
1171,2020,Canada,351580.0


In [28]:
# Keep only records from 1990 onward.
# NOTE(review): presumably chosen to match the monthly tables, whose earliest
# year is 1990 - confirm.
loss = loss.loc[loss['Year'].ge(1990)]

In [29]:
# Drop every row that has a missing value in any column. The result is bound
# back to the name instead of using inplace=True, so the frame produced by the
# preceding filter is never mutated in place.
loss = loss.dropna()

In [30]:
# Bar chart of yearly property losses: one bar series per jurisdiction, added
# in the order the jurisdictions first appear in the table.
fig_loss = go.Figure()

for jurisdiction in loss['Jurisdiction'].unique():
    # Rows belonging to the current jurisdiction only
    rows_loss = loss.loc[loss['Jurisdiction'] == jurisdiction]
    bars = go.Bar(x=rows_loss['Year'], y=rows_loss['Dollars'], name=jurisdiction)
    fig_loss.add_trace(bars)

# Chart title and axis captions
fig_loss.update_layout(
    title='Property losses from fires',
    xaxis_title='Year',
    yaxis_title='Dollars',
)

# Render the interactive figure
fig_loss.show()

In [31]:
# Save the prepared property-loss table as a pickle in the same data directory
# as the source CSV (the file name keeps the space after 'NFD').
loss.to_pickle('../data/NFD _Property_losses.pkl')