In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd

%matplotlib inline

In [None]:
# Load the wildfire records from the Excel workbook that sits next to this notebook.
df = pd.read_excel("./wildfire.xlsx")
# (rows, columns) of the freshly loaded frame.
df.shape

(22914, 50)

In [None]:
# List every column label of the loaded frame.
df.columns

Index(['fire_year', 'fire_number', 'fire_name', 'current_size', 'size_class',
       'fire_location_latitude', 'fire_location_longitude', 'fire_origin',
       'general_cause_desc', 'industry_identifier_desc',
       'responsible_group_desc', 'activity_class', 'true_cause',
       'fire_start_date', 'det_agent', 'det_agent_type', 'discovered_date',
       'discovered_size', 'reported_date', 'dispatched_resource',
       'dispatch_date', 'start_for_fire_date', 'assessment_resource',
       'assessment_datetime', 'assessment_hectares', 'fire_spread_rate',
       'fire_type', 'fire_position_on_slope', 'weather_conditions_over_fire',
       'temperature', 'relative_humidity', 'wind_direction', 'wind_speed',
       'fuel_type', 'initial_action_by', 'ia_arrival_at_fire_date',
       'ia_access', 'fire_fighting_start_date', 'fire_fighting_start_size',
       'bucketing_on_fire', 'distance_from_water_source',
       'first_bucket_drop_date', 'bh_fs_date', 'bh_hectares', 'uc_fs_date',
       'u

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,fire_year,current_size,fire_location_latitude,fire_location_longitude,discovered_size,assessment_hectares,fire_spread_rate,temperature,relative_humidity,wind_speed,fire_fighting_start_size,distance_from_water_source,bh_hectares,uc_hectares,to_hectares,ex_hectares
count,22914.0,22914.0,22914.0,22914.0,0.0,22914.0,20305.0,20257.0,20255.0,20254.0,16507.0,6365.0,22914.0,22914.0,2444.0,22914.0
mean,2012.820896,155.06068,54.95378,-115.222288,,2.154478,0.869692,17.859303,45.424438,8.763059,5.311316,2.648594,114.266344,168.228295,1.762881,155.06068
std,4.517263,5906.681475,2.642913,2.309475,,60.137154,2.573896,7.539028,18.737953,8.456218,109.736761,21.405949,5986.559818,6978.850959,12.589448,5906.681475
min,2006.0,0.01,48.998195,-119.999983,,0.01,-1.0,-35.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.01
25%,2009.0,0.01,53.075638,-116.791542,,0.01,0.0,14.0,31.0,3.0,0.01,0.5,0.01,0.01,0.01,0.01
50%,2013.0,0.02,55.300575,-115.151046,,0.01,0.0,19.0,40.0,6.0,0.02,1.0,0.01,0.02,0.01,0.02
75%,2016.0,0.3,56.797629,-114.330646,,0.2,1.0,23.0,56.0,12.0,0.3,2.0,0.3,0.3,0.2,0.3
max,2021.0,577646.8,59.99951,-110.000917,,6019.0,100.0,39.9,100.0,90.0,6728.0,700.0,602417.0,707648.0,244.2,577646.8


In [None]:
# Print the frequency table of every column (NaN included), most frequent
# value first, with a dashed rule between columns.
separator = "\n" + "-"*50 + "\n"
for column_name in df.columns:
    frequencies = df[column_name].value_counts(dropna=False)
    print(f"Column: {column_name}")
    print(frequencies.sort_values(ascending=False))
    print(separator)

Column: fire_year
2006    1954
2015    1898
2010    1840
2008    1712
2009    1710
2012    1568
2014    1470
2016    1376
2007    1349
2021    1342
2018    1279
2017    1244
2013    1226
2011    1218
2019    1005
2020     723
Name: fire_year, dtype: int64

--------------------------------------------------

Column: fire_number
CWF111    16
CWF003    16
EWF003    16
EWF011    16
EWF012    16
          ..
EWF261     1
GWF194     1
GWF184     1
EWF260     1
MWF174     1
Name: fire_number, Length: 2530, dtype: int64

--------------------------------------------------

Column: fire_name
NaN                               22310
                                    207
                                     28
                                     19
Birch Complex                        17
                                  ...  
Sock Lake                             1
Clear Hills Fire                      1
Buffalo River Fire                    1
Boundry Lake Fire                     1
Three Lakes

In [None]:
# Forest-area name keyed by the first letter of the fire number.
# Built once at definition time instead of on every call.
FOREST_AREA_BY_PREFIX = {
    'C': 'Calgary',
    'E': 'Edson',
    'H': 'High Level',
    'G': 'Grande Prairie',
    'L': 'Lac La Biche',
    'M': 'Fort McMurray',
    'P': 'Peace River',
    'R': 'Rocky',
    'S': 'Slave Lake',
    'W': 'Whitecourt'
}


def extract_region(fire_number):
    """Return the forest-area name encoded in the first letter of a fire number.

    Parameters
    ----------
    fire_number : str
        Fire identifier such as ``"CWF111"``. Leading and trailing whitespace
        is ignored.

    Returns
    -------
    str
        The matching forest-area name, or ``"Unknown"`` when the value is not
        a string (e.g. ``None`` / NaN), is empty, or starts with a letter that
        is not in the lookup table.
    """
    # Missing values arrive as None or float NaN; indexing them would raise.
    if not isinstance(fire_number, str):
        return "Unknown"

    cleaned = fire_number.strip()
    if not cleaned:
        return "Unknown"

    return FOREST_AREA_BY_PREFIX.get(cleaned[0], "Unknown")

# Derive the forest area of every fire from its fire number.
df["fire_region"] = df["fire_number"].map(extract_region)

In [None]:
# Label fires without a usable name as "Unnamed". Besides NaN, the value
# counts of this column show entries that are empty / whitespace only, so those
# are treated as unnamed too; real names are kept exactly as stored.
fire_names = df["fire_name"]
name_is_blank = fire_names.isna() | fire_names.astype(str).str.strip().eq("")
df["fire_name"] = fire_names.mask(name_is_blank, "Unnamed")

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Encode the fire size class as an ordered integer: the explicit category list
# fixes the order so that 'A' -> 0, 'B' -> 1, ..., 'E' -> 4.
encoder = OrdinalEncoder(categories=[['A', 'B', 'C', 'D', 'E']])

# fit_transform expects a 2-D input (hence the double brackets) and returns
# floats, which are cast to int for the new column.
df['size_class_encoded'] = encoder.fit_transform(df[['size_class']]).astype(int)

In [None]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors


# Impute a missing 'fire_origin' with the value of the geographically closest
# fire whose origin is known, then one-hot encode the column.
# NOTE(review): the neighbour search runs on raw latitude/longitude degrees with
# the estimator's default metric, i.e. it is only an approximation of true
# ground distance.
coordinate_columns = ['fire_location_latitude', 'fire_location_longitude']

# Initialize NearestNeighbors with n_neighbors=1 to find the closest point
nn = NearestNeighbors(n_neighbors=1)

# Separate index labels for rows with missing and non-missing 'fire_origin' values
origin_is_missing = df['fire_origin'].isna()
missing_indices = df.index[origin_is_missing]
non_missing_indices = df.index[~origin_is_missing]

# Nothing to do (and kneighbors would reject an empty query) when no value is
# missing or when there is no known value to copy from.
if len(missing_indices) > 0 and len(non_missing_indices) > 0:
    # Fit the model on the rows whose origin is known
    nn.fit(df.loc[non_missing_indices, coordinate_columns])

    # For each row with a missing origin, find the position (within the fitted
    # rows) of its nearest neighbour; the result has shape (n_queries, 1).
    distances, indices = nn.kneighbors(df.loc[missing_indices, coordinate_columns])

    # Look the donors up by label (.loc) so this stays correct even if the
    # frame's index is not a plain 0..n-1 range; `indices` are positions
    # within the known-origin subset.
    known_origins = df.loc[non_missing_indices, 'fire_origin'].to_numpy()
    df.loc[missing_indices, 'fire_origin'] = known_origins[indices.ravel()]

# One-hot encode the 'fire_origin' column and attach it to df. Any columns left
# over from a previous run of this cell are dropped first so re-running does
# not create duplicate column labels.
one_hot_encoded_df = pd.get_dummies(df['fire_origin'], prefix='fire_origin_encoded')
df = pd.concat(
    [df.drop(columns=one_hot_encoded_df.columns, errors='ignore'), one_hot_encoded_df],
    axis=1,
)

# Display the modified DataFrame
df


Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,ex_hectares,fire_region,size_class_encoded,fire_origin_encoded_DND,fire_origin_encoded_Indian Reservation,fire_origin_encoded_Metis Settlement,fire_origin_encoded_National Park,fire_origin_encoded_Private Land,fire_origin_encoded_Provincial Land,fire_origin_encoded_Provincial Park
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,,...,5.50,High Level,2,0,0,0,0,0,1,0
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,,...,0.01,Rocky,0,0,0,0,0,0,1,0
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,,...,0.10,Lac La Biche,0,0,1,0,0,0,0,0
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,,...,0.01,Fort McMurray,0,0,1,0,0,0,0,0
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,,...,7.13,Edson,2,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,,...,0.10,Calgary,0,0,1,0,0,0,0,0
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Undetermined,,...,0.01,Calgary,0,0,0,0,0,0,0,1
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,,...,0.70,Calgary,1,0,0,0,0,0,1,0
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,,...,0.01,Calgary,0,0,0,0,0,0,1,0


In [None]:
# Merge the two "cause not established" labels into a single 'Unknown' category.
df['general_cause_desc'] = df['general_cause_desc'].replace(['Undetermined', 'Under Investigation'], 'Unknown')

# One-hot encode the cause. Columns from a previous run of this cell are
# dropped first so that re-running does not duplicate the cause_* columns.
one_hot_encoded_df = pd.get_dummies(df['general_cause_desc'], prefix='cause')
df = pd.concat(
    [df.drop(columns=one_hot_encoded_df.columns, errors='ignore'), one_hot_encoded_df],
    axis=1,
)
df

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,cause_Lightning,cause_Oil & Gas Industry,cause_Other Industry,cause_Power Line Industry,cause_Prescribed Fire,cause_Railroad,cause_Recreation,cause_Resident,cause_Restart,cause_Unknown
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,,...,1,0,0,0,0,0,0,0,0,0
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,,...,0,0,0,0,0,0,0,0,0,0
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,,...,0,0,0,0,0,0,0,0,0,0
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,,...,0,0,0,0,0,0,0,1,0,0
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,,...,0,0,0,0,0,0,0,1,0,0
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Unknown,,...,0,0,0,0,0,0,0,0,0,1
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,,...,0,0,0,0,0,0,1,0,0,0
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,,...,0,0,0,0,0,0,0,1,0,0


In [None]:
# Rows without an industry identifier get the explicit label 'Inapplicable'.
df['industry_identifier_desc'] = df['industry_identifier_desc'].fillna('Inapplicable')
# One-hot encoding of this column is currently disabled:
# one_hot_encoded_df = pd.get_dummies(df['industry_identifier_desc'], prefix='industry')
# df = pd.concat([df, one_hot_encoded_df], axis=1)
df

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,cause_Lightning,cause_Oil & Gas Industry,cause_Other Industry,cause_Power Line Industry,cause_Prescribed Fire,cause_Railroad,cause_Recreation,cause_Resident,cause_Restart,cause_Unknown
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,Inapplicable,...,1,0,0,0,0,0,0,0,0,0
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,Inapplicable,...,0,0,0,0,0,0,0,0,0,0
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,Inapplicable,...,0,0,0,0,0,0,0,0,0,0
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Unknown,Inapplicable,...,0,0,0,0,0,0,0,0,0,1
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,Inapplicable,...,0,0,0,0,0,0,1,0,0,0
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0


In [None]:
# Missing responsible group and the catch-all "Others (explain in remarks)"
# label are both recorded as 'Inapplicable'.
responsible_group = df['responsible_group_desc'].fillna('Inapplicable')
df['responsible_group_desc'] = responsible_group.replace("Others (explain in remarks)", "Inapplicable")

# Categories seen fewer than `threshold` times are folded into 'Other'.
threshold = 30
value_counts = df['responsible_group_desc'].value_counts()
to_replace = value_counts.index[value_counts < threshold]

is_rare_group = df['responsible_group_desc'].isin(to_replace)
df['responsible_group_desc'] = df['responsible_group_desc'].mask(is_rare_group, 'Other')

# One-hot encoding of this column is currently disabled:
# one_hot_encoded_df = pd.get_dummies(df['responsible_group_desc'], prefix='responsible_group')
# df = pd.concat([df, one_hot_encoded_df], axis=1)
df

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,cause_Lightning,cause_Oil & Gas Industry,cause_Other Industry,cause_Power Line Industry,cause_Prescribed Fire,cause_Railroad,cause_Recreation,cause_Resident,cause_Restart,cause_Unknown
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,Inapplicable,...,1,0,0,0,0,0,0,0,0,0
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,Inapplicable,...,0,0,0,0,0,0,0,0,0,0
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,Inapplicable,...,0,0,0,0,0,0,0,0,0,0
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Unknown,Inapplicable,...,0,0,0,0,0,0,0,0,0,1
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,Inapplicable,...,0,0,0,0,0,0,1,0,0,0
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,Inapplicable,...,0,0,0,0,0,0,0,1,0,0


In [None]:
# Rows without an activity class get the explicit label 'Inapplicable'.
df['activity_class'] = df['activity_class'].fillna('Inapplicable')

# Broader activity group -> raw activity_class values that belong to it.
activity_groups = {
    'Cooking Activities': ['Cooking', 'Cooking and Warming'],
    'Waste Management': ['Debris Disposal', 'Refuse', 'Refuse Disposal'],
    'Debris Management': ['Piles', 'Slash'],
    'Recreational Vehicle Use': ['All Terrain Vehicles', 'OHV Operation'],
    'Transportation': [
        'Vehicle Trans.', 'Transportation', 'Rail Transportation', 'Air Transportation',
    ],
    'Misuse by Individuals': ['Children'],
    'Intentional Damage': [
        'Arson', 'Cover-up', 'Dare', 'Grudge', 'Illness', 'Personal Gain',
    ],
    'Industrial Activities': [
        'Flaring Gas', 'Heavy Equipment', 'Mechanized Travel', 'Operations',
        'Welders', 'Pipelines',
    ],
    'Infrastructure': ['Power Lines', 'Utility (Water/Gas)'],
    'Public Misconduct': ['Smoking', 'Ammunition'],
    'Controlled Burning': ['Traditional Burning', 'Prescribed Fire', 'Restart'],
    # Low-occurrence activities are grouped under "Other". The contractor /
    # construction entries could be merged into a larger category if relevant.
    'Other': [
        'Brush', 'Grass', 'Structure Fire', 'Guides & Outfitters', 'Hikers',
        'Horseback Riders', 'Mushroom Pickers', 'Picnickers', 'Swimmers',
        'Trappers', 'Berry Pickers', 'Fishermen', 'Boat/canoe/kayak',
        'Bikers (non-powered)', 'Non-mechanized Travel', 'Windrows',
        'Power Saw', 'Refuelling', 'Employees', 'Other Employees',
        'Contractors or Agents', 'Contractors', 'Construction Workers',
        'Employment',
    ],
}

# Flat lookup: raw activity -> group name.
activity_mapping = {
    activity: group_name
    for group_name, activities in activity_groups.items()
    for activity in activities
}

# Values without an entry in the mapping (e.g. 'Inapplicable') keep their
# original label.
grouped_activity = df['activity_class'].map(activity_mapping)
df['activity_class_grouped'] = grouped_activity.fillna(df['activity_class'])

# One-hot encoding of this column is currently disabled:
# one_hot_encoded_df = pd.get_dummies(df['activity_class_grouped'], prefix='activity')
# df = pd.concat([df, one_hot_encoded_df], axis=1)
df

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,cause_Oil & Gas Industry,cause_Other Industry,cause_Power Line Industry,cause_Prescribed Fire,cause_Railroad,cause_Recreation,cause_Resident,cause_Restart,cause_Unknown,activity_class_grouped
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,Inapplicable,...,0,0,0,0,0,0,0,0,0,Inapplicable
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,Inapplicable,...,0,0,0,0,0,0,0,0,0,Waste Management
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,Inapplicable,...,0,0,0,0,0,0,0,0,0,Intentional Damage
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,0,1,0,0,Cooking Activities
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,Inapplicable,...,0,0,0,0,0,0,1,0,0,Waste Management
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,0,1,0,0,Misuse by Individuals
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Unknown,Inapplicable,...,0,0,0,0,0,0,0,0,1,Inapplicable
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,Inapplicable,...,0,0,0,0,0,1,0,0,0,Cooking Activities
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,Inapplicable,...,0,0,0,0,0,0,1,0,0,Unclassified


In [None]:
# Broader cause group -> raw true_cause values that belong to it.
true_cause_groups = {
    'Unsafe or Uncontrolled Fire': [
        'Unsafe Fire', 'Abandoned Fire', 'Unattended Fire', 'High Hazard',
        'Insufficient Buffer', 'Insufficient Resources',
    ],
    'External Heat Sources': [
        'Burning Substance', 'Friction Spark', 'Hot Exhaust', 'Flammable Fluids',
    ],
    'Intentional': ['Arson Suspected', 'Arson Known', 'Incendiary Device'],
    'Regulatory and Compliance Issues': ['Permit Related'],
    'Technical or Mechanical Failure': ['Mechanical Failure'],
    'Vehicle-Related Incidents': ['Vehicle Fire'],
    'Electrical and Infrastructure Issues': ['Line Impact'],
    'Seasonal-Related Incidents': ['Winter Burning'],
    'Natural or Unpredictable Causes': ['Unpredictable Event'],
    'Other or Unclassified Causes': ['Unclassified'],
    'Animal Related Incidents': ['Animals'],
}

# Flat lookup: raw cause -> group name.
true_cause_mapping = {
    cause: group_name
    for group_name, causes in true_cause_groups.items()
    for cause in causes
}

# Rows without a true cause get the explicit label 'Inapplicable'.
df['true_cause'] = df['true_cause'].fillna('Inapplicable')

# Causes without an entry in the mapping (e.g. 'Inapplicable') keep their
# original label in the grouped column.
grouped_cause = df['true_cause'].map(true_cause_mapping)
df['true_cause_grouped'] = grouped_cause.where(grouped_cause.notna(), df['true_cause'])

# One-hot encoding of this column is currently disabled:
# true_cause_dummies = pd.get_dummies(df['true_cause_grouped'], prefix='true_cause')
# df = pd.concat([df, true_cause_dummies], axis=1)
df

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,cause_Other Industry,cause_Power Line Industry,cause_Prescribed Fire,cause_Railroad,cause_Recreation,cause_Resident,cause_Restart,cause_Unknown,activity_class_grouped,true_cause_grouped
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,Inapplicable,...,0,0,0,0,0,0,0,0,Inapplicable,Inapplicable
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,Inapplicable,...,0,0,0,0,0,0,0,0,Waste Management,Seasonal-Related Incidents
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,Inapplicable,...,0,0,0,0,0,0,0,0,Intentional Damage,Inapplicable
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Cooking Activities,Unsafe or Uncontrolled Fire
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Waste Management,Unsafe or Uncontrolled Fire
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Misuse by Individuals,External Heat Sources
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Unknown,Inapplicable,...,0,0,0,0,0,0,0,1,Inapplicable,Inapplicable
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,Inapplicable,...,0,0,0,0,1,0,0,0,Cooking Activities,Unsafe or Uncontrolled Fire
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Unclassified,External Heat Sources


In [None]:
# Fill a missing start date from the next row, falling back to the previous row
# for trailing gaps. The result is assigned back to the column: calling
# fillna(..., inplace=True) on df[col] acts on a temporary Series and does not
# update df under pandas Copy-on-Write, and fillna(method=...) is deprecated in
# favour of bfill()/ffill().
# NOTE(review): borrowing the date of a neighbouring row assumes adjacent rows
# are close in time - confirm this holds for the file's row order.
df['fire_start_date'] = df['fire_start_date'].bfill().ffill()

df["fire_start_date"]

0        2010-06-28 09:07:00
1        2020-03-25 08:00:00
2        2020-04-16 16:20:00
3        2020-04-16 17:00:00
4        2020-11-01 10:00:00
                ...         
22909    2007-03-11 17:10:00
22910    2007-03-13 10:00:00
22911    2007-03-25 16:00:00
22912    2007-03-28 03:55:00
22913    2007-03-28 03:55:00
Name: fire_start_date, Length: 22914, dtype: object

In [None]:
# Fill missing detection agents with the most frequent value. The filled Series
# is assigned back because an inplace fillna on df[col] does not reliably
# update df (it is a no-op under pandas Copy-on-Write).
mode_det_agent = df['det_agent'].mode()[0]
df['det_agent'] = df['det_agent'].fillna(mode_det_agent)

In [None]:
# Fill missing detection agent types with the most frequent value. The filled
# Series is assigned back because an inplace fillna on df[col] does not
# reliably update df (it is a no-op under pandas Copy-on-Write).
mode_det_agent_type = df['det_agent_type'].mode()[0]
df['det_agent_type'] = df['det_agent_type'].fillna(mode_det_agent_type)

In [None]:
# 'discovered_size' has no non-null values (count 0 in df.describe()), so drop
# it. DataFrame.drop returns a new frame; the result must be assigned back,
# otherwise the column silently stays in df. errors="ignore" keeps the cell
# re-runnable once the column is gone.
df = df.drop(columns=["discovered_size"], errors="ignore")
df

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,cause_Other Industry,cause_Power Line Industry,cause_Prescribed Fire,cause_Railroad,cause_Recreation,cause_Resident,cause_Restart,cause_Unknown,activity_class_grouped,true_cause_grouped
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,Inapplicable,...,0,0,0,0,0,0,0,0,Inapplicable,Inapplicable
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,Inapplicable,...,0,0,0,0,0,0,0,0,Waste Management,Seasonal-Related Incidents
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,Inapplicable,...,0,0,0,0,0,0,0,0,Intentional Damage,Inapplicable
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Cooking Activities,Unsafe or Uncontrolled Fire
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Waste Management,Unsafe or Uncontrolled Fire
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Misuse by Individuals,External Heat Sources
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Unknown,Inapplicable,...,0,0,0,0,0,0,0,1,Inapplicable,Inapplicable
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,Inapplicable,...,0,0,0,0,1,0,0,0,Cooking Activities,Unsafe or Uncontrolled Fire
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,Inapplicable,...,0,0,0,0,0,1,0,0,Unclassified,External Heat Sources


In [None]:
choices = ['HAC', 'FPD Staff']

# Rows whose dispatched resource is missing
resource_is_missing = df['dispatched_resource'].isna()
nan_indices = df.index[resource_is_missing]

# Fill every missing value with 'HAC' or 'FPD Staff' (50/50 chance each).
# A seeded generator makes the imputation reproducible across kernel restarts,
# and drawing all values at once replaces the per-row Python loop.
dispatch_rng = np.random.default_rng(42)
if resource_is_missing.any():
    df.loc[resource_is_missing, 'dispatched_resource'] = dispatch_rng.choice(
        choices, size=int(resource_is_missing.sum())
    )

In [None]:
# Fill missing spread rates with the column mean. The filled Series is assigned
# back because an inplace fillna on df[col] does not reliably update df (it is
# a no-op under pandas Copy-on-Write).
# NOTE(review): df.describe() reports a minimum of -1.0 for this column, which
# looks like a sentinel rather than a real rate - confirm before trusting the
# mean.
fire_spread_rate_mean = df['fire_spread_rate'].mean()
df['fire_spread_rate'] = df['fire_spread_rate'].fillna(fire_spread_rate_mean)

In [None]:
# Normalise fire_type: strip surrounding whitespace from every label (not only
# 'Surface', so padded variants of any category collapse into one), and record
# missing or blank values as 'Unknown'. The result is assigned back because an
# inplace fillna on df[col] does not reliably update df.
stripped_fire_type = df['fire_type'].str.strip()
df['fire_type'] = stripped_fire_type.fillna('Unknown').replace('', 'Unknown')

# NOTE(review): the dummies are attached to a separate frame, df_with_encoding,
# not to df itself - confirm which frame later steps are meant to use.
one_hot_encoded_df = pd.get_dummies(df['fire_type'], prefix='fire_type')
df_with_encoding = pd.concat([df, one_hot_encoded_df], axis=1)

In [None]:
# Record a missing slope position as 'Unknown' (previously misspelled 'Unkown',
# which would also have produced a misspelled dummy column name).
df['fire_position_on_slope'] = df['fire_position_on_slope'].fillna('Unknown')
one_hot_encoded_df = pd.get_dummies(df['fire_position_on_slope'], prefix='slope')

# df_with_encoding is rebuilt from df here, which does not contain the
# fire_type dummies, so they are regenerated and included alongside the slope
# dummies instead of being silently discarded.
fire_type_dummies = pd.get_dummies(df['fire_type'], prefix='fire_type')
df_with_encoding = pd.concat([df, fire_type_dummies, one_hot_encoded_df], axis=1)


In [None]:
# Distinct weather labels (including NaN) present before encoding.
df['weather_conditions_over_fire'].unique()

array(['Clear', 'Cloudy', 'CB Dry', 'Rainshowers', 'CB Wet', nan],
      dtype=object)

In [None]:
# Record missing weather as "Unknown". The filled Series is assigned back
# because an inplace fillna on df[col] does not reliably update df (it is a
# no-op under pandas Copy-on-Write), which would leave NaN in the encoded
# column below.
df["weather_conditions_over_fire"] = df["weather_conditions_over_fire"].fillna("Unknown")

# Integer code for each weather label; labels missing from this table map to NaN.
weather_mapping = {
    'Unknown': 0,
    'Clear': 1,
    'CB Dry': 2,
    'Cloudy': 3,
    'CB Wet': 4,
    'Rainshowers': 5
}

df["weather_conditions_encoded"] = df['weather_conditions_over_fire'].map(weather_mapping)

In [None]:
# Sanity check: the distinct integer codes produced by the weather mapping.
df["weather_conditions_encoded"].unique()

array([1, 3, 2, 5, 4, 0])

In [None]:
# Fill missing temperature and relative humidity with the column median. The
# filled Series are assigned back because an inplace fillna on df[col] does not
# reliably update df (it is a no-op under pandas Copy-on-Write).
temperature_median = df['temperature'].median()
df['temperature'] = df['temperature'].fillna(temperature_median)

humidity_median = df['relative_humidity'].median()
df['relative_humidity'] = df['relative_humidity'].fillna(humidity_median)

In [None]:
# get_dummies below consumes the 'wind_direction' column, so the guard keeps
# this cell from raising KeyError when it is run a second time.
if 'wind_direction' in df.columns:
    # Strip surrounding whitespace from every label (the raw data contains
    # padded values such as ' S' and ' NW') so variants collapse into one
    # category, then record missing directions as 'Unknown'. The result is
    # assigned back because an inplace fillna on df[col] does not reliably
    # update df.
    df['wind_direction'] = df['wind_direction'].str.strip().fillna('Unknown')

    # dummy_na=True is kept so the resulting column set (including
    # 'wind_dir_nan') stays the same; because missing values were labelled
    # 'Unknown' above, that column contains only zeros.
    df = pd.get_dummies(df, columns=['wind_direction'], prefix='wind_dir', dummy_na=True)

In [None]:
# Fill missing wind speed with the column median. The filled Series is assigned
# back because an inplace fillna on df[col] does not reliably update df (it is
# a no-op under pandas Copy-on-Write).
wind_speed_median = df['wind_speed'].median()
df['wind_speed'] = df['wind_speed'].fillna(wind_speed_median)

In [None]:
# Record a missing fuel type as "Unknown". Assigned back because an inplace
# fillna on df[col] does not reliably update df (no-op under Copy-on-Write).
df["fuel_type"] = df["fuel_type"].fillna("Unknown")

In [None]:
# Fill a missing initial-action party with the most frequent value. Assigned
# back because an inplace fillna on df[col] does not reliably update df (no-op
# under Copy-on-Write).
initial_action_by_mode = df['initial_action_by'].mode()[0]
df['initial_action_by'] = df['initial_action_by'].fillna(initial_action_by_mode)

In [None]:
# Record a missing initial-attack access value as 'Unknown'. Assigned back
# because an inplace fillna on df[col] does not reliably update df (no-op under
# Copy-on-Write).
df['ia_access'] = df['ia_access'].fillna('Unknown')

In [None]:
# Fill a missing fire size at the start of fire fighting with the column
# median. Assigned back because an inplace fillna on df[col] does not reliably
# update df (no-op under Copy-on-Write).
fire_starting_start_size_median = df['fire_fighting_start_size'].median()
df['fire_fighting_start_size'] = df['fire_fighting_start_size'].fillna(fire_starting_start_size_median)



In [None]:
# Treat a missing bucketing flag as "N", then encode the flag as 0/1. Both
# letter cases are accepted for yes and no (the original handled 'y' but not
# 'n'). The result is assigned back because inplace fillna/replace on df[col]
# does not reliably update df (no-op under Copy-on-Write).
bucketing_flag = df["bucketing_on_fire"].fillna("N")
df["bucketing_on_fire"] = bucketing_flag.replace({"N": 0, "n": 0, "Y": 1, "y": 1})

In [None]:
# A missing distance is recorded as +infinity. Assigned back because an inplace
# fillna on df[col] does not reliably update df (no-op under Copy-on-Write).
# NOTE(review): infinite values propagate into mean/std and are rejected by
# many estimators - confirm downstream steps can handle them.
df['distance_from_water_source'] = df['distance_from_water_source'].fillna(np.inf)

In [None]:
# Remove the two 'to_*' columns from df in place. errors="ignore" lets the cell
# be re-run after the columns are already gone instead of raising KeyError.
df.drop(columns=["to_fs_date", "to_hectares"], inplace=True, errors="ignore")

In [None]:

# Timestamps outside this window are treated as invalid.
min_reasonable_date = pd.Timestamp('2000-01-01')
max_reasonable_date = pd.Timestamp('2023-12-31')

# Columns holding the timestamps of the fire's life cycle.
date_features = [
    'fire_start_date',
    'discovered_date',
    'reported_date',
    'dispatch_date',
    'start_for_fire_date',
    'assessment_datetime',
    'ia_arrival_at_fire_date',
    'fire_fighting_start_date',
    'first_bucket_drop_date',
    'ex_fs_date',
]

# Parse each column to datetime (unparseable entries become NaT) and blank out
# anything that falls outside the reasonable window.
for feature in date_features:
    parsed = pd.to_datetime(df[feature], errors='coerce')
    within_window = parsed.between(min_reasonable_date, max_reasonable_date)
    df[feature] = parsed.where(within_window)


def generate_time_features(df):
    """Add elapsed-time features, in minutes, measured from ``fire_start_date``.

    For each milestone timestamp column a new float column is written holding
    ``milestone - fire_start_date`` in minutes. The value is NaN when either
    timestamp is missing.

    The differences are computed with vectorised datetime arithmetic rather
    than one row-wise ``DataFrame.apply`` pass per feature, which gives the
    same values far faster and also works on an empty frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``fire_start_date`` and the nine milestone columns named
        in ``milestone_columns`` below, holding datetime-like values.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place with the new columns added.
    """
    # New feature name -> milestone column it is measured to.
    milestone_columns = {
        'time_to_discovery': 'discovered_date',
        'time_to_report': 'reported_date',
        'time_to_dispatch': 'dispatch_date',
        'time_to_start_for_fire': 'start_for_fire_date',
        'time_to_assessment': 'assessment_datetime',
        'time_to_ia_arrival': 'ia_arrival_at_fire_date',
        'time_to_start_fighting': 'fire_fighting_start_date',
        'time_to_first_bucket_drop': 'first_bucket_drop_date',
        'total_time_to_extinguish': 'ex_fs_date',
    }

    # to_datetime leaves datetime64 columns as they are and lets object columns
    # of Timestamps use the .dt accessor as well.
    start = pd.to_datetime(df['fire_start_date'])

    for feature_name, milestone in milestone_columns.items():
        elapsed = pd.to_datetime(df[milestone]) - start
        # NaT differences become NaN here.
        df[feature_name] = elapsed.dt.total_seconds() / 60

    return df

# Add the elapsed-time columns to df.
df = generate_time_features(df)

# Elapsed-time columns whose gaps are filled with that column's median.
columns_to_impute = [
    'time_to_discovery',
    'time_to_report',
    'time_to_dispatch',
    'time_to_start_for_fire',
    'time_to_assessment',
    'time_to_ia_arrival',
    'time_to_start_fighting',
    'time_to_first_bucket_drop',
    'total_time_to_extinguish',
]

# Each column is filled with its own median (fillna aligns the medians by
# column label).
time_feature_medians = df[columns_to_impute].median()
df[columns_to_impute] = df[columns_to_impute].fillna(time_feature_medians)

In [None]:
# Names of the columns that still contain at least one missing value.
df.columns[df.isnull().any()].tolist()

['fire_start_date',
 'discovered_date',
 'discovered_size',
 'reported_date',
 'dispatch_date',
 'start_for_fire_date',
 'ia_arrival_at_fire_date',
 'fire_fighting_start_date',
 'first_bucket_drop_date']

In [None]:
# Rows whose fire_start_date is still missing after parsing and range filtering.
df[df['fire_start_date'].isna()]

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,wind_dir_nan,time_to_discovery,time_to_report,time_to_dispatch,time_to_start_for_fire,time_to_assessment,time_to_ia_arrival,time_to_start_fighting,time_to_first_bucket_drop,total_time_to_extinguish
1291,2021,LWF065,Unnamed,0.1,A,54.567167,-111.1981,Provincial Land,Incendiary,Inapplicable,...,0,60.0,60.0,85.0,91.0,130.0,153.0,175.0,137.0,1142.5
14316,2011,MWF047,Unnamed,1.6,B,58.952257,-111.030739,Provincial Land,Lightning,Inapplicable,...,0,60.0,60.0,85.0,91.0,130.0,153.0,175.0,137.0,1142.5


In [None]:
# Ensure the column is datetime before using the .dt accessor.
start_dates = pd.to_datetime(df['fire_start_date'])
df['fire_start_date'] = start_dates

# Calendar features of the start date; both are NaN where the date is missing.
df['fire_start_month'] = start_dates.dt.month
df['fire_start_weekday'] = start_dates.dt.dayofweek  # Monday=0, Sunday=6

# Optionally, create a feature for the part of the day
# Define a function to categorize parts of the day
def part_of_day(hour):
    """Return the part-of-day label for an hour value.

    Bands (lower bound inclusive, upper bound exclusive):
    5-12 'Morning', 12-17 'Afternoon', 17-21 'Evening'.
    Every other value yields 'Night'. That includes NaN, because all
    comparisons against NaN are False.
    """
    daytime_bands = (
        (5, 12, 'Morning'),
        (12, 17, 'Afternoon'),
        (17, 21, 'Evening'),
    )
    for first_hour, end_hour, label in daytime_bands:
        if first_hour <= hour < end_hour:
            return label
    # No band matched
    return 'Night'

# Label each fire with the part of the day in which it started.
# Rows without a fire_start_date have a NaN hour. Every comparison inside
# part_of_day is False for NaN, so those rows would silently be labelled
# 'Night'. na_action='ignore' leaves them as NaN instead, and they are then
# imputed with the most frequent label, the same strategy this notebook uses
# for the missing month and weekday values.
start_part_of_day = df['fire_start_date'].dt.hour.map(part_of_day, na_action='ignore')
df['fire_start_part_of_day'] = start_part_of_day.fillna(start_part_of_day.mode()[0])

# Quick check on the new features
print(df[['fire_start_month', 'fire_start_weekday', 'fire_start_part_of_day']].head())

   fire_start_month  fire_start_weekday fire_start_part_of_day
0               6.0                 0.0                Morning
1               3.0                 2.0                Morning
2               4.0                 3.0              Afternoon
3               4.0                 3.0                Evening
4              11.0                 6.0                Morning


In [None]:
# Impute missing weekdays with the most frequent weekday and store the column
# as integers.
# The result is assigned back rather than calling fillna(inplace=True) on
# df['fire_start_weekday']. That chained in-place call acts on an intermediate
# object: pandas 2.x warns about it, and under Copy-on-Write it no longer
# updates df, which would make the int cast fail on the remaining NaNs.
mode_fire_start_weekday = df['fire_start_weekday'].mode()[0]  # most frequent weekday
df['fire_start_weekday'] = df['fire_start_weekday'].fillna(mode_fire_start_weekday).astype(int)

In [None]:
# Impute missing months with the most frequent month and store the column as
# integers.
# The result is assigned back rather than calling fillna(inplace=True) on
# df['fire_start_month']. That chained in-place call acts on an intermediate
# object: pandas 2.x warns about it, and under Copy-on-Write it no longer
# updates df, which would make the int cast fail on the remaining NaNs.
mode_fire_start_month = df['fire_start_month'].mode()[0]  # most frequent month
df['fire_start_month'] = df['fire_start_month'].fillna(mode_fire_start_month).astype(int)

In [None]:
# Distinct part-of-day labels present in the column, in alphabetical order
sorted(df['fire_start_part_of_day'].unique())

['Afternoon', 'Evening', 'Morning', 'Night']

In [None]:
# Re-check which columns still contain missing values after the imputation steps
df.columns[df.isna().any()].tolist()

['fire_start_date',
 'discovered_date',
 'discovered_size',
 'reported_date',
 'dispatch_date',
 'start_for_fire_date',
 'ia_arrival_at_fire_date',
 'fire_fighting_start_date',
 'first_bucket_drop_date']

In [None]:
# Number of missing values in each column
df.isna().sum()

fire_year                    0
fire_number                  0
fire_name                    0
current_size                 0
size_class                   0
                            ..
time_to_first_bucket_drop    0
total_time_to_extinguish     0
fire_start_month             0
fire_start_weekday           0
fire_start_part_of_day       0
Length: 96, dtype: int64

In [None]:
# Print the frequency table of every column (NaN counted as its own value),
# sorted from most to least frequent, with a divider between columns.
divider = "\n" + "-" * 50 + "\n"
for col in df.columns:
    print(f"Column: {col}")
    frequency_table = df[col].value_counts(dropna=False).sort_values(ascending=False)
    print(frequency_table)
    print(divider)

Column: fire_year
2006    1954
2015    1898
2010    1840
2008    1712
2009    1710
2012    1568
2014    1470
2016    1376
2007    1349
2021    1342
2018    1279
2017    1244
2013    1226
2011    1218
2019    1005
2020     723
Name: fire_year, dtype: int64

--------------------------------------------------

Column: fire_number
CWF111    16
CWF003    16
EWF003    16
EWF011    16
EWF012    16
          ..
EWF261     1
GWF194     1
GWF184     1
EWF260     1
MWF174     1
Name: fire_number, Length: 2530, dtype: int64

--------------------------------------------------

Column: fire_name
Unnamed                           22310
                                    207
                                     28
                                     19
Birch Complex                        17
                                  ...  
Sock Lake                             1
Clear Hills Fire                      1
Buffalo River Fire                    1
Boundry Lake Fire                     1
Three Lakes

In [None]:
# Display the working frame (pandas abbreviates the rendering) to inspect the
# engineered time and calendar columns alongside the original ones.
df

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,time_to_dispatch,time_to_start_for_fire,time_to_assessment,time_to_ia_arrival,time_to_start_fighting,time_to_first_bucket_drop,total_time_to_extinguish,fire_start_month,fire_start_weekday,fire_start_part_of_day
0,2021,HWF053,Unnamed,5.50,C,59.522139,-119.926971,Provincial Land,Lightning,Inapplicable,...,5786377.0,5786377.0,5786377.0,5786376.0,5787766.0,137.0,5790517.0,6,0,Morning
1,2021,RWF005,Unnamed,0.01,A,52.692262,-116.118960,Provincial Land,Forest Industry,Inapplicable,...,533191.0,533192.0,533190.0,153.0,175.0,137.0,547230.0,3,2,Morning
2,2021,LWF014,Unnamed,0.10,A,55.937050,-110.719950,Indian Reservation,Incendiary,Inapplicable,...,525613.0,525614.0,525630.0,525628.0,525630.0,137.0,526915.0,4,3,Afternoon
3,2021,MWF002,Unnamed,0.01,A,56.425933,-111.160750,Indian Reservation,Resident,Inapplicable,...,526880.0,526890.0,526940.0,153.0,175.0,137.0,526944.0,4,3,Evening
4,2021,EWF014,Unnamed,7.13,C,53.693450,-116.058633,Private Land,Resident,Inapplicable,...,217751.0,217768.0,217829.0,153.0,175.0,137.0,233550.0,11,6,Morning
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22909,2006,CWF231,Unnamed,0.10,A,51.161883,-114.869317,Indian Reservation,Resident,Inapplicable,...,0.0,0.0,21.0,20.0,25.0,137.0,50.0,3,6,Evening
22910,2006,CWF232,Unnamed,0.01,A,51.084150,-115.392633,Provincial Park,Unknown,Inapplicable,...,8535.0,8550.0,8665.0,153.0,175.0,137.0,8680.0,3,1,Morning
22911,2006,CWF233,Unnamed,0.70,B,51.333900,-114.997667,Provincial Land,Recreation,Inapplicable,...,240.0,255.0,360.0,360.0,360.0,137.0,1200.0,3,6,Afternoon
22912,2006,CWF234,Unnamed,0.01,A,51.071467,-115.316300,Provincial Land,Resident,Inapplicable,...,335.0,350.0,425.0,153.0,175.0,137.0,455.0,3,2,Night


In [None]:
# Columns handed to the visualization team, in export order
visualization_columns = [
    # year and name
    'fire_year', 'fire_name',
    # date / datetime columns
    'fire_start_date', 'discovered_date', 'reported_date', 'dispatch_date',
    'start_for_fire_date', 'assessment_datetime', 'first_bucket_drop_date',
    'ia_arrival_at_fire_date', 'fire_fighting_start_date',
    'bh_fs_date', 'uc_fs_date', 'ex_fs_date',
    # calendar features derived from fire_start_date
    'fire_start_month', 'fire_start_weekday', 'fire_start_part_of_day',
    # location
    'fire_location_latitude', 'fire_location_longitude', 'fire_region',
    # size, type, slope position, fuel
    'current_size', 'size_class', 'fire_type', 'fire_position_on_slope', 'fuel_type',
    # cause, detection, responsibility
    'general_cause_desc', 'true_cause', 'det_agent', 'det_agent_type',
    'industry_identifier_desc', 'responsible_group_desc', 'activity_class',
    # weather readings and spread rate
    'weather_conditions_over_fire', 'temperature', 'relative_humidity',
    'wind_speed', 'fire_spread_rate',
    # action, resource, access, bucketing columns
    'initial_action_by', 'dispatched_resource', 'assessment_resource',
    'ia_access', 'bucketing_on_fire', 'distance_from_water_source',
    # *_hectares columns
    'assessment_hectares', 'bh_hectares', 'uc_hectares', 'ex_hectares',
    # engineered time features
    'time_to_discovery', 'time_to_report', 'time_to_dispatch',
    'time_to_start_for_fire', 'time_to_assessment', 'time_to_ia_arrival',
    'time_to_start_fighting', 'time_to_first_bucket_drop',
    'total_time_to_extinguish',
]

# Keep only those columns and write them, without the index, to a CSV file
# for the visualization team
df_visualization = df.loc[:, visualization_columns]
df_visualization.to_csv('wildfire_data_for_visualization.csv', index=False)

In [None]:
# Columns handed to the AI model creation team, in export order
ai_modeling_columns = [
    'fire_year', 'size_class_encoded',
    # location
    'fire_location_latitude', 'fire_location_longitude', 'fire_region',
    # calendar features derived from fire_start_date
    'fire_start_month', 'fire_start_weekday', 'fire_start_part_of_day',
    'fire_type', 'fuel_type',
    # fire_origin_encoded_* columns
    'fire_origin_encoded_DND',
    'fire_origin_encoded_Indian Reservation',
    'fire_origin_encoded_Metis Settlement',
    'fire_origin_encoded_National Park',
    'fire_origin_encoded_Private Land',
    'fire_origin_encoded_Provincial Land',
    'fire_origin_encoded_Provincial Park',
    # cause_* columns
    'cause_Agriculture Industry', 'cause_Forest Industry', 'cause_Government',
    'cause_Incendiary', 'cause_Lightning', 'cause_Oil & Gas Industry',
    'cause_Other Industry', 'cause_Power Line Industry', 'cause_Prescribed Fire',
    'cause_Railroad', 'cause_Recreation', 'cause_Resident', 'cause_Restart',
    'cause_Unknown',
    # grouped categories, detection, action, resource, access columns
    'activity_class_grouped', 'true_cause_grouped',
    'det_agent', 'det_agent_type',
    'initial_action_by', 'dispatched_resource', 'assessment_resource', 'ia_access',
    # engineered time features
    'time_to_discovery', 'time_to_report', 'time_to_dispatch',
    'time_to_start_for_fire', 'time_to_assessment', 'time_to_ia_arrival',
    'time_to_start_fighting', 'time_to_first_bucket_drop',
    'total_time_to_extinguish',
    # weather: encoded conditions and wind_dir_* columns
    'weather_conditions_encoded',
    'wind_dir_CLM', 'wind_dir_E', 'wind_dir_N', 'wind_dir_NE', 'wind_dir_NW',
    'wind_dir_S', 'wind_dir_SE', 'wind_dir_SW', 'wind_dir_Unknown', 'wind_dir_W',
    # numeric weather readings and spread rate
    'temperature', 'relative_humidity', 'wind_speed', 'fire_spread_rate',
    # NOTE(review): describe() on this selection reports inf for
    # distance_from_water_source; the column presumably contains infinite
    # values. Confirm and clean before modeling.
    'bucketing_on_fire', 'distance_from_water_source',
    # *_hectares columns
    'assessment_hectares', 'bh_hectares', 'uc_hectares', 'ex_hectares',
]

# Keep only those columns and write them, without the index, to a CSV file
# for the AI model creation team
df_ai_modeling = df.loc[:, ai_modeling_columns]
df_ai_modeling.to_csv('wildfire_data_for_ai_modeling.csv', index=False)

In [None]:
# (rows, columns) of the modeling export; the column count should equal
# len(ai_modeling_columns).
df_ai_modeling.shape

(22914, 69)

In [None]:
# Summary statistics for the numeric columns of the modeling export.
# NOTE(review): the rendered output shows inf for the mean and max of
# distance_from_water_source (with NaN std and quantiles) plus a subtract()
# warning line, so the column presumably holds infinite values. Confirm
# where they come from.
df_ai_modeling.describe()

  diff_b_a = subtract(b, a)


Unnamed: 0,fire_year,size_class_encoded,fire_location_latitude,fire_location_longitude,fire_start_month,fire_start_weekday,fire_origin_encoded_DND,fire_origin_encoded_Indian Reservation,fire_origin_encoded_Metis Settlement,fire_origin_encoded_National Park,...,temperature,relative_humidity,wind_speed,fire_spread_rate,bucketing_on_fire,distance_from_water_source,assessment_hectares,bh_hectares,uc_hectares,ex_hectares
count,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,...,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0,22914.0
mean,2012.820896,0.443004,54.95378,-115.222288,6.26198,2.970106,0.004233,0.125818,0.006066,8.7e-05,...,17.991573,44.794973,8.442306,0.869692,0.277865,inf,2.154478,114.266344,168.228295,155.06068
std,4.517263,0.767188,2.642913,2.309475,1.862468,2.091631,0.064927,0.331652,0.077651,0.009342,...,7.097851,17.702647,7.999351,2.42293,0.447956,,60.137154,5986.559818,6978.850959,5906.681475
min,2006.0,0.0,48.998195,-119.999983,1.0,0.0,0.0,0.0,0.0,0.0,...,-35.0,0.0,0.0,-1.0,0.0,0.0,0.01,0.01,0.01,0.01
25%,2009.0,0.0,53.075638,-116.791542,5.0,1.0,0.0,0.0,0.0,0.0,...,15.0,33.25,4.0,0.0,0.0,4.0,0.01,0.01,0.01,0.01
50%,2013.0,0.0,55.300575,-115.151046,6.0,3.0,0.0,0.0,0.0,0.0,...,19.0,40.0,6.0,0.1,0.0,,0.01,0.01,0.02,0.02
75%,2016.0,1.0,56.797629,-114.330646,7.0,5.0,0.0,0.0,0.0,0.0,...,23.0,54.0,10.0,1.0,1.0,,0.2,0.3,0.3,0.3
max,2021.0,4.0,59.99951,-110.000917,12.0,6.0,1.0,1.0,1.0,1.0,...,39.9,100.0,90.0,100.0,1.0,inf,6019.0,602417.0,707648.0,577646.8


In [None]:
# List the exported column labels to verify the selection and its order.
df_ai_modeling.columns

Index(['fire_year', 'size_class_encoded', 'fire_location_latitude',
       'fire_location_longitude', 'fire_region', 'fire_start_month',
       'fire_start_weekday', 'fire_start_part_of_day', 'fire_type',
       'fuel_type', 'fire_origin_encoded_DND',
       'fire_origin_encoded_Indian Reservation',
       'fire_origin_encoded_Metis Settlement',
       'fire_origin_encoded_National Park', 'fire_origin_encoded_Private Land',
       'fire_origin_encoded_Provincial Land',
       'fire_origin_encoded_Provincial Park', 'cause_Agriculture Industry',
       'cause_Forest Industry', 'cause_Government', 'cause_Incendiary',
       'cause_Lightning', 'cause_Oil & Gas Industry', 'cause_Other Industry',
       'cause_Power Line Industry', 'cause_Prescribed Fire', 'cause_Railroad',
       'cause_Recreation', 'cause_Resident', 'cause_Restart', 'cause_Unknown',
       'activity_class_grouped', 'true_cause_grouped', 'det_agent',
       'det_agent_type', 'initial_action_by', 'dispatched_resource',
     