In [None]:
Provisional machine learning model looking the accuracy of predicting forest fires in Alberta, CA
Segment 1 Deliverables 

In [142]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
import tensorflow as tf

In [143]:
# Read CSVs (using raw files from https://wildfire.alberta.ca/resources/historical-data/historical-wildfire-database.aspx until ERD is set up)
fire_df = pd.concat(map(pd.read_csv, ["fires_2006to2018.csv", "fires_1996to2005.csv"]))
fire_df


Unnamed: 0,fire_number,fire_name,fire_year,calendar_year,assessment_datetime,assessment_hectares,current_size,size_class,fire_location_latitude,fire_location_longitude,...,fuel_type,other_fuel_type,bh_fs_date,bh_hectares,uc_fs_date,uc_hectares,to_fs_date,to_hectares,ex_fs_date,ex_hectares
0,CWF001,,2006,2006,2006-04-02 16:00:00,0.20,0.20,B,51.152933,-115.034600,...,O1b,,2006-04-02 16:00:00,0.20,2006-04-02 16:00:00,0.20,,,2006-04-03 18:00:00,0.20
1,CWF002,,2006,2006,2006-04-03 16:45:00,0.01,0.01,A,51.157633,-115.002133,...,O1b,,2006-04-03 16:45:00,0.01,2006-04-03 16:45:00,0.01,,,2006-04-03 16:50:00,0.01
2,CWF003,,2006,2006,2006-04-08 20:05:00,0.01,0.01,A,51.194400,-114.516167,...,,Campfire,2006-04-08 20:05:00,0.01,2006-04-08 20:05:00,0.01,,,2006-04-09 20:30:00,0.01
3,CWF004,,2006,2006,2006-04-13 18:20:00,0.75,0.75,B,51.125617,-114.841683,...,O1a,,2006-04-13 18:20:00,0.75,2006-04-13 18:20:00,0.75,,,2006-04-13 20:00:00,0.75
4,CWF005,,2006,2006,2006-04-14 17:25:00,0.01,0.01,A,50.409833,-114.478967,...,O1a,,2006-04-14 17:25:00,0.01,2006-04-14 17:25:00,0.01,,,2006-04-14 17:40:00,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11347,WWF051,,2005,2005,2005-11-09 15:06:00,0.01,0.01,A,54.230383,-115.749217,...,M1,,2005-11-09 15:06:00,0.01,2005-11-09 16:00:00,0.01,,,2005-11-10 11:00:00,0.01
11348,WWF052,,2005,2005,2005-11-22 13:00:00,0.10,0.60,B,54.626694,-115.371483,...,C3,,2005-11-22 15:30:00,0.20,2005-11-22 18:00:00,0.60,2005-11-24 14:00:00,0.60,2005-12-21 10:51:00,0.60
11349,WWF053,,2005,2005,2005-11-24 14:00:00,0.01,0.01,A,54.539409,-115.119440,...,C2,,2005-11-24 14:01:00,0.01,2005-11-24 14:01:00,0.01,,,2005-11-24 14:02:00,0.01
11350,WWF054,,2005,2006,2006-02-13 11:00:00,0.50,0.50,B,54.757621,-115.383929,...,S1,,2006-02-13 11:00:00,0.50,2006-02-15 12:45:00,0.50,2006-02-13 16:00:00,0.50,2006-02-15 13:00:00,0.50


In [145]:
# Clean data by removing columns containing less useful data 
clean_fire_df = fire_df.drop(columns=["fire_number", "assessment_hectares","calendar_year", "fire_name", "fire_year", "to_fs_date", "to_hectares", "ex_fs_date", "ex_hectares", "industry_identifier_desc", "initial_action_by", "current_size", "size_class", "fire_origin"])
clean_fire_df

Unnamed: 0,assessment_datetime,fire_location_latitude,fire_location_longitude,general_cause_desc,responsible_group_desc,activity_class,true_cause,permit_detail_desc,fire_start_date,det_agent_type,...,fire_fighting_start_size,fire_type,fire_position_on_slope,weather_conditions_over_fire,fuel_type,other_fuel_type,bh_fs_date,bh_hectares,uc_fs_date,uc_hectares
0,2006-04-02 16:00:00,51.152933,-115.034600,Resident,Resident,Unclassified,Unsafe Fire,,2006-04-02 14:25:00,UNP,...,,Surface,Flat,Clear,O1b,,2006-04-02 16:00:00,0.20,2006-04-02 16:00:00,0.20
1,2006-04-03 16:45:00,51.157633,-115.002133,Undetermined,,,,,2006-04-03 15:30:00,UNP,...,,Surface,Flat,Cloudy,O1b,,2006-04-03 16:45:00,0.01,2006-04-03 16:45:00,0.01
2,2006-04-08 20:05:00,51.194400,-114.516167,Recreation,Hikers,Cooking,Abandoned Fire,,2006-04-08 19:00:00,UNP,...,0.01,Ground,Bottom,Cloudy,,Campfire,2006-04-08 20:05:00,0.01,2006-04-08 20:05:00,0.01
3,2006-04-13 18:20:00,51.125617,-114.841683,Resident,Resident,Smoking,Burning Substance,,2006-04-13 17:33:00,UNP,...,,Surface,Bottom,Cloudy,O1a,,2006-04-13 18:20:00,0.75,2006-04-13 18:20:00,0.75
4,2006-04-14 17:25:00,50.409833,-114.478967,Resident,Resident,Cooking,Unsafe Fire,,2006-04-14 15:17:00,UNP,...,,Surface,Flat,Cloudy,O1a,,2006-04-14 17:25:00,0.01,2006-04-14 17:25:00,0.01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11347,2005-11-09 15:06:00,54.230383,-115.749217,Recreation,Hunters,Cooking,Abandoned Fire,,2005-11-05 18:00:00,UNP,...,,Ground,Flat,Clear,M1,,2005-11-09 15:06:00,0.01,2005-11-09 16:00:00,0.01
11348,2005-11-22 13:00:00,54.626694,-115.371483,Other Industry,Others (explain in remarks),Brush,Unattended Fire,,2005-11-01 12:00:00,UNP,...,0.20,Surface,Flat,Clear,C3,,2005-11-22 15:30:00,0.20,2005-11-22 18:00:00,0.60
11349,2005-11-24 14:00:00,54.539409,-115.119440,Recreation,Hunters,Cooking,Abandoned Fire,,2005-11-05 12:00:00,UNP,...,,Ground,Flat,Clear,C2,,2005-11-24 14:01:00,0.01,2005-11-24 14:01:00,0.01
11350,2006-02-13 11:00:00,54.757621,-115.383929,Forest Industry,Contractors,Brush,Unattended Fire,,2006-01-30 12:00:00,UNP,...,,Surface,Flat,Clear,S1,,2006-02-13 11:00:00,0.50,2006-02-15 12:45:00,0.50


In [149]:
# Decide on features and label: 
# Features are date of the fire, latitude, longitude, cause of fire, who detected the fire, was there a permit for the fire, type of fire, weather conditions, fire position, fuel type 
# Output labels is the date & size of the fire being under controlled i.e. how big the fire became & how long it took to be under controlled
# The model will aim to calculate these parameters input and provide the probability in predicting the size and duration of potential forest fires

# Output labels 
y = clean_fire_df(columns=["bh_fs_date", "bh_hectares"])

# Features data 
X = clean_fire_df.drop(columns=["bh_fs_date", "bh_hectares"])


TypeError: 'DataFrame' object is not callable