In [1]:
# Import our dependencies
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [2]:
# Load the wildfire dataset from the Resources folder (path is relative to this notebook)
wildfire_csv_path = '../Resources/wildfire_data_final.csv'
fire_data_df = pd.read_csv(wildfire_csv_path)

# Leave the frame as the cell's last expression so the notebook renders a preview
fire_data_df

Unnamed: 0,Fire_Size,Fire_Size_Class,Fire_Cause,Latitude,Longitude,State,Discovery_Month,Discovery_Date,Discovery_Year,Vegetation,...,Wind_Pre_7,Wind_Present,Hum_Pre_30,Hum_Pre_15,Hum_Pre_7,Hum_Present,Prec_Pre_30,Prec_Pre_15,Prec_Pre_7,Prec_Present
0,60.0,C,Arson,34.947800,-88.722500,MS,Feb,1/30/2004,2004,16,...,2.695833,3.369050,75.531629,75.868613,76.812834,65.063800,168.8,42.2,18.1,124.5
1,1.0,B,Campfire,30.904720,-93.557500,TX,Nov,10/13/2005,2005,12,...,1.424783,2.148857,72.899478,75.061381,77.924623,70.732911,28.4,27.5,1.2,55.4
2,8.3,B,Debris Burning,30.845339,-83.127987,GA,Mar,2/4/2010,2010,12,...,2.224500,1.750701,71.260870,69.281030,64.797980,73.072072,76.3,26.2,8.4,40.5
3,1.0,B,Miscellaneous,42.731934,-77.905976,NY,Apr,3/4/2010,2010,4,...,3.744928,2.872771,68.640553,69.556263,63.966184,59.956798,52.9,38.4,2.3,30.5
4,20.0,C,Arson,31.122200,-88.099400,AL,Jun,5/5/2000,2000,12,...,2.899537,2.623313,73.717979,74.603325,69.440594,77.471227,93.7,85.3,41.4,154.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7279,3082.0,F,Lightning,48.383600,-117.891900,WA,Aug,7/15/2015,2015,15,...,2.026590,2.918241,37.301713,36.085561,40.526012,49.762009,2.0,2.0,2.0,9.2
7280,4800.0,F,Equipment Use,45.135700,-99.908700,SD,Apr,3/2/2015,2015,9,...,6.023590,5.456159,58.984906,61.466667,60.994872,56.239420,4.4,4.4,4.1,12.2
7281,5100.0,G,Equipment Use,45.069700,-99.821100,SD,Apr,3/3/2015,2015,9,...,5.436216,5.316949,57.976101,59.909524,58.129730,56.989091,4.4,4.4,4.1,12.7
7282,17823.0,G,Campfire,44.834600,-117.220600,OR,Sep,8/13/2015,2015,15,...,1.835821,2.292956,55.009259,62.368700,67.266304,58.917679,10.4,10.4,9.4,8.6


In [3]:
# List the column names of the raw wildfire frame
fire_data_df.columns

Index(['Fire_Size', 'Fire_Size_Class', 'Fire_Cause', 'Latitude', 'Longitude',
       'State', 'Discovery_Month', 'Discovery_Date', 'Discovery_Year',
       'Vegetation', 'Fire_Magnitude', 'Temp_Pre_30', 'Temp_Pre_15',
       'Temp_Pre_7', 'Temp_Present', 'Wind_Pre_30', 'Wind_Pre_15',
       'Wind_Pre_7', 'Wind_Present', 'Hum_Pre_30', 'Hum_Pre_15', 'Hum_Pre_7',
       'Hum_Present', 'Prec_Pre_30', 'Prec_Pre_15', 'Prec_Pre_7',
       'Prec_Present'],
      dtype='object')

In [4]:
# Columns removed before modelling. What remains is the fire cause, the location
# (Latitude, Longitude, State), the discovery date and the *_Present weather readings.
columns_to_drop = [
    # Size-related fields (Fire_Size_Class is used as the prediction target later on)
    "Fire_Size", "Fire_Size_Class", "Fire_Magnitude",
    # Month / year fields and vegetation
    "Discovery_Month", "Discovery_Year", "Vegetation",
    # Look-back weather readings (30 / 15 / 7 day columns)
    "Temp_Pre_30", "Temp_Pre_15", "Temp_Pre_7",
    "Wind_Pre_30", "Wind_Pre_15", "Wind_Pre_7",
    "Hum_Pre_30", "Hum_Pre_15", "Hum_Pre_7",
    "Prec_Pre_30", "Prec_Pre_15", "Prec_Pre_7",
]

# drop() returns a new frame, so fire_data_df itself is left untouched
fire_clean_df = fire_data_df.drop(columns=columns_to_drop)
fire_clean_df

Unnamed: 0,Fire_Cause,Latitude,Longitude,State,Discovery_Date,Temp_Present,Wind_Present,Hum_Present,Prec_Present
0,Arson,34.947800,-88.722500,MS,1/30/2004,13.696600,3.369050,65.063800,124.5
1,Campfire,30.904720,-93.557500,TX,10/13/2005,11.985560,2.148857,70.732911,55.4
2,Debris Burning,30.845339,-83.127987,GA,2/4/2010,14.328047,1.750701,73.072072,40.5
3,Miscellaneous,42.731934,-77.905976,NY,3/4/2010,11.375329,2.872771,59.956798,30.5
4,Arson,31.122200,-88.099400,AL,5/5/2000,25.610425,2.623313,77.471227,154.3
...,...,...,...,...,...,...,...,...,...
7279,Lightning,48.383600,-117.891900,WA,7/15/2015,17.337247,2.918241,49.762009,9.2
7280,Equipment Use,45.135700,-99.908700,SD,3/2/2015,8.710361,5.456159,56.239420,12.2
7281,Equipment Use,45.069700,-99.821100,SD,3/3/2015,8.686747,5.316949,56.989091,12.7
7282,Campfire,44.834600,-117.220600,OR,8/13/2015,12.713333,2.292956,58.917679,8.6


In [5]:
# Generate our categorical variable list (every object-dtype column)
fire_cat = fire_clean_df.dtypes[fire_clean_df.dtypes == "object"].index.tolist()

# Number of unique values in each categorical column. Stored in a variable because
# only a cell's last expression is displayed; inspect it in its own cell if needed.
# NOTE(review): Discovery_Date is treated as a category, so it expands into one
# indicator column per distinct date (see the Discovery_Date_* columns in the
# output). Consider deriving month / day-of-year features instead.
fire_cat_nunique = fire_clean_df[fire_cat].nunique()

# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and later
# removed the old name, so try the new spelling first and fall back on older releases.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encoded_values = enc.fit_transform(fire_clean_df[fire_cat])

# Encoded variable names: get_feature_names() was replaced by
# get_feature_names_out(); use whichever this scikit-learn version provides.
if hasattr(enc, "get_feature_names_out"):
    encoded_names = enc.get_feature_names_out(fire_cat)
else:
    encoded_names = enc.get_feature_names(fire_cat)

# Build the encoded frame on fire_clean_df's own index so that the index-based
# merge in the next step lines rows up even if the index is not 0..n-1.
encode_df = pd.DataFrame(encoded_values, index=fire_clean_df.index, columns=encoded_names)
encode_df.head()

Unnamed: 0,Fire_Cause_Arson,Fire_Cause_Campfire,Fire_Cause_Children,Fire_Cause_Debris Burning,Fire_Cause_Equipment Use,Fire_Cause_Fireworks,Fire_Cause_Lightning,Fire_Cause_Miscellaneous,Fire_Cause_Missing/Undefined,Fire_Cause_Powerline,...,Discovery_Date_9/8/1999,Discovery_Date_9/8/2005,Discovery_Date_9/8/2006,Discovery_Date_9/8/2008,Discovery_Date_9/8/2013,Discovery_Date_9/9/2000,Discovery_Date_9/9/2005,Discovery_Date_9/9/2006,Discovery_Date_9/9/2010,Discovery_Date_9/9/2015
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Merge one-hot encoded features (joined on the row index) and drop the original
# categorical columns. The columns are named via the `columns=` keyword: passing
# the axis positionally (drop(fire_cat, 1)) is no longer accepted by pandas 2.x.
x_df = (
    fire_clean_df
    .merge(encode_df, left_index=True, right_index=True)
    .drop(columns=fire_cat)
)
x_df.head()

Unnamed: 0,Latitude,Longitude,Temp_Present,Wind_Present,Hum_Present,Prec_Present,Fire_Cause_Arson,Fire_Cause_Campfire,Fire_Cause_Children,Fire_Cause_Debris Burning,...,Discovery_Date_9/8/1999,Discovery_Date_9/8/2005,Discovery_Date_9/8/2006,Discovery_Date_9/8/2008,Discovery_Date_9/8/2013,Discovery_Date_9/9/2000,Discovery_Date_9/9/2005,Discovery_Date_9/9/2006,Discovery_Date_9/9/2010,Discovery_Date_9/9/2015
0,34.9478,-88.7225,13.6966,3.36905,65.0638,124.5,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30.90472,-93.5575,11.98556,2.148857,70.732911,55.4,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,30.845339,-83.127987,14.328047,1.750701,73.072072,40.5,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,42.731934,-77.905976,11.375329,2.872771,59.956798,30.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,31.1222,-88.0994,25.610425,2.623313,77.471227,154.3,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Build the target and the feature matrix.
# The target is the Fire_Size_Class column of the raw frame. It was already left out
# of x_df when fire_clean_df was built, so nothing has to be removed from X here.
# y is kept as a single-column 2-D array of shape (n_samples, 1).
y = np.reshape(fire_data_df["Fire_Size_Class"].values, (-1, 1))
X = x_df

# Split training/test datasets
# Random sample with a fixed seed; no test_size is given, so train_test_split's
# default split fraction applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler on the training split only, so the scaling parameters
# are not computed from any test rows
X_scaler = scaler.fit(X_train)

# Scale both splits with the training-set parameters
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [41]:
# Create a random forest classifier: 500 trees, depth capped at 15, fixed seed.
rf_model = RandomForestClassifier(n_estimators=500, random_state=10, max_depth=15)

# Fit the model on the scaled training split.
# y_train is a single-column 2-D array; flatten it to the 1-D label vector that
# fit() expects. Passing the column vector makes scikit-learn emit a
# DataConversionWarning.
rf_model = rf_model.fit(X_train_scaled, y_train.ravel())

  """


In [36]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 10
# max depth = 60
# estimators = 90
# accuracy score = 0.7018121911037891

0.7018121911037891

In [9]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)

# random state 42
# max depth = 35
# estimators = 100
# accuracy score = 0.6847885777045579

0.6847885777045579

In [12]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 35
# estimators = 100
# accuracy score = 0.6957715540911587

0.6957715540911587

In [14]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 40
# estimators = 100
# accuracy score = 0.700713893465129

0.700713893465129

In [20]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 70
# estimators = 100
# accuracy score = 0.6957715540911587

0.6957715540911587

In [38]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# n_estimators=500, random_state=10, max_depth=15, min_samples_split = 2
# accuracy score = 0.6771004942339374

0.6771004942339374

In [18]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 100
# estimators = 100
# accuracy score = 0.6990664470071389

0.6990664470071389

In [26]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 60
# estimators = 70
# accuracy score = 0.6985172981878089

0.6985172981878089

In [22]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 60
# estimators = 100
# accuracy score = 0.6990664470071389

0.6990664470071389

In [24]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 60
# estimators = 80
# accuracy score = 0.700164744645799

0.700164744645799

In [28]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 2
# max depth = 60
# estimators = 90
# accuracy score = 0.700164744645799

0.700164744645799

In [30]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# random state 1
# max depth = 60
# estimators = 90
# accuracy score = 0.6990664470071389  (the value shown in this cell's output)

0.6990664470071389

In [40]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)
# n_estimators=500,
# random_state=10,max_depth=15,
# min_samples_split = 2
# min_samples_leaf = 1,
# accuracy score = 0.6771004942339374

0.6771004942339374

In [42]:
# Evaluate the fitted model on the held-out test split.
# Inputs per the author's note: Fire_Cause, Fire_Size, Latitude, Longitude, State, Discovery_Date
# NOTE(review): Fire_Size is dropped when fire_clean_df is built — confirm this list.
y_pred = rf_model.predict(X_test_scaled)
# Score the predictions already in hand; rf_model.score() would predict the test set again.
accuracy_score(y_test, y_pred)

# n_estimators=500,
# random_state=10
# max_depth=15,
# accuracy score = 0.6771004942339374

0.6771004942339374