In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf

# Load the cleaned wildfire dataset from the Resources folder (path is relative
# to the notebook's working directory) and preview the first rows.
wildfire_csv = '../Resources/wildfire_v2_clean_data.csv'
fire_df = pd.read_csv(wildfire_csv)
fire_df.head()

Unnamed: 0,fire_size,fire_size_class,fire_cause,latitude,longitude,state,discovery_month,discovery_date,discovery_year,Vegetation,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,10.0,C,Missing/Undefined,18.105072,-66.753044,PR,Feb,1/12/2007,2007,12,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,3.0,B,Arson,35.03833,-87.61,TN,Dec,11/11/2006,2006,15,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,60.0,C,Arson,34.9478,-88.7225,MS,Feb,1/30/2004,2004,16,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,1.0,B,Debris Burning,39.6414,-119.3083,NV,Jun,5/7/2005,2005,0,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,2.0,B,Miscellaneous,30.7006,-90.5914,LA,Sep,8/23/1999,1999,12,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633


In [2]:
# Display the column labels of the loaded DataFrame.
fire_df.columns

Index(['fire_size', 'fire_size_class', 'fire_cause', 'latitude', 'longitude',
       'state', 'discovery_month', 'discovery_date', 'discovery_year',
       'Vegetation', 'fire_mag', 'Temp_pre_30', 'Temp_pre_15', 'Temp_pre_7',
       'Temp_cont', 'Wind_pre_30', 'Wind_pre_15', 'Wind_pre_7', 'Wind_cont',
       'Hum_pre_30', 'Hum_pre_15', 'Hum_pre_7', 'Hum_cont', 'Prec_pre_30',
       'Prec_pre_15', 'Prec_pre_7', 'Prec_cont', 'remoteness'],
      dtype='object')

In [4]:
# Drop the columns that are not used as model features.
# NOTE(review): `fire_size` (and presumably `fire_mag`) look like direct proxies
# for the `fire_size_class` target, so keeping them would leak the label --
# confirm against the data dictionary.
#
# The frame is rebound instead of mutated with `inplace=True`, and
# `errors="ignore"` makes the cell safe to re-run: the original raised KeyError
# on a second execution because the columns were already gone.
fire_df = fire_df.drop(
    columns=[
        'fire_size',
        'state',
        'fire_mag',
        'discovery_date',
        'discovery_year',
        'discovery_month',
    ],
    errors="ignore",
)
fire_df.head()

Unnamed: 0,fire_size_class,fire_cause,latitude,longitude,Vegetation,Temp_pre_30,Temp_pre_15,Temp_pre_7,Temp_cont,Wind_pre_30,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,C,Missing/Undefined,18.105072,-66.753044,12,24.480974,24.716923,24.902597,24.527961,4.341807,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,B,Arson,35.03833,-87.61,15,7.553433,7.01,0.343529,10.448298,2.709764,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,C,Arson,34.9478,-88.7225,16,4.97193,5.782766,5.55875,13.6966,3.364499,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,B,Debris Burning,39.6414,-119.3083,0,16.275967,18.996181,18.142564,0.0,4.054982,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,B,Miscellaneous,30.7006,-90.5914,12,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633


In [5]:
# Collect the names of all object-dtype (string) columns; these are the
# categorical variables that get one-hot encoded later.
fire_cat = [
    column_name
    for column_name, column_dtype in fire_df.dtypes.items()
    if column_dtype == "object"
]

# Count the distinct values in each categorical column.
fire_df[fire_cat].nunique()

fire_size_class     6
fire_cause         13
dtype: int64

In [7]:
# Count how many fires fall into each size class.
# NOTE: despite its name, `fire_cause_unique` holds the `fire_size_class`
# counts, not fire-cause counts; the name is kept so existing references work.
fire_cause_unique = fire_df["fire_size_class"].value_counts()
fire_cause_unique

B    36522
C    10811
G     3972
F     1968
D     1394
E      700
Name: fire_size_class, dtype: int64

In [8]:
# Collapse the six fire-size classes into a binary target:
# classes F and G become "devastating", classes B-E become "minor".
devastating = ["F","G"]
not_devastating = ["B","C","D","E"]

# Build one lookup table and apply it in a single pass instead of calling
# `replace` once per class letter. Values that are not listed (including the
# already-relabelled "devastating"/"minor" on a re-run) are left unchanged.
size_class_labels = {
    **dict.fromkeys(devastating, "devastating"),
    **dict.fromkeys(not_devastating, "minor"),
}
fire_df["fire_size_class"] = fire_df["fire_size_class"].replace(size_class_labels)

In [9]:
# Preview the frame to confirm `fire_size_class` now holds the binary labels.
fire_df.head()

Unnamed: 0,fire_size_class,fire_cause,latitude,longitude,Vegetation,Temp_pre_30,Temp_pre_15,Temp_pre_7,Temp_cont,Wind_pre_30,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,minor,Missing/Undefined,18.105072,-66.753044,12,24.480974,24.716923,24.902597,24.527961,4.341807,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,minor,Arson,35.03833,-87.61,15,7.553433,7.01,0.343529,10.448298,2.709764,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,minor,Arson,34.9478,-88.7225,16,4.97193,5.782766,5.55875,13.6966,3.364499,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,minor,Debris Burning,39.6414,-119.3083,0,16.275967,18.996181,18.142564,0.0,4.054982,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,minor,Miscellaneous,30.7006,-90.5914,12,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633


In [10]:
# Create a OneHotEncoder that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and later
# removed the old name, so try the new spelling first and fall back for older
# installations.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encoded_values = enc.fit_transform(fire_df[fire_cat])

# Look up the generated column names. `get_feature_names` was replaced by
# `get_feature_names_out` (and removed in scikit-learn 1.2); use whichever the
# installed version provides.
if hasattr(enc, "get_feature_names_out"):
    encoded_names = enc.get_feature_names_out(fire_cat)
else:
    encoded_names = enc.get_feature_names(fire_cat)

# Reuse fire_df's index so the index-based merge in the next step lines rows
# up correctly even if fire_df does not have a default 0..n-1 index.
encode_df = pd.DataFrame(encoded_values, columns=encoded_names, index=fire_df.index)
encode_df.head()

Unnamed: 0,fire_size_class_devastating,fire_size_class_minor,fire_cause_Arson,fire_cause_Campfire,fire_cause_Children,fire_cause_Debris Burning,fire_cause_Equipment Use,fire_cause_Fireworks,fire_cause_Lightning,fire_cause_Miscellaneous,fire_cause_Missing/Undefined,fire_cause_Powerline,fire_cause_Railroad,fire_cause_Smoking,fire_cause_Structure
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Merge the one-hot encoded columns onto the frame (row-aligned by index) and
# drop the original categorical columns they replace.
fire_df = fire_df.merge(encode_df, left_index=True, right_index=True)

# Use the `columns=` keyword: passing the axis positionally (`drop(fire_cat, 1)`)
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
fire_df = fire_df.drop(columns=fire_cat)
fire_df.head()

Unnamed: 0,latitude,longitude,Vegetation,Temp_pre_30,Temp_pre_15,Temp_pre_7,Temp_cont,Wind_pre_30,Wind_pre_15,Wind_pre_7,...,fire_cause_Debris Burning,fire_cause_Equipment Use,fire_cause_Fireworks,fire_cause_Lightning,fire_cause_Miscellaneous,fire_cause_Missing/Undefined,fire_cause_Powerline,fire_cause_Railroad,fire_cause_Smoking,fire_cause_Structure
0,18.105072,-66.753044,12,24.480974,24.716923,24.902597,24.527961,4.341807,3.492857,3.262092,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,35.03833,-87.61,15,7.553433,7.01,0.343529,10.448298,2.709764,2.881707,1.976471,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,34.9478,-88.7225,16,4.97193,5.782766,5.55875,13.6966,3.364499,2.92383,2.695833,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,39.6414,-119.3083,0,16.275967,18.996181,18.142564,0.0,4.054982,3.398329,3.671282,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,30.7006,-90.5914,12,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Separate the target from the features.
# The target is the one-hot "devastating" indicator (1.0 = class F/G fire,
# 0.0 = minor). Both one-hot target columns are removed from X, since
# `fire_size_class_minor` is simply the complement of the target.
y = fire_df.fire_size_class_devastating
X = fire_df.drop(columns=["fire_size_class_devastating","fire_size_class_minor"])

# Split training/test datasets.
# `stratify=y` keeps the devastating/minor ratio the same in both splits;
# `random_state` fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler on the training split only, so no statistics from the
# test split leak into preprocessing.
X_scaler = scaler.fit(X_train)

# Scale both splits with the training-set statistics
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [13]:
# Build a 100-tree random forest (fixed seed for reproducibility) and train it
# on the scaled training features in one step; `fit` returns the estimator.
rf_model = RandomForestClassifier(n_estimators=100, random_state=78).fit(
    X_train_scaled, y_train
)

# Predict on the held-out split and report plain accuracy.
# NOTE: per the class counts shown earlier, only about 11% of fires are
# "devastating", so always predicting "minor" would already score roughly
# 0.893; read the accuracy against that baseline.
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.982
