In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
import pandas as pd
import tensorflow as tf

# Import our input dataset
fire_df = pd.read_csv('../Resources/wildfire_v2_clean_data.csv')
fire_df.head()

Unnamed: 0,fire_size,fire_size_class,fire_cause,latitude,longitude,state,discovery_month,discovery_date,discovery_year,Vegetation,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,10.0,C,Missing/Undefined,18.105072,-66.753044,PR,Feb,1/12/2007,2007,12,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,3.0,B,Arson,35.03833,-87.61,TN,Dec,11/11/2006,2006,15,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,60.0,C,Arson,34.9478,-88.7225,MS,Feb,1/30/2004,2004,16,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,1.0,B,Debris Burning,39.6414,-119.3083,NV,Jun,5/7/2005,2005,0,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,2.0,B,Miscellaneous,30.7006,-90.5914,LA,Sep,8/23/1999,1999,12,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633


In [3]:
fire_df.dtypes

fire_size          float64
fire_size_class     object
fire_cause          object
latitude           float64
longitude          float64
state               object
discovery_month     object
discovery_date      object
discovery_year       int64
Vegetation           int64
fire_mag           float64
Temp_pre_30        float64
Temp_pre_15        float64
Temp_pre_7         float64
Temp_cont          float64
Wind_pre_30        float64
Wind_pre_15        float64
Wind_pre_7         float64
Wind_cont          float64
Hum_pre_30         float64
Hum_pre_15         float64
Hum_pre_7          float64
Hum_cont           float64
Prec_pre_30        float64
Prec_pre_15        float64
Prec_pre_7         float64
Prec_cont          float64
remoteness         float64
dtype: object

In [4]:
fire_df.drop(['discovery_date','state','fire_size_class','fire_mag'], axis=1, inplace=True)
fire_df.head()

Unnamed: 0,fire_size,fire_cause,latitude,longitude,discovery_month,discovery_year,Vegetation,Temp_pre_30,Temp_pre_15,Temp_pre_7,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,10.0,Missing/Undefined,18.105072,-66.753044,Feb,2007,12,24.480974,24.716923,24.902597,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,3.0,Arson,35.03833,-87.61,Dec,2006,15,7.553433,7.01,0.343529,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,60.0,Arson,34.9478,-88.7225,Feb,2004,16,4.97193,5.782766,5.55875,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,1.0,Debris Burning,39.6414,-119.3083,Jun,2005,0,16.275967,18.996181,18.142564,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,2.0,Miscellaneous,30.7006,-90.5914,Sep,1999,12,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633


In [5]:
# Generate our categorical variable list
fire_cat = fire_df.dtypes[fire_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
fire_df[fire_cat].nunique()

fire_cause         13
discovery_month    12
dtype: int64

In [6]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(fire_df[fire_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(fire_cat)
encode_df.head()

Unnamed: 0,fire_cause_Arson,fire_cause_Campfire,fire_cause_Children,fire_cause_Debris Burning,fire_cause_Equipment Use,fire_cause_Fireworks,fire_cause_Lightning,fire_cause_Miscellaneous,fire_cause_Missing/Undefined,fire_cause_Powerline,...,discovery_month_Dec,discovery_month_Feb,discovery_month_Jan,discovery_month_Jul,discovery_month_Jun,discovery_month_Mar,discovery_month_May,discovery_month_Nov,discovery_month_Oct,discovery_month_Sep
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [7]:
# Merge one-hot encoded features and drop the originals
fire_df = fire_df.merge(encode_df,left_index=True, right_index=True)
fire_df = fire_df.drop(fire_cat,1)
fire_df.head()

Unnamed: 0,fire_size,latitude,longitude,discovery_year,Vegetation,Temp_pre_30,Temp_pre_15,Temp_pre_7,Temp_cont,Wind_pre_30,...,discovery_month_Dec,discovery_month_Feb,discovery_month_Jan,discovery_month_Jul,discovery_month_Jun,discovery_month_Mar,discovery_month_May,discovery_month_Nov,discovery_month_Oct,discovery_month_Sep
0,10.0,18.105072,-66.753044,2007,12,24.480974,24.716923,24.902597,24.527961,4.341807,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3.0,35.03833,-87.61,2006,15,7.553433,7.01,0.343529,10.448298,2.709764,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,60.0,34.9478,-88.7225,2004,16,4.97193,5.782766,5.55875,13.6966,3.364499,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,39.6414,-119.3083,2005,0,16.275967,18.996181,18.142564,0.0,4.054982,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,2.0,30.7006,-90.5914,1999,12,-1.0,-1.0,-1.0,-1.0,-1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [22]:
# Remove target from features data
y = fire_df.fire_size
X = fire_df.drop(columns=["fire_size"])
# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [14]:
import numpy as np

In [16]:
print(y_pred)
print(type(y_pred))

[1.19640000e+01 1.36960000e+00 1.23517000e+01 ... 2.33680429e+04
 5.14600000e+01 5.84000000e+00]
<class 'numpy.ndarray'>


In [17]:
print(y_test)
print(type(y_test))

45357       1.50
44979       1.48
48941       3.00
7182        2.00
12724       1.00
          ...   
31714       6.00
49628      11.00
50668    3310.00
23079       1.00
3169        8.00
Name: fire_size, Length: 13842, dtype: float64
<class 'pandas.core.series.Series'>


In [35]:
# Converting series to array 
y_test = y_test.to_numpy()
y_test
print(type(y_test))

<class 'numpy.ndarray'>


In [23]:
# Possible accuracy score solution??
#clf = SVC(kernel='linear')

In [24]:
# Create a random forest regressor.
rf_model = RandomForestRegressor(n_estimators=100, random_state=2)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)


In [34]:
clf = RandomForestRegressor(n_estimators=10)
clf.fit(X_train_scaled, y_train)
y_pred = clf.predict(X_test_scaled)
clf.score(y_test, y_pred)

ValueError: Expected 2D array, got 1D array instead:
array=[1.50e+00 1.48e+00 3.00e+00 ... 3.31e+03 1.00e+00 8.00e+00].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [25]:
# Need help with code 

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {clf.score(y_test,y_pred):.3f}")

ValueError: continuous is not supported