In [2]:
# Dependencies
import numpy as np
import pandas as pd

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

# Additional Imports for ML model
from path import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Profiling report used for exploratory inspection of the data
from pandas_profiling import ProfileReport

In [3]:
# Load the wildfire CSV into a DataFrame
fire_df = pd.read_csv(filepath_or_buffer="Resources/wildfire_data.csv")

# Preview the first ten rows
fire_df.head(n=10)

Unnamed: 0,fire_size,fire_size_class,fire_cause,latitude,longitude,state,discovery_month,discovery_date,discovery_year,wstation_usaf,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,10.0,C,Missing/Undefined,18.105072,-66.753044,PR,Feb,1/12/2007,2007,785140,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,3.0,B,Arson,35.03833,-87.61,TN,Dec,11/11/2006,2006,723235,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,60.0,C,Arson,34.9478,-88.7225,MS,Feb,1/30/2004,2004,723235,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,1.0,B,Debris Burning,39.6414,-119.3083,NV,Jun,5/7/2005,2005,724880,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,2.0,B,Miscellaneous,30.7006,-90.5914,LA,Sep,8/23/1999,1999,722312,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633
5,1.0,B,Debris Burning,32.0639,-82.4178,GA,Mar,2/20/1999,1999,722691,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.139643
6,5.2,B,Debris Burning,31.316978,-83.393649,GA,Jan,12/11/2008,2008,747810,...,0.0,79.896679,73.431818,0.0,0.0,26.0,0.0,0.0,0.0,0.148904
7,1.0,B,Campfire,30.90472,-93.5575,TX,Nov,10/13/2005,2005,722820,...,2.148857,72.899478,75.061381,77.924623,70.732911,28.4,27.5,1.2,55.4,0.241894
8,1.0,B,Arson,35.90031,-92.06118,AR,Aug,7/9/2007,2007,723448,...,2.10309,68.319022,67.575419,65.077844,60.196858,6.6,3.3,0.0,46.4,0.224629
9,1.0,B,Miscellaneous,48.8394,-99.7185,ND,Apr,3/20/2004,2004,727675,...,0.0,64.606509,55.943038,54.337838,0.0,12.3,1.8,0.0,0.0,0.291683


In [3]:
# Build a profiling report over the full wildfire DataFrame
profile = ProfileReport(
    fire_df,
    title="Pandas Profiling Report",
)

In [4]:
# Render the profiling report as interactive notebook widgets
profile.to_widgets()

Summarize dataset:   0%|          | 0/46 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render widgets:   0%|          | 0/1 [00:00<?, ?it/s]

VBox(children=(Tab(children=(Tab(children=(GridBox(children=(VBox(children=(GridspecLayout(children=(HTML(valu…

In [8]:
# Count the non-null entries in each column.
# NOTE(review): the row preview shows -1.0 in several weather columns, which
# looks like a missing-value placeholder that count() would not flag — confirm.
fire_df.count()

fire_size          55367
fire_size_class    55367
fire_cause         55367
latitude           55367
longitude          55367
state              55367
discovery_month    55367
discovery_date     55367
discovery_year     55367
wstation_usaf      55367
dstation_m         55367
wstation_wban      55367
wstation_byear     55367
wstation_eyear     55367
Vegetation         55367
fire_mag           55367
Temp_pre_30        55367
Temp_pre_15        55367
Temp_pre_7         55367
Temp_cont          55367
Wind_pre_30        55367
Wind_pre_15        55367
Wind_pre_7         55367
Wind_cont          55367
Hum_pre_30         55367
Hum_pre_15         55367
Hum_pre_7          55367
Hum_cont           55367
Prec_pre_30        55367
Prec_pre_15        55367
Prec_pre_7         55367
Prec_cont          55367
remoteness         55367
dtype: int64

In [5]:
# Columns to discard; judging by their names they look like weather-station
# metadata — TODO confirm they are not needed downstream.
station_columns = [
    'wstation_usaf',
    'dstation_m',
    'wstation_wban',
    'wstation_byear',
    'wstation_eyear',
]

# Rebind instead of mutating in place, and tolerate already-missing columns,
# so re-running this cell does not raise KeyError once the columns are gone.
# `axis=1` is omitted because `columns=` already selects the column axis.
fire_df = fire_df.drop(columns=station_columns, errors="ignore")
fire_df.head()

Unnamed: 0,fire_size,fire_size_class,fire_cause,latitude,longitude,state,discovery_month,discovery_date,discovery_year,Vegetation,...,Wind_cont,Hum_pre_30,Hum_pre_15,Hum_pre_7,Hum_cont,Prec_pre_30,Prec_pre_15,Prec_pre_7,Prec_cont,remoteness
0,10.0,C,Missing/Undefined,18.105072,-66.753044,PR,Feb,1/12/2007,2007,12,...,3.250413,78.21659,76.79375,76.381579,78.72437,0.0,0.0,0.0,0.0,0.017923
1,3.0,B,Arson,35.03833,-87.61,TN,Dec,11/11/2006,2006,15,...,2.12232,70.84,65.858911,55.505882,81.682678,59.8,8.4,0.0,86.8,0.184355
2,60.0,C,Arson,34.9478,-88.7225,MS,Feb,1/30/2004,2004,16,...,3.36905,75.531629,75.868613,76.812834,65.0638,168.8,42.2,18.1,124.5,0.194544
3,1.0,B,Debris Burning,39.6414,-119.3083,NV,Jun,5/7/2005,2005,0,...,0.0,44.778429,37.140811,35.353846,0.0,10.4,7.2,0.0,0.0,0.487447
4,2.0,B,Miscellaneous,30.7006,-90.5914,LA,Sep,8/23/1999,1999,12,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.214633


In [6]:
# Write the current DataFrame to a new CSV, leaving out the row index
fire_df.to_csv(
    path_or_buf='Resources/wildfire_data2.csv',
    index=False,
)